/** * This method is used to allocate * @param context * @param deviceId */ protected void getDeviceBuffers(CudaContext context, int deviceId) { NativeOps nativeOps = NativeOpsHolder.getInstance().getDeviceNativeOps(); //((CudaExecutioner) Nd4j.getExecutioner()).getNativeOps(); // we hardcode sizeOf to sizeOf(double) int sizeOf = 8; Pointer reductionPointer = nativeOps.mallocDevice(16385 * sizeOf * 2, new CudaPointer(deviceId), 0); if (reductionPointer == null) throw new IllegalStateException("Can't allocate [DEVICE] reduction buffer memory!"); nativeOps.memsetAsync(reductionPointer, 0, 16385 * sizeOf * 2, 0, context.getOldStream()); context.syncOldStream(); Pointer allocationPointer = nativeOps.mallocDevice(1024 * 1024, new CudaPointer(deviceId), 0); if (allocationPointer == null) throw new IllegalStateException("Can't allocate [DEVICE] allocation buffer memory!"); Pointer scalarPointer = nativeOps.mallocHost(1 * sizeOf, 0); if (scalarPointer == null) throw new IllegalStateException("Can't allocate [HOST] scalar buffer memory!"); context.setBufferScalar(scalarPointer); context.setBufferAllocation(allocationPointer); context.setBufferReduction(reductionPointer); Pointer specialPointer = nativeOps.mallocDevice(1024 * 1024 * sizeOf, new CudaPointer(deviceId), 0); if (specialPointer == null) throw new IllegalStateException("Can't allocate [DEVICE] special buffer memory!"); nativeOps.memsetAsync(specialPointer, 0, 65536 * sizeOf, 0, context.getOldStream()); context.setBufferSpecial(specialPointer); }
/** * This method is used to allocate * @param context * @param deviceId */ protected void getDeviceBuffers(CudaContext context, int deviceId) { NativeOps nativeOps = NativeOpsHolder.getInstance().getDeviceNativeOps(); //((CudaExecutioner) Nd4j.getExecutioner()).getNativeOps(); // we hardcode sizeOf to sizeOf(double) int sizeOf = 8; Pointer reductionPointer = nativeOps.mallocDevice(16385 * sizeOf * 2, new CudaPointer(deviceId), 0); if (reductionPointer == null) throw new IllegalStateException("Can't allocate [DEVICE] reduction buffer memory!"); nativeOps.memsetAsync(reductionPointer, 0, 16385 * sizeOf * 2, 0, context.getOldStream()); context.syncOldStream(); Pointer allocationPointer = nativeOps.mallocDevice(1024 * 1024, new CudaPointer(deviceId), 0); if (allocationPointer == null) throw new IllegalStateException("Can't allocate [DEVICE] allocation buffer memory!"); Pointer scalarPointer = nativeOps.mallocHost(1 * sizeOf, 0); if (scalarPointer == null) throw new IllegalStateException("Can't allocate [HOST] scalar buffer memory!"); context.setBufferScalar(scalarPointer); context.setBufferAllocation(allocationPointer); context.setBufferReduction(reductionPointer); Pointer specialPointer = nativeOps.mallocDevice(1024 * 1024 * sizeOf, new CudaPointer(deviceId), 0); if (specialPointer == null) throw new IllegalStateException("Can't allocate [DEVICE] special buffer memory!"); nativeOps.memsetAsync(specialPointer, 0, 65536 * sizeOf, 0, context.getOldStream()); context.setBufferSpecial(specialPointer); }
// Issue an asynchronous memset on ptr, sized by requiredMemory, on the context's special stream
// (the two literal 0 arguments are presumably the fill value and flags - confirm against NativeOps).
// A return code of 0 is treated as failure and reported together with the current thread's device id.
int ret = NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(ptr, 0, requiredMemory, 0, context.getSpecialStream()); if (ret == 0) throw new ND4JIllegalStateException("memset failed device_" + Nd4j.getAffinityManager().getDeviceForCurrentThread());
// Look up the CUDA context through the allocator's device context, then issue an asynchronous
// memset on ptr, sized by requiredMemory, on that context's special stream
// (the two literal 0 arguments are presumably the fill value and flags - confirm against NativeOps).
// A return code of 0 is treated as failure and reported together with the current thread's device id.
val context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext(); int ret = NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(ptr, 0, requiredMemory, 0, context.getSpecialStream()); if (ret == 0) throw new ND4JIllegalStateException("memset failed device_" + Nd4j.getAffinityManager().getDeviceForCurrentThread());
// Asynchronous memset on the pair's device pointer, sized by reqMemory, queued on the special stream,
// followed by a synchronize() on that same stream (presumably so the memset has finished before continuing).
// The value returned by memsetAsync is not inspected here.
nativeOps.memsetAsync(pair.getDevicePointer(), 0, reqMemory, 0, context.getSpecialStream()); context.getSpecialStream().synchronize();
// Asynchronous memset on the pair's device pointer, sized by reqMemory, queued on the special stream,
// followed by a synchronize() on that same stream (presumably so the memset has finished before continuing).
// The value returned by memsetAsync is not inspected here.
nativeOps.memsetAsync(pair.getDevicePointer(), 0, reqMemory, 0, context.getSpecialStream()); context.getSpecialStream().synchronize();
// Look up the CUDA context through the allocator's device context, then issue an asynchronous
// memset on ptr, sized by bytes, on that context's special stream
// (the two literal 0 arguments are presumably the fill value and flags - confirm against NativeOps).
// A return code of 0 is treated as failure and reported together with the current thread's device id.
CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext(); int i = NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(ptr, 0, bytes, 0, context.getSpecialStream()); if (i == 0) throw new ND4JIllegalStateException("memset failed on device_" + Nd4j.getAffinityManager().getDeviceForCurrentThread());
// Look up the CUDA context through the allocator's device context, then issue an asynchronous
// memset on ptr, sized by bytes, on that context's special stream
// (the two literal 0 arguments are presumably the fill value and flags - confirm against NativeOps).
// A return code of 0 is treated as failure and reported together with the current thread's device id.
CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext(); int i = NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(ptr, 0, bytes, 0, context.getSpecialStream()); if (i == 0) throw new ND4JIllegalStateException("memset failed on device_" + Nd4j.getAffinityManager().getDeviceForCurrentThread());
// Asynchronous memset on the pointer the allocator returns for (array, context), queued on the old stream.
// The size argument is the backing buffer's length multiplied by the size of its data type.
// This statement neither checks the returned value nor synchronizes the stream.
NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(AtomicAllocator.getInstance().getPointer(array, context),0, array.data().length() * Nd4j.sizeOfDataType(array.data().dataType()),0, context.getOldStream());
// Asynchronous memset on the allocation point's device pointer, sized as length * elementSize,
// queued on the context's special stream. The returned value is not inspected in this statement.
NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(allocationPoint.getDevicePointer(), 0, length * elementSize, 0, context.getSpecialStream());
@Override public void memset(INDArray array) { if (array.isView()) { array.assign(0.0); // we don't want any mGRID activations here Nd4j.getExecutioner().commit(); return; } // we want to be sure we have no trails left in mGRID Nd4j.getExecutioner().push(); AllocationPoint point = AtomicAllocator.getInstance().getAllocationPoint(array); if (point.getAllocationStatus() == AllocationStatus.DEVICE) { CudaContext context = (CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext(); NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(AtomicAllocator.getInstance().getPointer(array, context),0, array.data().length() * Nd4j.sizeOfDataType(array.data().dataType()),0, context.getOldStream()); // we also memset host pointer Pointer.memset(AtomicAllocator.getInstance().getHostPointer(array), 0, array.data().length() * Nd4j.sizeOfDataType(array.data().dataType())); // better be safe then sorry context.getOldStream().synchronize(); point.tickDeviceWrite(); point.tickHostRead(); } else if (point.getAllocationStatus() == AllocationStatus.HOST) { Nd4j.getExecutioner().commit(); // just casual memset Pointer.memset(AtomicAllocator.getInstance().getHostPointer(array), 0, array.data().length() * Nd4j.sizeOfDataType(array.data().dataType())); point.tickHostWrite(); } }
// Asynchronous memset on the allocation point's device pointer, sized as length * elementSize,
// queued on the context's special stream. The returned value is not inspected in this statement.
NativeOpsHolder.getInstance().getDeviceNativeOps().memsetAsync(allocationPoint.getDevicePointer(), 0, length * elementSize, 0, context.getSpecialStream());