/**
 * Creates a workspace on top of memory obtained from the device native ops.
 * <p>
 * The pointer returned by {@code mallocDevice(size, null, 0)} is handed to the superclass constructor,
 * and a deallocator is registered that passes this instance back to {@code freeDevice}.
 *
 * @param size amount of memory requested from {@code mallocDevice}
 */
public Workspace(long size) {
    super(NativeOpsHolder.getInstance().getDeviceNativeOps().mallocDevice(size, null, 0));

    deallocator(new Deallocator() {
        @Override
        public void deallocate() {
            // the workspace itself is the pointer that gets released
            Workspace owner = Workspace.this;
            NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(owner, null);
        }
    });
}
}
/**
 * Creates a workspace on top of memory obtained from the device native ops.
 * <p>
 * The pointer returned by {@code mallocDevice(size, null, 0)} is handed to the superclass constructor,
 * and a deallocator is registered that passes this instance back to {@code freeDevice}.
 *
 * @param size amount of memory requested from {@code mallocDevice}
 */
public Workspace(long size) {
    super(NativeOpsHolder.getInstance().getDeviceNativeOps().mallocDevice(size, null, 0));

    deallocator(new Deallocator() {
        @Override
        public void deallocate() {
            // the workspace itself is the pointer that gets released
            Workspace owner = Workspace.this;
            NativeOpsHolder.getInstance().getDeviceNativeOps().freeDevice(owner, null);
        }
    });
}
}
/** * This method is used to allocate * @param context * @param deviceId */ protected void getDeviceBuffers(CudaContext context, int deviceId) { NativeOps nativeOps = NativeOpsHolder.getInstance().getDeviceNativeOps(); //((CudaExecutioner) Nd4j.getExecutioner()).getNativeOps(); // we hardcode sizeOf to sizeOf(double) int sizeOf = 8; Pointer reductionPointer = nativeOps.mallocDevice(16385 * sizeOf * 2, new CudaPointer(deviceId), 0); if (reductionPointer == null) throw new IllegalStateException("Can't allocate [DEVICE] reduction buffer memory!"); nativeOps.memsetAsync(reductionPointer, 0, 16385 * sizeOf * 2, 0, context.getOldStream()); context.syncOldStream(); Pointer allocationPointer = nativeOps.mallocDevice(1024 * 1024, new CudaPointer(deviceId), 0); if (allocationPointer == null) throw new IllegalStateException("Can't allocate [DEVICE] allocation buffer memory!"); Pointer scalarPointer = nativeOps.mallocHost(1 * sizeOf, 0); if (scalarPointer == null) throw new IllegalStateException("Can't allocate [HOST] scalar buffer memory!"); context.setBufferScalar(scalarPointer); context.setBufferAllocation(allocationPointer); context.setBufferReduction(reductionPointer); Pointer specialPointer = nativeOps.mallocDevice(1024 * 1024 * sizeOf, new CudaPointer(deviceId), 0); if (specialPointer == null) throw new IllegalStateException("Can't allocate [DEVICE] special buffer memory!"); nativeOps.memsetAsync(specialPointer, 0, 65536 * sizeOf, 0, context.getOldStream()); context.setBufferSpecial(specialPointer); }
/** * This method is used to allocate * @param context * @param deviceId */ protected void getDeviceBuffers(CudaContext context, int deviceId) { NativeOps nativeOps = NativeOpsHolder.getInstance().getDeviceNativeOps(); //((CudaExecutioner) Nd4j.getExecutioner()).getNativeOps(); // we hardcode sizeOf to sizeOf(double) int sizeOf = 8; Pointer reductionPointer = nativeOps.mallocDevice(16385 * sizeOf * 2, new CudaPointer(deviceId), 0); if (reductionPointer == null) throw new IllegalStateException("Can't allocate [DEVICE] reduction buffer memory!"); nativeOps.memsetAsync(reductionPointer, 0, 16385 * sizeOf * 2, 0, context.getOldStream()); context.syncOldStream(); Pointer allocationPointer = nativeOps.mallocDevice(1024 * 1024, new CudaPointer(deviceId), 0); if (allocationPointer == null) throw new IllegalStateException("Can't allocate [DEVICE] allocation buffer memory!"); Pointer scalarPointer = nativeOps.mallocHost(1 * sizeOf, 0); if (scalarPointer == null) throw new IllegalStateException("Can't allocate [HOST] scalar buffer memory!"); context.setBufferScalar(scalarPointer); context.setBufferAllocation(allocationPointer); context.setBufferReduction(reductionPointer); Pointer specialPointer = nativeOps.mallocDevice(1024 * 1024 * sizeOf, new CudaPointer(deviceId), 0); if (specialPointer == null) throw new IllegalStateException("Can't allocate [DEVICE] special buffer memory!"); nativeOps.memsetAsync(specialPointer, 0, 65536 * sizeOf, 0, context.getOldStream()); context.setBufferSpecial(specialPointer); }
@Override public void convertDataEx(DataBuffer.TypeEx typeSrc, Pointer source, DataBuffer.TypeEx typeDst, DataBuffer buffer) { Pointer srcPtr = null; Pointer dstPtr = null; long size = 0; long ssize = 0; val stream = ((CudaContext) AtomicAllocator.getInstance().getDeviceContext().getContext()).getOldStream(); if (buffer instanceof CompressedDataBuffer) { // compressing size = ((CompressedDataBuffer) buffer).getCompressionDescriptor().getCompressedLength(); ssize = ((CompressedDataBuffer) buffer).getCompressionDescriptor().getOriginalLength(); srcPtr = nativeOps.mallocDevice(ssize, null, 0); dstPtr = nativeOps.mallocDevice(size, null, 0); nativeOps.memcpyAsync(srcPtr, source, ssize, CudaConstants.cudaMemcpyHostToDevice, stream); } else { // decompressing throw new UnsupportedOperationException(); } convertDataEx(typeSrc, srcPtr, typeDst, dstPtr, buffer.length()); nativeOps.memcpyAsync(buffer.addressPointer(), dstPtr, size, CudaConstants.cudaMemcpyHostToHost, stream); stream.synchronize(); if (buffer instanceof CompressedDataBuffer) { nativeOps.freeDevice(srcPtr, null); nativeOps.freeDevice(dstPtr, null); } }
// Requests `bytes` of memory via mallocDevice on the device native ops (second argument null, flags 0).
// NOTE(review): the result is not null-checked in the visible code - confirm the enclosing method handles a failed allocation.
Pointer ptr = NativeOpsHolder.getInstance().getDeviceNativeOps().mallocDevice(bytes, null, 0);
Pointer ptr = NativeOpsHolder.getInstance().getDeviceNativeOps().mallocDevice(bytes, null, 0);
// Requests `reqMem` of memory via nativeOps.mallocDevice (second argument null, flags 0).
// NOTE(review): the result is not null-checked in the visible code - confirm the enclosing method handles a failed allocation.
Pointer pointer = nativeOps.mallocDevice(reqMem, null, 0);
Pointer pointer = nativeOps.mallocDevice(reqMem, null, 0);
// Replaces srcPtr with a fresh mallocDevice allocation sized to the compressed length of `source`, copies the
// content of source.addressPointer() into it on `stream` and synchronizes the stream; then replaces dstPtr
// with a fresh mallocDevice allocation sized to the compressed length of `target`.
// NOTE(review): `val size` is declared twice, so these statements presumably live in two separate scopes
// whose braces are not visible here - confirm against the full method.
// NOTE(review): the copy into the mallocDevice-backed srcPtr uses cudaMemcpyHostToHost - verify the intended copy kind.
log.info("Replacing source ptr"); val size = ((CompressedDataBuffer) source).getCompressionDescriptor().getCompressedLength(); srcPtr = nativeOps.mallocDevice(size, null, 0); nativeOps.memcpyAsync(srcPtr, source.addressPointer(), size, CudaConstants.cudaMemcpyHostToHost, stream); stream.synchronize(); log.info("Replacing target ptr"); val size = ((CompressedDataBuffer) target).getCompressionDescriptor().getCompressedLength(); dstPtr = nativeOps.mallocDevice(size, null, 0);