public static void main(String[] args) throws Exception { CudaEnvironment.getInstance().getConfiguration()
CudaEnvironment.getInstance().getConfiguration()
CudaEnvironment.getInstance().getConfiguration()
CudaEnvironment.getInstance().getConfiguration()
CudaEnvironment.getInstance().getConfiguration()
CudaEnvironment.getInstance().getConfiguration()
/**
 * Manually pins the given thread to the given device by recording the pair in the
 * affinity map. Subsequent allocations for that thread should resolve to this device.
 *
 * @param threadId id of the thread being pinned
 * @param deviceId ordinal of the device the thread is attached to
 */
@Override
public void attachThreadToDevice(long threadId, Integer deviceId) {
    final List<Integer> knownDevices =
            new ArrayList<>(CudaEnvironment.getInstance().getConfiguration().getAvailableDevices());
    logger.trace("Manually mapping thread [{}] to device [{}], out of [{}] devices...", threadId, deviceId,
            knownDevices.size());
    affinityMap.put(threadId, deviceId);
}
/**
 * Manually pins the given thread to the given device by recording the pair in the
 * affinity map.
 *
 * @param threadId id of the thread being pinned
 * @param deviceId ordinal of the device the thread is attached to
 */
@Override
public void attachThreadToDevice(long threadId, Integer deviceId) {
    final List<Integer> availableDevices =
            new ArrayList<>(CudaEnvironment.getInstance().getConfiguration().getAvailableDevices());
    final int deviceCount = availableDevices.size();
    logger.debug("Manually mapping thread [{}] to device [{}], out of [{}] devices...", threadId, deviceId,
            deviceCount);
    affinityMap.put(threadId, deviceId);
}
/**
 * Enables or disables cross-device (peer-to-peer) access in the shared CUDA configuration.
 *
 * @param reallyAllow true to permit cross-device access, false to forbid it
 */
@Override
public void allowCrossDeviceAccess(boolean reallyAllow) {
    CudaEnvironment.getInstance()
            .getConfiguration()
            .allowCrossDeviceAccess(reallyAllow);
}
}
/**
 * Delegates the cross-device access flag straight to the global CUDA configuration.
 *
 * @param reallyAllow true to permit peer-to-peer access, false to forbid it
 */
@Override
public void allowCrossDeviceAccess(boolean reallyAllow) {
    CudaEnvironment.getInstance().getConfiguration().allowCrossDeviceAccess(reallyAllow);
}
}
/**
 * Builds the context pool: starts one garbage-collector thread (with its own reference
 * queue) per device slot, then pre-fills the pool with the configured number of resources.
 */
public LimitedContextPool() {
    int perDevicePool = CudaEnvironment.getInstance().getConfiguration().getPoolSize();

    // NOTE(review): device-slot count is hard-coded to 4 here; presumably an upper bound
    // on supported devices — confirm against getAvailableDevices().
    for (int deviceId = 0; deviceId < 4; deviceId++) {
        ReferenceQueue<Thread> deathQueue = new ReferenceQueue<>();
        ResourceGarbageCollectorThread gcThread = new ResourceGarbageCollectorThread(deviceId, deathQueue);
        gcThread.start();
        collectors.put(deviceId, gcThread);
        queueMap.put(deviceId, deathQueue);
    }

    fillPoolWithResources(perDevicePool, false);
    currentPoolSize.set(perDevicePool);
}
/**
 * Initializes the pool: spawns a per-device-slot collector thread backed by a reference
 * queue, then seeds the pool with {@code getPoolSize()} resources per device.
 */
public LimitedContextPool() {
    int poolSizePerDevice = CudaEnvironment.getInstance().getConfiguration().getPoolSize();

    // NOTE(review): fixed upper bound of 4 device slots — verify this matches the
    // actual device count reported by the configuration.
    final int deviceSlots = 4;
    for (int slot = 0; slot < deviceSlots; slot++) {
        ReferenceQueue<Thread> refQueue = new ReferenceQueue<>();
        ResourceGarbageCollectorThread collectorThread = new ResourceGarbageCollectorThread(slot, refQueue);
        collectorThread.start();
        collectors.put(slot, collectorThread);
        queueMap.put(slot, refQueue);
    }

    fillPoolWithResources(poolSizePerDevice, false);
    currentPoolSize.set(poolSizePerDevice);
}
/**
 * Smoke/benchmark routine: prints the visible CUDA device count, then times the creation
 * of a 100000x100 random matrix followed by 100 transpose calls, reporting elapsed ms.
 */
public static void nd4jTest() {
    int deviceCount = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size();
    System.out.println("Device count:" + deviceCount);

    ThreadLocalRandom rng = ThreadLocalRandom.current();
    double[][] values = new double[100000][100];
    for (int row = 0; row < values.length; row++) {
        for (int col = 0; col < values[0].length; col++) {
            values[row][col] = rng.nextDouble();
        }
    }

    // Timing deliberately includes the Nd4j.create() call, matching the original measurement.
    long startedAt = System.currentTimeMillis();
    INDArray matrix = Nd4j.create(values);
    for (int iteration = 0; iteration < 100; iteration++) {
        matrix.transpose();
    }
    long elapsed = System.currentTimeMillis() - startedAt;
    System.out.println(elapsed + " ms for 100 iterations");
}
/**
 * Sets the auto-GC window on the superclass and mirrors it into the CUDA
 * configuration's no-GC window.
 *
 * @param windowMillis window length in milliseconds
 */
@Override
public void setAutoGcWindow(int windowMillis) {
    super.setAutoGcWindow(windowMillis);
    CudaEnvironment.getInstance()
            .getConfiguration()
            .setNoGcWindowMs(windowMillis);
}
/**
 * Applies the auto-GC window via the superclass, then propagates the same value to
 * the global CUDA configuration as the no-GC window.
 *
 * @param windowMillis window length in milliseconds
 */
@Override
public void setAutoGcWindow(int windowMillis) {
    super.setAutoGcWindow(windowMillis);
    CudaEnvironment.getInstance().getConfiguration().setNoGcWindowMs(windowMillis);
}
/**
 * Reports whether cross-device (peer-to-peer) access can be used: the hardware/driver
 * must expose P2P AND the configuration must allow it.
 *
 * @return true only when both conditions hold
 */
@Override
public boolean isCrossDeviceAccessSupported() {
    // Guard clause preserves the original short-circuit: the configuration is only
    // consulted when P2P is actually available.
    if (!NativeOpsHolder.getInstance().getDeviceNativeOps().isP2PAvailable()) {
        return false;
    }
    return CudaEnvironment.getInstance().getConfiguration().isCrossDeviceAccessAllowed();
}
/**
 * True when peer-to-peer access is both available on this system and permitted by the
 * CUDA configuration.
 *
 * @return whether cross-device access may be used
 */
@Override
public boolean isCrossDeviceAccessSupported() {
    return NativeOpsHolder.getInstance().getDeviceNativeOps().isP2PAvailable()
            && CudaEnvironment.getInstance().getConfiguration().isCrossDeviceAccessAllowed();
}
/**
 * Private singleton constructor: binds this allocator to the shared CUDA configuration,
 * creates and initializes the memory handler, and starts the host/device collectors.
 */
private AtomicAllocator() {
    // Shared configuration singleton for the CUDA environment
    this.configuration = CudaEnvironment.getInstance().getConfiguration();
    applyConfiguration();
    // Memory handler is initialized with this allocator and the configuration above;
    // NOTE(review): ordering (applyConfiguration before init) looks intentional — confirm.
    this.memoryHandler = new CudaZeroHandler();
    this.memoryHandler.init(configuration, this);
    initDeviceCollectors();
    initHostCollectors();
    this.protector = ConstantProtector.getInstance();
}
/**
 * Private singleton constructor: wires the allocator to the global CUDA configuration,
 * sets up the zero-copy memory handler, and launches the collector threads.
 */
private AtomicAllocator() {
    // Obtain the process-wide CUDA configuration
    this.configuration = CudaEnvironment.getInstance().getConfiguration();
    applyConfiguration();
    // Handler receives both the configuration and a back-reference to this allocator
    this.memoryHandler = new CudaZeroHandler();
    this.memoryHandler.init(configuration, this);
    initDeviceCollectors();
    initHostCollectors();
    this.protector = ConstantProtector.getInstance();
}
/**
 * Backward pass for cudnn dropout: fills {@code gradAtInput} (dx) from
 * {@code gradAtOutput} (dy) via {@code cudnnDropoutBackward}, using the dropout
 * descriptor and mask held on the cudnn context.
 *
 * @param gradAtOutput gradient with respect to the layer output (dy)
 * @param gradAtInput  array receiving the gradient with respect to the layer input (dx)
 */
@Override
public void backprop(INDArray gradAtOutput, INDArray gradAtInput) {
    // Describe the dy tensor (shape + stride) to cudnn
    int[] gradAtOutShape = adaptForTensorDescr(ArrayUtil.toInts(gradAtOutput.shape()));
    int[] gradAtOutStride = adaptForTensorDescr(ArrayUtil.toInts(gradAtOutput.stride()));
    checkCudnn(cudnnSetTensorNdDescriptor(cudnnContext.dyTensorDesc, dataType, gradAtOutShape.length, gradAtOutShape, gradAtOutStride));
    // Describe the dx tensor
    int[] gradAtInShape = adaptForTensorDescr(ArrayUtil.toInts(gradAtInput.shape()));
    int[] gradAtInStride = adaptForTensorDescr(ArrayUtil.toInts(gradAtInput.stride()));
    checkCudnn(cudnnSetTensorNdDescriptor(cudnnContext.dxTensorDesc, dataType, gradAtInShape.length, gradAtInShape, gradAtInStride));
    Allocator allocator = AtomicAllocator.getInstance();
    // Prepare a CUDA context/stream covering both arrays, then resolve device pointers
    CudaContext context = allocator.getFlowController().prepareAction(gradAtOutput, gradAtInput);
    Pointer dyPtr = allocator.getPointer(gradAtOutput, context);
    Pointer dxPtr = allocator.getPointer(gradAtInput, context);
    checkCudnn(cudnnDropoutBackward(cudnnContext, cudnnContext.dropoutDesc, cudnnContext.dyTensorDesc, dyPtr, cudnnContext.dxTensorDesc, dxPtr, mask, mask.capacity()));
    // Mark both arrays as touched by this asynchronous action
    allocator.registerAction(context, gradAtOutput, gradAtInput);
    // In debug mode, synchronize so cudnn failures surface at this call site
    if (CudaEnvironment.getInstance().getConfiguration().isDebug())
        context.syncOldStream();
}
}