// Only when the CUDA configuration reports debug mode: call syncOldStream() on the context.
if (CudaEnvironment.getInstance().getConfiguration().isDebug()) {
    context.syncOldStream();
}
public void applyConfiguration() { //log.info("Applying CUDA configuration..."); CudaEnvironment.getInstance().notifyConfigurationApplied(); NativeOpsHolder.getInstance().getDeviceNativeOps().enableDebugMode(configuration.isDebug()); //configuration.enableDebug(configuration.isDebug()); NativeOpsHolder.getInstance().getDeviceNativeOps().enableVerboseMode(configuration.isVerbose()); //configuration.setVerbose(configuration.isVerbose()); NativeOpsHolder.getInstance().getDeviceNativeOps().enableP2P(configuration.isCrossDeviceAccessAllowed()); //configuration.allowCrossDeviceAccess(configuration.isCrossDeviceAccessAllowed()); NativeOpsHolder.getInstance().getDeviceNativeOps().setGridLimit(configuration.getMaximumGridSize()); //configuration.setMaximumGridSize(configuration.getMaximumGridSize()); NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpNumThreads(configuration.getMaximumBlockSize()); // configuration.setMaximumBlockSize(configuration.getMaximumBlockSize()); NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpMinThreads(configuration.getMinimumBlockSize()); // configuration.setMinimumBlockSize(configuration.getMinimumBlockSize()); }
public void applyConfiguration() { //log.info("Applying CUDA configuration..."); CudaEnvironment.getInstance().notifyConfigurationApplied(); NativeOpsHolder.getInstance().getDeviceNativeOps().enableDebugMode(configuration.isDebug()); //configuration.enableDebug(configuration.isDebug()); NativeOpsHolder.getInstance().getDeviceNativeOps().enableVerboseMode(configuration.isVerbose()); //configuration.setVerbose(configuration.isVerbose()); NativeOpsHolder.getInstance().getDeviceNativeOps().enableP2P(configuration.isCrossDeviceAccessAllowed()); //configuration.allowCrossDeviceAccess(configuration.isCrossDeviceAccessAllowed()); NativeOpsHolder.getInstance().getDeviceNativeOps().setGridLimit(configuration.getMaximumGridSize()); //configuration.setMaximumGridSize(configuration.getMaximumGridSize()); NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpNumThreads(configuration.getMaximumBlockSize()); // configuration.setMaximumBlockSize(configuration.getMaximumBlockSize()); NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpMinThreads(configuration.getMinimumBlockSize()); // configuration.setMinimumBlockSize(configuration.getMinimumBlockSize()); }
/**
 * Invokes {@code cudnnDropoutBackward} for the given gradient arrays.
 *
 * <p>The dy and dx tensor descriptors on {@code cudnnContext} are configured from the shape and
 * stride of the two arrays (each passed through {@code adaptForTensorDescr}), device pointers are
 * obtained from the allocator, and the call is made with {@code mask} and {@code mask.capacity()}
 * as its last two arguments. Afterwards the action is registered with the allocator and, if the
 * CUDA configuration has debug enabled, {@code syncOldStream()} is called on the context.
 *
 * @param gradAtOutput array whose shape/stride configure the dy descriptor and whose pointer is
 *                     passed as dy
 * @param gradAtInput  array whose shape/stride configure the dx descriptor and whose pointer is
 *                     passed as dx
 */
@Override
public void backprop(INDArray gradAtOutput, INDArray gradAtInput) {
    // Describe dy from gradAtOutput.
    int[] dyShape = adaptForTensorDescr(ArrayUtil.toInts(gradAtOutput.shape()));
    int[] dyStride = adaptForTensorDescr(ArrayUtil.toInts(gradAtOutput.stride()));
    checkCudnn(cudnnSetTensorNdDescriptor(
            cudnnContext.dyTensorDesc, dataType, dyShape.length, dyShape, dyStride));

    // Describe dx from gradAtInput.
    int[] dxShape = adaptForTensorDescr(ArrayUtil.toInts(gradAtInput.shape()));
    int[] dxStride = adaptForTensorDescr(ArrayUtil.toInts(gradAtInput.stride()));
    checkCudnn(cudnnSetTensorNdDescriptor(
            cudnnContext.dxTensorDesc, dataType, dxShape.length, dxShape, dxStride));

    // Resolve device pointers for both arrays under a prepared action context.
    Allocator memory = AtomicAllocator.getInstance();
    CudaContext cudaCtx = memory.getFlowController().prepareAction(gradAtOutput, gradAtInput);
    Pointer gradOutPtr = memory.getPointer(gradAtOutput, cudaCtx);
    Pointer gradInPtr = memory.getPointer(gradAtInput, cudaCtx);

    checkCudnn(cudnnDropoutBackward(
            cudnnContext,
            cudnnContext.dropoutDesc,
            cudnnContext.dyTensorDesc,
            gradOutPtr,
            cudnnContext.dxTensorDesc,
            gradInPtr,
            mask,
            mask.capacity()));

    memory.registerAction(cudaCtx, gradAtOutput, gradAtInput);

    if (CudaEnvironment.getInstance().getConfiguration().isDebug()) {
        cudaCtx.syncOldStream();
    }
}
}
// Only when the CUDA configuration reports debug mode: call syncOldStream() on the context.
if (CudaEnvironment.getInstance().getConfiguration().isDebug()) {
    context.syncOldStream();
}
extraz.set(new PointerPointer(32));

// Only when the CUDA configuration reports debug mode: store the op's name in lastOp.
if (CudaEnvironment.getInstance().getConfiguration().isDebug()) {
    lastOp.set(op.name());
}
extraz.set(new PointerPointer(32));

// Only when the CUDA configuration reports debug mode: store the op's name in lastOp.
if (CudaEnvironment.getInstance().getConfiguration().isDebug()) {
    lastOp.set(op.opName());
}
// Only when the CUDA configuration reports debug mode: call syncOldStream() on the context.
if (CudaEnvironment.getInstance().getConfiguration().isDebug()) {
    context.syncOldStream();
}
dimension = new int[] {Integer.MAX_VALUE};

// Only when the CUDA configuration reports debug mode: store the op's name in lastOp.
if (CudaEnvironment.getInstance().getConfiguration().isDebug()) {
    lastOp.set(op.name());
}
// Stores a new 32-slot PointerPointer in extraz, records the op's name in lastOp when debug is on,
// and then calls enableDebug(true) on the configuration.
// NOTE(review): as far as this fragment shows, the final enableDebug(true) call sits outside the
// isDebug() check, so it switches debug on for the configuration returned by
// CudaEnvironment.getInstance() regardless of the previous setting. This looks like leftover
// debugging code — confirm it is intentional before keeping it.
extraz.set(new PointerPointer(32)); if (CudaEnvironment.getInstance().getConfiguration().isDebug()) lastOp.set(op.name()); CudaEnvironment.getInstance().getConfiguration().enableDebug(true);
// Only when the CUDA configuration reports debug mode: call syncOldStream() on the context.
if (CudaEnvironment.getInstance().getConfiguration().isDebug()) {
    context.syncOldStream();
}
// Only when the CUDA configuration reports debug mode: store the op's name in lastOp.
if (CudaEnvironment.getInstance().getConfiguration().isDebug()) {
    lastOp.set(op.name());
}
// Only when the CUDA configuration reports debug mode: store the op's name in lastOp.
if (CudaEnvironment.getInstance().getConfiguration().isDebug()) {
    lastOp.set(op.opName());
}
// Only when the CUDA configuration reports debug mode: store the op's name in lastOp.
if (CudaEnvironment.getInstance().getConfiguration().isDebug()) {
    lastOp.set(op.name());
}
// Only when the CUDA configuration reports debug mode: store the op's name in lastOp.
if (CudaEnvironment.getInstance().getConfiguration().isDebug()) {
    lastOp.set(op.opName());
}
// Stores a new 32-slot PointerPointer in extraz, records the op's name in lastOp when debug is on,
// and then calls enableDebug(true) on the configuration.
// NOTE(review): as far as this fragment shows, the final enableDebug(true) call sits outside the
// isDebug() check, so it switches debug on for the configuration returned by
// CudaEnvironment.getInstance() regardless of the previous setting. This looks like leftover
// debugging code — confirm it is intentional before keeping it.
extraz.set(new PointerPointer(32)); if (CudaEnvironment.getInstance().getConfiguration().isDebug()) lastOp.set(op.opName()); CudaEnvironment.getInstance().getConfiguration().enableDebug(true);
// Only when the CUDA configuration reports debug mode: store the op's name in lastOp.
if (CudaEnvironment.getInstance().getConfiguration().isDebug()) {
    lastOp.set(op.opName());
}
// Only when the CUDA configuration reports debug mode: store the op's name in lastOp.
if (CudaEnvironment.getInstance().getConfiguration().isDebug()) {
    lastOp.set(op.name());
}
// Only when the CUDA configuration reports debug mode: call syncOldStream() on the context.
if (CudaEnvironment.getInstance().getConfiguration().isDebug()) {
    context.syncOldStream();
}
// Only when the CUDA configuration reports debug mode: call syncOldStream() on the context.
if (CudaEnvironment.getInstance().getConfiguration().isDebug()) {
    context.syncOldStream();
}