.allowMultiGPU(true) .setMaximumDeviceCache(2L * 1024L * 1024L * 1024L) .allowCrossDeviceAccess(true);
/**
 * Private constructor — CudaEnvironment is a singleton, so the sole
 * instance owns a freshly built {@code Configuration}.
 */
private CudaEnvironment() {
    configuration = new Configuration();
}
try { int var = Integer.parseInt(System.getenv(ND4JEnvironmentVars.ND4J_CUDA_MAX_BLOCK_SIZE)); setMaximumBlockSize(var); } catch (Exception e) { log.error("Can't parse {}: [{}]", ND4JEnvironmentVars.ND4J_CUDA_MAX_BLOCK_SIZE, System.getenv(ND4JEnvironmentVars.ND4J_CUDA_MAX_BLOCK_SIZE)); try { int var = Integer.parseInt(System.getenv(ND4JEnvironmentVars.ND4J_CUDA_MIN_BLOCK_SIZE)); setMinimumBlockSize(var); } catch (Exception e) { log.error("Can't parse {}: [{}]", ND4JEnvironmentVars.ND4J_CUDA_MIN_BLOCK_SIZE, System.getenv(ND4JEnvironmentVars.ND4J_CUDA_MIN_BLOCK_SIZE)); try { int var = Integer.parseInt(System.getenv(ND4JEnvironmentVars.ND4J_CUDA_MAX_GRID_SIZE)); setMaximumGridSize(var); } catch (Exception e) { log.error("Can't parse {}: [{}]", ND4JEnvironmentVars.ND4J_CUDA_MAX_GRID_SIZE, System.getenv(ND4JEnvironmentVars.ND4J_CUDA_MAX_GRID_SIZE)); try { boolean var = Boolean.parseBoolean(System.getenv(ND4JEnvironmentVars.ND4J_CUDA_FORCE_SINGLE_GPU)); allowMultiGPU(!var); } catch (Exception e) { log.error("Can't parse {}: [{}]", ND4JEnvironmentVars.ND4J_CUDA_FORCE_SINGLE_GPU, System.getenv(ND4JEnvironmentVars.ND4J_CUDA_FORCE_SINGLE_GPU)); try { boolean var = Boolean.parseBoolean(System.getenv(ND4JEnvironmentVars.ND4J_CUDA_USE_PREALLOCATION)); allowPreallocation(var); } catch (Exception e) { log.error("Can't parse {}: [{}]", ND4JEnvironmentVars.ND4J_CUDA_USE_PREALLOCATION, System.getenv(ND4JEnvironmentVars.ND4J_CUDA_USE_PREALLOCATION));
public void applyConfiguration() { //log.info("Applying CUDA configuration..."); CudaEnvironment.getInstance().notifyConfigurationApplied(); NativeOpsHolder.getInstance().getDeviceNativeOps().enableDebugMode(configuration.isDebug()); //configuration.enableDebug(configuration.isDebug()); NativeOpsHolder.getInstance().getDeviceNativeOps().enableVerboseMode(configuration.isVerbose()); //configuration.setVerbose(configuration.isVerbose()); NativeOpsHolder.getInstance().getDeviceNativeOps().enableP2P(configuration.isCrossDeviceAccessAllowed()); //configuration.allowCrossDeviceAccess(configuration.isCrossDeviceAccessAllowed()); NativeOpsHolder.getInstance().getDeviceNativeOps().setGridLimit(configuration.getMaximumGridSize()); //configuration.setMaximumGridSize(configuration.getMaximumGridSize()); NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpNumThreads(configuration.getMaximumBlockSize()); // configuration.setMaximumBlockSize(configuration.getMaximumBlockSize()); NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpMinThreads(configuration.getMinimumBlockSize()); // configuration.setMinimumBlockSize(configuration.getMinimumBlockSize()); }
if (CudaEnvironment.getInstance().getConfiguration().isDebug()) context.syncOldStream();
public CudaZeroHandler() { configuration.setInitialized(); this.INITIAL_LOCATION = configuration.getFirstMemory(); switch (configuration.getExecutionModel()) { case OPTIMIZED: case ASYNCHRONOUS: { throw new RuntimeException("Unknown ExecutionModel: [" + configuration.getExecutionModel() + "]"); switch (configuration.getAllocationModel()) { case CACHE_ALL: this.memoryProvider = new CudaFullCachingProvider(); break; default: throw new RuntimeException("Unknown AllocationModel: [" + configuration.getAllocationModel() + "]");
/**
 * Returns the set of CUDA device ids currently available to this backend.
 *
 * @return a fresh {@link HashSet} copied from the configured device list,
 *         so callers cannot mutate internal state
 */
@Override
public Set<Integer> getAvailableDevices() {
    Set<Integer> devices = new HashSet<>(configuration.getAvailableDevices());
    return devices;
}
/** * This method returns device id available. Round-robin balancing used here. * * @param threadId this parameter can be anything, it's used for logging only. * @return */ protected Integer getNextDevice(long threadId) { Integer device = null; if (!CudaEnvironment.getInstance().getConfiguration().isForcedSingleGPU() && getNumberOfDevices() > 0) { // simple round-robin here synchronized (this) { device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(devPtr.getAndIncrement()); // We check only for number of entries here, not their actual values if (devPtr.get() >= CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size()) devPtr.set(0); logger.debug("Mapping thread [{}] to device [{}], out of [{}] devices...", threadId, device, CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size()); } } else { device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(0); logger.debug("Single device is forced, mapping to device [{}]", device); } return device; }
extraz.set(new PointerPointer(32)); if (CudaEnvironment.getInstance().getConfiguration().isDebug()) lastOp.set(op.name()); CudaEnvironment.getInstance().getConfiguration().enableDebug(true); for (int i = 0; i < dimension.length; i++) if (dimension[i] >= op.x().rank() && dimension[i] != Integer.MAX_VALUE)
/**
 * Builds the per-device, per-lane bookkeeping structures: an event queue and
 * a lane clock for every (device, lane) pair, plus one clock per device.
 */
public AsynchronousFlowController() {
    final int lanes = configuration.getCommandLanesNumber();
    final int devices = Nd4j.getAffinityManager().getNumberOfDevices();

    for (int device = 0; device < devices; device++) {
        eventsBarrier.add(device, new ArrayList<Queue<cudaEvent_t>>());
        laneClocks.add(device, new ArrayList<AtomicLong>());
        deviceClocks.add(device, new AtomicLong(0));

        for (int lane = 0; lane < lanes; lane++) {
            eventsBarrier.get(device).add(lane, new ConcurrentLinkedQueue<cudaEvent_t>());
            laneClocks.get(device).add(lane, new AtomicLong(0));
        }
    }
}
/**
 * Enables or disables cross-device (peer-to-peer) memory access by
 * delegating to the global CUDA configuration.
 *
 * @param reallyAllow {@code true} to allow cross-device access
 */
@Override
public void allowCrossDeviceAccess(boolean reallyAllow) {
    CudaEnvironment.getInstance().getConfiguration().allowCrossDeviceAccess(reallyAllow);
}
}
try { int var = Integer.parseInt(env.get(MAX_BLOCK_SIZE)); setMaximumBlockSize(var); } catch (Exception e) { log.error("Can't parse {}: [{}]", MAX_BLOCK_SIZE, env.get(MAX_BLOCK_SIZE)); try { int var = Integer.parseInt(env.get(MIN_BLOCK_SIZE)); setMinimumBlockSize(var); } catch (Exception e) { log.error("Can't parse {}: [{}]", MIN_BLOCK_SIZE, env.get(MIN_BLOCK_SIZE)); try { int var = Integer.parseInt(env.get(MAX_GRID_SIZE)); setMaximumGridSize(var); } catch (Exception e) { log.error("Can't parse {}: [{}]", MAX_GRID_SIZE, env.get(MAX_GRID_SIZE)); try { boolean var = Boolean.parseBoolean(env.get(DEBUG_ENABLED)); enableDebug(var); } catch (Exception e) { log.error("Can't parse {}: [{}]", DEBUG_ENABLED, env.get(DEBUG_ENABLED)); try { boolean var = Boolean.parseBoolean(env.get(FORCE_SINGLE_GPU)); allowMultiGPU(!var); } catch (Exception e) { log.error("Can't parse {}: [{}]", FORCE_SINGLE_GPU, env.get(FORCE_SINGLE_GPU));
public void applyConfiguration() { //log.info("Applying CUDA configuration..."); CudaEnvironment.getInstance().notifyConfigurationApplied(); NativeOpsHolder.getInstance().getDeviceNativeOps().enableDebugMode(configuration.isDebug()); //configuration.enableDebug(configuration.isDebug()); NativeOpsHolder.getInstance().getDeviceNativeOps().enableVerboseMode(configuration.isVerbose()); //configuration.setVerbose(configuration.isVerbose()); NativeOpsHolder.getInstance().getDeviceNativeOps().enableP2P(configuration.isCrossDeviceAccessAllowed()); //configuration.allowCrossDeviceAccess(configuration.isCrossDeviceAccessAllowed()); NativeOpsHolder.getInstance().getDeviceNativeOps().setGridLimit(configuration.getMaximumGridSize()); //configuration.setMaximumGridSize(configuration.getMaximumGridSize()); NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpNumThreads(configuration.getMaximumBlockSize()); // configuration.setMaximumBlockSize(configuration.getMaximumBlockSize()); NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpMinThreads(configuration.getMinimumBlockSize()); // configuration.setMinimumBlockSize(configuration.getMinimumBlockSize()); }
/**
 * Backward pass for cuDNN dropout: applies the stored dropout mask to the
 * gradient flowing from the layer output, writing the result into the
 * input-gradient array.
 * <p>
 * NOTE(review): assumes {@code mask} was populated by a preceding forward
 * pass — confirm against the caller's lifecycle.
 *
 * @param gradAtOutput gradient w.r.t. the layer output (dy)
 * @param gradAtInput  gradient w.r.t. the layer input (dx), written in place
 */
@Override
public void backprop(INDArray gradAtOutput, INDArray gradAtInput) {
    // Describe the dy tensor for cuDNN.
    int[] dyShape = adaptForTensorDescr(ArrayUtil.toInts(gradAtOutput.shape()));
    int[] dyStride = adaptForTensorDescr(ArrayUtil.toInts(gradAtOutput.stride()));
    checkCudnn(cudnnSetTensorNdDescriptor(cudnnContext.dyTensorDesc, dataType, dyShape.length, dyShape, dyStride));

    // Describe the dx tensor for cuDNN.
    int[] dxShape = adaptForTensorDescr(ArrayUtil.toInts(gradAtInput.shape()));
    int[] dxStride = adaptForTensorDescr(ArrayUtil.toInts(gradAtInput.stride()));
    checkCudnn(cudnnSetTensorNdDescriptor(cudnnContext.dxTensorDesc, dataType, dxShape.length, dxShape, dxStride));

    // Resolve device pointers through the allocator's flow controller.
    Allocator allocator = AtomicAllocator.getInstance();
    CudaContext context = allocator.getFlowController().prepareAction(gradAtOutput, gradAtInput);
    Pointer dy = allocator.getPointer(gradAtOutput, context);
    Pointer dx = allocator.getPointer(gradAtInput, context);

    checkCudnn(cudnnDropoutBackward(cudnnContext, cudnnContext.dropoutDesc,
                    cudnnContext.dyTensorDesc, dy,
                    cudnnContext.dxTensorDesc, dx,
                    mask, mask.capacity()));

    allocator.registerAction(context, gradAtOutput, gradAtInput);

    // In debug mode, synchronize the stream so errors surface at this call site.
    if (CudaEnvironment.getInstance().getConfiguration().isDebug())
        context.syncOldStream();
}
}
public CudaZeroHandler() { configuration.setInitialized(); this.INITIAL_LOCATION = configuration.getFirstMemory(); switch (configuration.getExecutionModel()) { case OPTIMIZED: case ASYNCHRONOUS: { throw new RuntimeException("Unknown ExecutionModel: [" + configuration.getExecutionModel() + "]"); switch (configuration.getAllocationModel()) { case CACHE_ALL: this.memoryProvider = new CudaFullCachingProvider(); break; default: throw new RuntimeException("Unknown AllocationModel: [" + configuration.getAllocationModel() + "]");
/**
 * Exposes the ids of all devices the backend may use.
 *
 * @return a defensive {@link HashSet} copy of the configured device list
 */
@Override
public Set<Integer> getAvailableDevices() {
    return new HashSet<>(configuration.getAvailableDevices());
}
/** * This method returns device id available. Round-robin balancing used here. * * @param threadId this parameter can be anything, it's used for logging only. * @return */ protected Integer getNextDevice(long threadId) { Integer device = null; if (!CudaEnvironment.getInstance().getConfiguration().isForcedSingleGPU() && getNumberOfDevices() > 0) { // simple round-robin here synchronized (this) { device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(devPtr.getAndIncrement()); // We check only for number of entries here, not their actual values if (devPtr.get() >= CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size()) devPtr.set(0); logger.debug("Mapping thread [{}] to device [{}], out of [{}] devices...", threadId, device, CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size()); } } else { device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(0); logger.debug("Single device is forced, mapping to device [{}]", device); } return device; }
extraz.set(new PointerPointer(32)); if (CudaEnvironment.getInstance().getConfiguration().isDebug()) lastOp.set(op.opName()); CudaEnvironment.getInstance().getConfiguration().enableDebug(true); for (int i = 0; i < dimension.length; i++) if (dimension[i] >= op.x().rank() && dimension[i] != Integer.MAX_VALUE)
/**
 * Initializes per-device structures: an event-barrier queue and a lane clock
 * for each command lane on each device, plus a single clock per device.
 */
public AsynchronousFlowController() {
    final int laneCount = configuration.getCommandLanesNumber();
    final int deviceCount = Nd4j.getAffinityManager().getNumberOfDevices();

    for (int dev = 0; dev < deviceCount; dev++) {
        eventsBarrier.add(dev, new ArrayList<Queue<cudaEvent_t>>());
        laneClocks.add(dev, new ArrayList<AtomicLong>());
        deviceClocks.add(dev, new AtomicLong(0));

        for (int lane = 0; lane < laneCount; lane++) {
            eventsBarrier.get(dev).add(lane, new ConcurrentLinkedQueue<cudaEvent_t>());
            laneClocks.get(dev).add(lane, new AtomicLong(0));
        }
    }
}
/**
 * Toggles peer-to-peer access between devices; delegates straight to the
 * singleton environment's configuration.
 *
 * @param reallyAllow whether cross-device access should be permitted
 */
@Override
public void allowCrossDeviceAccess(boolean reallyAllow) {
    CudaEnvironment.getInstance().getConfiguration().allowCrossDeviceAccess(reallyAllow);
}
}