public static void main(String[] args) throws Exception { CudaEnvironment.getInstance().getConfiguration()
/**
 * Pushes the current values held by {@code configuration} down to the device native ops.
 *
 * <p>{@code CudaEnvironment} is notified first that the configuration has been applied;
 * after that the debug flag, verbose flag, cross-device (P2P) access flag, grid limit and
 * the two thread-count settings are forwarded one by one, in that order.
 */
public void applyConfiguration() {
    // Tell the environment the configuration is being applied before touching native state.
    CudaEnvironment.getInstance().notifyConfigurationApplied();

    // Diagnostics switches.
    NativeOpsHolder.getInstance().getDeviceNativeOps().enableDebugMode(configuration.isDebug());
    NativeOpsHolder.getInstance().getDeviceNativeOps().enableVerboseMode(configuration.isVerbose());

    // Peer-to-peer access between devices.
    NativeOpsHolder.getInstance().getDeviceNativeOps().enableP2P(configuration.isCrossDeviceAccessAllowed());

    // Launch-size limits.
    NativeOpsHolder.getInstance().getDeviceNativeOps().setGridLimit(configuration.getMaximumGridSize());

    // NOTE(review): the "Omp" thread setters are fed the maximum/minimum block sizes here;
    // presumably the device backend interprets them as block-size limits - confirm.
    NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpNumThreads(configuration.getMaximumBlockSize());
    NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpMinThreads(configuration.getMinimumBlockSize());
}
cublasSetStream_v2(new cublasContext(handle), new CUstream_st(ctx.getOldStream())); int arch = CudaEnvironment.getInstance().getCurrentDeviceArchitecture();
/**
 * Pushes the current values held by {@code configuration} down to the device native ops.
 *
 * <p>{@code CudaEnvironment} is notified first that the configuration has been applied;
 * after that the debug flag, verbose flag, cross-device (P2P) access flag, grid limit and
 * the two thread-count settings are forwarded one by one, in that order.
 */
public void applyConfiguration() {
    // Tell the environment the configuration is being applied before touching native state.
    CudaEnvironment.getInstance().notifyConfigurationApplied();

    // Diagnostics switches.
    NativeOpsHolder.getInstance().getDeviceNativeOps().enableDebugMode(configuration.isDebug());
    NativeOpsHolder.getInstance().getDeviceNativeOps().enableVerboseMode(configuration.isVerbose());

    // Peer-to-peer access between devices.
    NativeOpsHolder.getInstance().getDeviceNativeOps().enableP2P(configuration.isCrossDeviceAccessAllowed());

    // Launch-size limits.
    NativeOpsHolder.getInstance().getDeviceNativeOps().setGridLimit(configuration.getMaximumGridSize());

    // NOTE(review): the "Omp" thread setters are fed the maximum/minimum block sizes here;
    // presumably the device backend interprets them as block-size limits - confirm.
    NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpNumThreads(configuration.getMaximumBlockSize());
    NativeOpsHolder.getInstance().getDeviceNativeOps().setOmpMinThreads(configuration.getMinimumBlockSize());
}
cublasSetStream_v2(new cublasContext(handle), new CUstream_st(ctx.getOldStream())); int arch = CudaEnvironment.getInstance().getCurrentDeviceArchitecture();
CudaEnvironment.getInstance().getConfiguration()
CudaEnvironment.getInstance().getConfiguration()
CudaEnvironment.getInstance().getConfiguration()
CudaEnvironment.getInstance().getConfiguration()
CudaEnvironment.getInstance().getConfiguration()
/**
 * Forwards the cross-device access switch to the configuration held by
 * {@code CudaEnvironment}.
 *
 * @param reallyAllow value passed through unchanged to the configuration's
 *                    {@code allowCrossDeviceAccess}
 */
@Override
public void allowCrossDeviceAccess(boolean reallyAllow) {
    CudaEnvironment.getInstance().getConfiguration().allowCrossDeviceAccess(reallyAllow);
}
// closes the enclosing type, whose header is outside this excerpt
}
/**
 * Forwards the cross-device access switch to the configuration held by
 * {@code CudaEnvironment}.
 *
 * @param reallyAllow value passed through unchanged to the configuration's
 *                    {@code allowCrossDeviceAccess}
 */
@Override
public void allowCrossDeviceAccess(boolean reallyAllow) {
    CudaEnvironment.getInstance().getConfiguration().allowCrossDeviceAccess(reallyAllow);
}
// closes the enclosing type, whose header is outside this excerpt
}
/**
 * Pairs the specified thread with the specified device by recording the pair in
 * {@code affinityMap}.
 *
 * <p>The number of currently available devices is read only so that it can be included
 * in the trace message; it does not influence the mapping.
 *
 * @param threadId id of the thread to map
 * @param deviceId id of the device the thread is mapped to
 */
@Override
public void attachThreadToDevice(long threadId, Integer deviceId) {
    // Only the count is needed for the log line, so ask the list for its size directly
    // instead of copying every element into a throw-away list.
    int deviceCount = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size();

    logger.trace("Manually mapping thread [{}] to device [{}], out of [{}] devices...", threadId, deviceId,
                    deviceCount);

    affinityMap.put(threadId, deviceId);
}
/**
 * Pairs the specified thread with the specified device by recording the pair in
 * {@code affinityMap}.
 *
 * <p>The number of currently available devices is read only so that it can be included
 * in the debug message; it does not influence the mapping.
 *
 * @param threadId id of the thread to map
 * @param deviceId id of the device the thread is mapped to
 */
@Override
public void attachThreadToDevice(long threadId, Integer deviceId) {
    // Only the count is needed for the log line, so ask the list for its size directly
    // instead of copying every element into a throw-away list.
    int deviceCount = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size();

    logger.debug("Manually mapping thread [{}] to device [{}], out of [{}] devices...", threadId, deviceId,
                    deviceCount);

    affinityMap.put(threadId, deviceId);
}
/**
 * Sets the automatic GC window and mirrors the same value into the CUDA configuration.
 *
 * <p>The superclass implementation is invoked first; the value is then stored as the
 * configuration's no-GC window via {@code setNoGcWindowMs}.
 *
 * @param windowMillis window length, in milliseconds
 */
@Override
public void setAutoGcWindow(int windowMillis) {
    super.setAutoGcWindow(windowMillis);

    // keep the CUDA-side setting in step with the value just handed to the superclass
    CudaEnvironment.getInstance().getConfiguration().setNoGcWindowMs(windowMillis);
}
/**
 * Sets the automatic GC window and mirrors the same value into the CUDA configuration.
 *
 * <p>The superclass implementation is invoked first; the value is then stored as the
 * configuration's no-GC window via {@code setNoGcWindowMs}.
 *
 * @param windowMillis window length, in milliseconds
 */
@Override
public void setAutoGcWindow(int windowMillis) {
    super.setAutoGcWindow(windowMillis);

    // keep the CUDA-side setting in step with the value just handed to the superclass
    CudaEnvironment.getInstance().getConfiguration().setNoGcWindowMs(windowMillis);
}
/** * This method returns device id available. Round-robin balancing used here. * * @param threadId this parameter can be anything, it's used for logging only. * @return */ protected Integer getNextDevice(long threadId) { Integer device = null; if (!CudaEnvironment.getInstance().getConfiguration().isForcedSingleGPU() && getNumberOfDevices() > 0) { // simple round-robin here synchronized (this) { device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(devPtr.getAndIncrement()); // We check only for number of entries here, not their actual values if (devPtr.get() >= CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size()) devPtr.set(0); logger.debug("Mapping thread [{}] to device [{}], out of [{}] devices...", threadId, device, CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size()); } } else { device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(0); logger.debug("Single device is forced, mapping to device [{}]", device); } return device; }
/** * This method returns device id available. Round-robin balancing used here. * * @param threadId this parameter can be anything, it's used for logging only. * @return */ protected Integer getNextDevice(long threadId) { Integer device = null; if (!CudaEnvironment.getInstance().getConfiguration().isForcedSingleGPU() && getNumberOfDevices() > 0) { // simple round-robin here synchronized (this) { device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(devPtr.getAndIncrement()); // We check only for number of entries here, not their actual values if (devPtr.get() >= CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size()) devPtr.set(0); logger.debug("Mapping thread [{}] to device [{}], out of [{}] devices...", threadId, device, CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().size()); } } else { device = CudaEnvironment.getInstance().getConfiguration().getAvailableDevices().get(0); logger.debug("Single device is forced, mapping to device [{}]", device); } return device; }
/**
 * Creates the pool.
 *
 * <p>Four collector threads are created and started, each with its own reference queue;
 * collector and queue are registered under the same index in {@code collectors} and
 * {@code queueMap}. The pool is then filled using the pool size taken from the CUDA
 * configuration, and that size is recorded as the current pool size.
 */
public LimitedContextPool() {
    final int configuredPoolSize = CudaEnvironment.getInstance().getConfiguration().getPoolSize();

    // NOTE(review): the collector count is fixed at 4 here - confirm whether it should be configurable.
    final int collectorCount = 4;
    for (int idx = 0; idx < collectorCount; idx++) {
        ReferenceQueue<Thread> refQueue = new ReferenceQueue<>();
        ResourceGarbageCollectorThread gcThread = new ResourceGarbageCollectorThread(idx, refQueue);

        // the collector is started before it is published in the lookup maps
        gcThread.start();

        collectors.put(idx, gcThread);
        queueMap.put(idx, refQueue);
    }

    fillPoolWithResources(configuredPoolSize, false);
    currentPoolSize.set(configuredPoolSize);
}
/**
 * Creates the pool.
 *
 * <p>Four collector threads are created and started, each with its own reference queue;
 * collector and queue are registered under the same index in {@code collectors} and
 * {@code queueMap}. The pool is then filled using the pool size taken from the CUDA
 * configuration, and that size is recorded as the current pool size.
 */
public LimitedContextPool() {
    final int configuredPoolSize = CudaEnvironment.getInstance().getConfiguration().getPoolSize();

    // NOTE(review): the collector count is fixed at 4 here - confirm whether it should be configurable.
    final int collectorCount = 4;
    for (int idx = 0; idx < collectorCount; idx++) {
        ReferenceQueue<Thread> refQueue = new ReferenceQueue<>();
        ResourceGarbageCollectorThread gcThread = new ResourceGarbageCollectorThread(idx, refQueue);

        // the collector is started before it is published in the lookup maps
        gcThread.start();

        collectors.put(idx, gcThread);
        queueMap.put(idx, refQueue);
    }

    fillPoolWithResources(configuredPoolSize, false);
    currentPoolSize.set(configuredPoolSize);
}