/**
 * Creates the controller and pre-populates its bookkeeping structures.
 *
 * For every device index reported by the affinity manager this registers
 * an (initially empty) list of per-lane event queues, a list of per-lane
 * clocks, and a device clock. Each lane then gets its own
 * {@link ConcurrentLinkedQueue} of events and an {@link AtomicLong} clock
 * starting at zero.
 */
public AsynchronousFlowController() {
    int laneCount = configuration.getCommandLanesNumber();
    int deviceCount = Nd4j.getAffinityManager().getNumberOfDevices();

    for (int device = 0; device < deviceCount; device++) {
        // register the per-device containers first, then fill them lane by lane
        ArrayList<Queue<cudaEvent_t>> deviceQueues = new ArrayList<Queue<cudaEvent_t>>();
        eventsBarrier.add(device, deviceQueues);

        ArrayList<AtomicLong> deviceLaneClocks = new ArrayList<AtomicLong>();
        laneClocks.add(device, deviceLaneClocks);

        deviceClocks.add(device, new AtomicLong(0));

        for (int lane = 0; lane < laneCount; lane++) {
            deviceQueues.add(lane, new ConcurrentLinkedQueue<cudaEvent_t>());
            deviceLaneClocks.add(lane, new AtomicLong(0));
        }
    }
}
/**
 * Creates the controller and pre-populates its bookkeeping structures.
 *
 * For every device index reported by the affinity manager this registers
 * an (initially empty) list of per-lane event queues, a list of per-lane
 * clocks, and a device clock. Each lane then gets its own
 * {@link ConcurrentLinkedQueue} of events and an {@link AtomicLong} clock
 * starting at zero.
 */
public AsynchronousFlowController() {
    int laneCount = configuration.getCommandLanesNumber();
    int deviceCount = Nd4j.getAffinityManager().getNumberOfDevices();

    for (int device = 0; device < deviceCount; device++) {
        // register the per-device containers first, then fill them lane by lane
        ArrayList<Queue<cudaEvent_t>> deviceQueues = new ArrayList<Queue<cudaEvent_t>>();
        eventsBarrier.add(device, deviceQueues);

        ArrayList<AtomicLong> deviceLaneClocks = new ArrayList<AtomicLong>();
        laneClocks.add(device, deviceLaneClocks);

        deviceClocks.add(device, new AtomicLong(0));

        for (int lane = 0; lane < laneCount; lane++) {
            deviceQueues.add(lane, new ConcurrentLinkedQueue<cudaEvent_t>());
            deviceLaneClocks.add(lane, new AtomicLong(0));
        }
    }
}
/**
 * Drains every lane queue of the device id reported by the allocator.
 *
 * Each event removed from a queue is synchronized and then destroyed;
 * a lane is finished once its queue yields no more events.
 */
protected void cutTail() {
    Integer deviceId = allocator.getDeviceId();

    for (int lane = 0; lane < configuration.getCommandLanesNumber(); lane++) {
        Queue<cudaEvent_t> laneQueue = eventsBarrier.get(deviceId).get(lane);

        // poll() returns null once the queue is empty, which ends the drain
        for (cudaEvent_t pending = laneQueue.poll(); pending != null; pending = laneQueue.poll()) {
            pending.synchronize();
            pending.destroy();
        }
    }
}
/**
 * Drains every lane queue of the device id reported by the allocator.
 *
 * Each event removed from a queue is synchronized and then destroyed;
 * a lane is finished once its queue yields no more events.
 */
protected void cutTail() {
    Integer deviceId = allocator.getDeviceId();

    for (int lane = 0; lane < configuration.getCommandLanesNumber(); lane++) {
        Queue<cudaEvent_t> laneQueue = eventsBarrier.get(deviceId).get(lane);

        // poll() returns null once the queue is empty, which ends the drain
        for (cudaEvent_t pending = laneQueue.poll(); pending != null; pending = laneQueue.poll()) {
            pending.synchronize();
            pending.destroy();
        }
    }
}
/** * This method ensures the events in the beginning of FIFO queues are finished */ protected void sweepTail() { Integer deviceId = allocator.getDeviceId(); int cnt = 0; // we get number of issued commands for specific device long lastCommandId = deviceClocks.get(deviceId).get(); for (int l = 0; l < configuration.getCommandLanesNumber(); l++) { Queue<cudaEvent_t> queue = eventsBarrier.get(deviceId).get(l); if (queue.size() >= MAX_EXECUTION_QUEUE || laneClocks.get(deviceId).get(l).get() < lastCommandId - MAX_EXECUTION_QUEUE) { cudaEvent_t event = queue.poll(); if (event != null && !event.isDestroyed()) { event.synchronize(); event.destroy(); cnt++; } } } deviceClocks.get(deviceId).incrementAndGet(); // log.info("Events sweeped: [{}]", cnt); }
/** * This method ensures the events in the beginning of FIFO queues are finished */ protected void sweepTail() { Integer deviceId = allocator.getDeviceId(); int cnt = 0; // we get number of issued commands for specific device long lastCommandId = deviceClocks.get(deviceId).get(); for (int l = 0; l < configuration.getCommandLanesNumber(); l++) { Queue<cudaEvent_t> queue = eventsBarrier.get(deviceId).get(l); if (queue.size() >= MAX_EXECUTION_QUEUE || laneClocks.get(deviceId).get(l).get() < lastCommandId - MAX_EXECUTION_QUEUE) { cudaEvent_t event = queue.poll(); if (event != null && !event.isDestroyed()) { event.synchronize(); event.destroy(); cnt++; } } } deviceClocks.get(deviceId).incrementAndGet(); // log.info("Events sweeped: [{}]", cnt); }