/**
 * Hands out an event for the device the current thread is attached to.
 *
 * <p>An event is first polled from that device's queue; if one is available it is
 * returned as-is and {@code cacheCounter} is incremented. Otherwise a new event is
 * created through the device native ops, tagged with the device id, and
 * {@code newCounter} is incremented.
 *
 * @return a pooled event when the queue had one, otherwise a newly created event
 */
public cudaEvent_t getEvent() {
    final int currentDevice = Nd4j.getAffinityManager().getDeviceForCurrentThread();

    final cudaEvent_t pooled = queue.get(currentDevice).poll();
    if (pooled != null) {
        // Reuse path: the queue already held an event for this device.
        cacheCounter.incrementAndGet();
        return pooled;
    }

    // Nothing to reuse: create a new native event and remember which device it belongs to.
    final cudaEvent_t fresh =
            new cudaEvent_t(NativeOpsHolder.getInstance().getDeviceNativeOps().createEvent());
    fresh.setDeviceId(currentDevice);
    newCounter.incrementAndGet();
    return fresh;
}
/**
 * Hands out an event for the device the current thread is attached to.
 *
 * <p>An event is first polled from that device's queue; if one is available it is
 * returned as-is and {@code cacheCounter} is incremented. Otherwise a new event is
 * created through the device native ops, tagged with the device id, and
 * {@code newCounter} is incremented.
 *
 * @return a pooled event when the queue had one, otherwise a newly created event
 */
public cudaEvent_t getEvent() {
    final int currentDevice = Nd4j.getAffinityManager().getDeviceForCurrentThread();

    final cudaEvent_t pooled = queue.get(currentDevice).poll();
    if (pooled != null) {
        // Reuse path: the queue already held an event for this device.
        cacheCounter.incrementAndGet();
        return pooled;
    }

    // Nothing to reuse: create a new native event and remember which device it belongs to.
    final cudaEvent_t fresh =
            new cudaEvent_t(NativeOpsHolder.getInstance().getDeviceNativeOps().createEvent());
    fresh.setDeviceId(currentDevice);
    newCounter.incrementAndGet();
    return fresh;
}
@Override public void registerAction(CudaContext context, INDArray result, INDArray... operands) { // TODO: this should be lane-dependant context if (totalHits.incrementAndGet() % 25000 == 0) { log.debug("AsyncHit ratio: [{}]", getAsyncHitRatio()); /* for (int lane = 0; lane < allocator.getContextPool().acquireContextPackForDevice(0).getAvailableLanes(); lane++) { log.debug("Lane [{}]: {} ", lane, lanesCounter.get(lane).get()); } */ } cudaEvent_t event = new cudaEvent_t(nativeOps.createEvent()); event.setLaneId(context.getLaneId()); nativeOps.registerEvent(event, context.getOldStream()); if (result != null) { setWriteLane(result, event); allocator.tickDeviceWrite(result); } for (INDArray operand : operands) { if (operand == null) continue; setReadLane(operand, event); } Integer deviceId = allocator.getDeviceId(); fillTail(deviceId, event.getLaneId(), event); }
/**
 * Records a new event on the context's stream and sets it as the write lane of
 * {@code result}, then passes the event to {@code fillTail} for the allocator's
 * device id and the event's lane.
 *
 * <p>{@code operands} are accepted for signature parity with the {@code INDArray}
 * overload but are not used by this method.
 *
 * @param context  context supplying the lane id and the stream the event is registered on
 * @param result   allocation point written by the operation; skipped when {@code null}
 * @param operands allocation points read by the operation (currently unused here)
 */
@Override
public void registerAction(CudaContext context, AllocationPoint result, AllocationPoint... operands) {
    cudaEvent_t event = new cudaEvent_t(nativeOps.createEvent());
    event.setLaneId(context.getLaneId());
    nativeOps.registerEvent(event, context.getOldStream());

    // The event is already created and registered at this point. Guard against a null
    // result (as the INDArray overload does) so that an NPE cannot skip fillTail and
    // leave the event untracked.
    if (result != null) {
        result.setWriteLane(event);
    }

    Integer deviceId = allocator.getDeviceId();
    fillTail(deviceId, event.getLaneId(), event);
}
/**
 * Records a new event on the context's stream and sets it as the write lane of
 * {@code result}, then passes the event to {@code fillTail} for the allocator's
 * device id and the event's lane.
 *
 * <p>{@code operands} are accepted for signature parity with the {@code INDArray}
 * overload but are not used by this method.
 *
 * @param context  context supplying the lane id and the stream the event is registered on
 * @param result   allocation point written by the operation; skipped when {@code null}
 * @param operands allocation points read by the operation (currently unused here)
 */
@Override
public void registerAction(CudaContext context, AllocationPoint result, AllocationPoint... operands) {
    cudaEvent_t event = new cudaEvent_t(nativeOps.createEvent());
    event.setLaneId(context.getLaneId());
    nativeOps.registerEvent(event, context.getOldStream());

    // The event is already created and registered at this point. Guard against a null
    // result (as the INDArray overload does) so that an NPE cannot skip fillTail and
    // leave the event untracked.
    if (result != null) {
        result.setWriteLane(event);
    }

    Integer deviceId = allocator.getDeviceId();
    fillTail(deviceId, event.getLaneId(), event);
}
@Override public void registerAction(CudaContext context, INDArray result, INDArray... operands) { // TODO: this should be lane-dependant context if (totalHits.incrementAndGet() % 25000 == 0) { log.debug("AsyncHit ratio: [{}]", getAsyncHitRatio()); /* for (int lane = 0; lane < allocator.getContextPool().acquireContextPackForDevice(0).getAvailableLanes(); lane++) { log.debug("Lane [{}]: {} ", lane, lanesCounter.get(lane).get()); } */ } cudaEvent_t event = new cudaEvent_t(nativeOps.createEvent()); event.setLaneId(context.getLaneId()); nativeOps.registerEvent(event, context.getOldStream()); if (result != null) { setWriteLane(result, event); allocator.tickDeviceWrite(result); } for (INDArray operand : operands) { if (operand == null) continue; setReadLane(operand, event); } Integer deviceId = allocator.getDeviceId(); fillTail(deviceId, event.getLaneId(), event); }