/**
 * Calls <a href="http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clEnqueueReadBuffer.html">clEnqueueReadBuffer</a>.<br>
 * Convenience overload: allocates host memory compatible with the queue's device
 * (via {@code allocateCompatibleMemory}), reads this buffer into it and returns it.
 * The delegate is invoked with {@code true} as its third argument (presumably the
 * "blocking" flag, so the data should be available on return; confirm against that overload).
 *
 * @param queue queue used for the read; its device selects the host memory layout
 * @param eventsToWaitFor events forwarded unchanged to the underlying read
 * @return the newly allocated pointer that received the buffer contents
 */
public Pointer<T> read(CLQueue queue, CLEvent... eventsToWaitFor) {
    final Pointer<T> hostCopy = allocateCompatibleMemory(queue.getDevice());
    read(queue, hostCopy, true /* third argument of the delegate */, eventsToWaitFor);
    return hostCopy;
}
/**
/**
 * Calls <a href="http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clEnqueueReadBuffer.html">clEnqueueReadBuffer</a>.<br>
 * Convenience overload for a sub-range: allocates host memory compatible with the
 * queue's device (via {@code allocateCompatibleMemory}), reads the requested range
 * into it and returns it. The delegate is invoked with {@code true} as its fifth
 * argument (presumably the "blocking" flag; confirm against that overload).
 * <p>
 * NOTE(review): the destination is allocated without reference to {@code length};
 * its size is whatever {@code allocateCompatibleMemory} chooses. Verify that this is intended.
 *
 * @param queue queue used for the read; its device selects the host memory layout
 * @param offset start of the range, forwarded unchanged to the underlying read
 * @param length size of the range, forwarded unchanged to the underlying read
 * @param eventsToWaitFor events forwarded unchanged to the underlying read
 * @return the newly allocated pointer that received the data
 */
public Pointer<T> read(CLQueue queue, long offset, long length, CLEvent... eventsToWaitFor) {
    final Pointer<T> hostCopy = allocateCompatibleMemory(queue.getDevice());
    read(queue, offset, length, hostCopy, true, eventsToWaitFor);
    return hostCopy;
}
/**
 * Calls <a href="http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clEnqueueReadBuffer.html">clEnqueueReadBuffer</a>.<br>
 * Reads a sub-range of this buffer into freshly allocated host memory.
 * The destination comes from {@code allocateCompatibleMemory} for the queue's device,
 * and the actual transfer is delegated to the overload that takes an explicit
 * destination pointer, with {@code true} passed as its fifth argument
 * (presumably "blocking"; confirm against that overload).
 * <p>
 * NOTE(review): {@code length} does not influence how much host memory is allocated here.
 *
 * @param queue queue used for the read; its device selects the host memory layout
 * @param offset start of the range, passed through to the delegate
 * @param length size of the range, passed through to the delegate
 * @param eventsToWaitFor events passed through to the delegate
 * @return the pointer that was allocated and filled by the delegate
 */
public Pointer<T> read(CLQueue queue, long offset, long length, CLEvent... eventsToWaitFor) {
    final Pointer<T> destination = allocateCompatibleMemory(queue.getDevice());
    read(queue, offset, length, destination, true, eventsToWaitFor);
    return destination;
}
/**
 * Calls <a href="http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clEnqueueReadBuffer.html">clEnqueueReadBuffer</a>.<br>
 * Reads this buffer into freshly allocated host memory and returns that memory.
 * The destination comes from {@code allocateCompatibleMemory} for the queue's device;
 * the transfer itself is delegated to the overload taking an explicit destination,
 * with {@code true} passed as its third argument (presumably "blocking"; confirm
 * against that overload).
 *
 * @param queue queue used for the read; its device selects the host memory layout
 * @param eventsToWaitFor events passed through to the delegate
 * @return the pointer that was allocated and filled by the delegate
 */
public Pointer<T> read(CLQueue queue, CLEvent... eventsToWaitFor) {
    final Pointer<T> destination = allocateCompatibleMemory(queue.getDevice());
    read(queue, destination, true, eventsToWaitFor);
    return destination;
}
/**
/** Moves this buffer to host memory and release device buffer. */ @SuppressWarnings("unchecked") @Override public void export() { if (byteBuffer != null) return; // Already on host. if (clBuffer == null) throw new UnsupportedOperationException( "The device buffer has already been released."); if (getByteCount() > Integer.MAX_VALUE) throw new UnsupportedOperationException( "Buffer byte count exceeds java.nio.ByteBuffer maximum capacity"); byteBuffer = ByteBuffer.allocateDirect((int) getByteCount()).order( clQueue.getDevice().getByteOrder()); clBuffer.read(clQueue, (Pointer<Byte>) Pointer.pointerToBuffer(byteBuffer), true /* blocking */, updateEvent); release(); }
/**
 * Calls <a href="http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clEnqueueWriteBuffer.html">clEnqueueWriteBuffer</a>.<br>
 * NIO-buffer flavour of {@code write}: a direct buffer is wrapped as a pointer and written
 * as-is, whereas a non-direct buffer is first copied into a newly allocated array of
 * {@code length} elements (ordered with the device's default kernel byte order) and the
 * write is then forced to be blocking.
 *
 * @deprecated use {@link CLBuffer#write(CLQueue, long, long, Pointer, boolean, CLEvent[])} instead
 * @param queue Execution queue for this operation.
 * @param offset offset in the {@link CLBuffer}
 * @param length length to write (in bytes)
 * @param in input buffer
 * @param blocking whether the operation should be blocking (and return null), or non-blocking (and return a completion event)
 * @param eventsToWaitFor Events that need to complete before this particular command can be executed. Special value {@link CLEvent#FIRE_AND_FORGET} can be used to avoid returning a CLEvent.
 * @return Event object that identifies this command and can be used to query or queue a wait for the command to complete, or null if eventsToWaitFor contains {@link CLEvent#FIRE_AND_FORGET}.
 * @throws IllegalArgumentException if {@code in} is null
 */
@Deprecated
public CLEvent write(CLQueue queue, long offset, long length, Buffer in, boolean blocking, CLEvent... eventsToWaitFor) {
    if (in == null) {
        throw new IllegalArgumentException("Null input buffer !");
    }

    final Pointer<T> source;
    final boolean effectiveBlocking;
    if (in.isDirect()) {
        // Direct buffers can be handed over without an intermediate copy.
        source = (Pointer) pointerToBuffer(in);
        effectiveBlocking = blocking;
    } else {
        // Stage the data in a temporary array; the write is made blocking in this case
        // (presumably so the temporary stays valid until the transfer is done).
        source = allocateArray(io, length).order(queue.getDevice().getKernelsDefaultByteOrder());
        source.setValues(in);
        effectiveBlocking = true;
    }
    return write(queue, offset, length, source, effectiveBlocking, eventsToWaitFor);
}
/**
 * Calls <a href="http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clEnqueueWriteBuffer.html">clEnqueueWriteBuffer</a>.<br>
 * Adapts a {@code java.nio.Buffer} to the pointer-based {@code write} overload.
 * Direct buffers are wrapped in place; non-direct buffers are copied into a freshly
 * allocated array of {@code length} elements using the device's default kernel byte
 * order, and in that case the call is always made blocking.
 *
 * @deprecated use {@link CLBuffer#write(CLQueue, long, long, Pointer, boolean, CLEvent[])} instead
 * @param queue Execution queue for this operation.
 * @param offset offset in the {@link CLBuffer}
 * @param length length to write (in bytes)
 * @param in input buffer
 * @param blocking whether the operation should be blocking (and return null), or non-blocking (and return a completion event)
 * @param eventsToWaitFor Events that need to complete before this particular command can be executed. Special value {@link CLEvent#FIRE_AND_FORGET} can be used to avoid returning a CLEvent.
 * @return Event object that identifies this command and can be used to query or queue a wait for the command to complete, or null if eventsToWaitFor contains {@link CLEvent#FIRE_AND_FORGET}.
 * @throws IllegalArgumentException if {@code in} is null
 */
@Deprecated
public CLEvent write(CLQueue queue, long offset, long length, Buffer in, boolean blocking, CLEvent... eventsToWaitFor) {
    if (in == null) {
        throw new IllegalArgumentException("Null input buffer !");
    }

    final boolean needsStaging = !in.isDirect();
    final Pointer<T> data;
    if (needsStaging) {
        // Heap-backed buffer: copy its contents into newly allocated pointer memory.
        data = allocateArray(io, length).order(queue.getDevice().getKernelsDefaultByteOrder());
        data.setValues(in);
    } else {
        data = (Pointer) pointerToBuffer(in);
    }

    // A staged copy always results in a blocking write, whatever the caller asked for.
    final boolean blockingFlag = needsStaging || blocking;
    return write(queue, offset, length, data, blockingFlag, eventsToWaitFor);
}
Pointer<T> ptr = null; if (indirect) { ptr = allocateArray(io, length).order(queue.getDevice().getKernelsDefaultByteOrder()); blocking = true; } else {
Pointer<T> ptr = null; if (indirect) { ptr = allocateArray(io, length).order(queue.getDevice().getKernelsDefaultByteOrder()); blocking = true; } else {
int maxUnits = queue.getDevice().getMaxComputeUnits(); int unitsFactor = maxUnits < 10 ? 1 : 16; int scheduledWorkItems = maxUnits * unitsFactor;
int maxUnits = queue.getDevice().getMaxComputeUnits(); int unitsFactor = maxUnits < 10 ? 1 : 16; int scheduledWorkItems = maxUnits * unitsFactor;
/**
 * Multiplies matrix {@code a} by matrix {@code b} into {@code out}, choosing between the
 * blocked and the naive implementation.
 * <p>
 * The blocked path is taken only when both operands share the same block size, that size
 * is greater than 1, and the device reports at least two work-item dimensions whose
 * maxima are each at least the block size. On that path the row and column counts are
 * rounded up with {@code roundUp(..., blockSize)} and the strides are not forwarded.
 * In every other case the call is delegated unchanged to {@code naiveMatrixMultiply}.
 *
 * @param prim element primitive type, forwarded to the chosen implementation
 * @param a left operand
 * @param aRows row count of {@code a}
 * @param aColumns column count of {@code a}
 * @param aStride stride of {@code a}; only used by the naive path
 * @param aBlockSize block size of {@code a}
 * @param b right operand
 * @param bRows row count of {@code b}
 * @param bColumns column count of {@code b}
 * @param bStride stride of {@code b}; only used by the naive path
 * @param bBlockSize block size of {@code b}
 * @param out destination buffer
 * @param eventsToWaitFor events forwarded to the chosen implementation
 * @return the event returned by the chosen implementation
 * @throws CLBuildException propagated from the chosen implementation
 */
public <T> CLEvent matrixMultiply(Primitive prim,
        CLBuffer<T> a, long aRows, long aColumns, long aStride, int aBlockSize,
        CLBuffer<T> b, long bRows, long bColumns, long bStride, int bBlockSize,
        CLBuffer<T> out, CLEvent... eventsToWaitFor) throws CLBuildException {
    final int blockSize = aBlockSize;

    if (blockSize > 1 && blockSize == bBlockSize) {
        // The device limits are only queried when the block sizes are usable at all.
        final long[] limits = queue.getDevice().getMaxWorkItemSizes();
        final boolean deviceFitsBlock =
                limits.length >= 2 && limits[0] >= blockSize && limits[1] >= blockSize;
        if (deviceFitsBlock) {
            return blockMatrixMultiply(
                    blockSize, prim,
                    a, roundUp(aRows, blockSize), roundUp(aColumns, blockSize),
                    b, roundUp(bRows, blockSize), roundUp(bColumns, blockSize),
                    out, eventsToWaitFor);
        }
    }

    return naiveMatrixMultiply(prim, a, aRows, aColumns, aStride, b, bRows, bColumns, bStride, out, eventsToWaitFor);
}

public <T> CLEvent blockMatrixMultiply(int blockSize, Primitive prim, CLBuffer<T> a, long aRows, long aColumns, CLBuffer<T> b, long bRows, long bColumns, CLBuffer<T> out, CLEvent... eventsToWaitFor) throws CLBuildException {
int[] blockCountArr = new int[1]; int maxWIS = (int)queue.getDevice().getMaxWorkItemSizes()[0];
int[] blockCountArr = new int[1]; int maxWIS = (int)queue.getDevice().getMaxWorkItemSizes()[0];
return null; return new Pair<Pointer<T>, CLEvent>( pointerToAddress(mappedPeer, io).validElements(length).order(queue.getDevice().getKernelsDefaultByteOrder()), CLEvent.createEventFromPointer(queue, eventOut) );
@Test public void testGPUPerfFloat() throws IOException { //CLKernels.setInstance(new CLKernels(JavaCL.createBestContext(DeviceFeature.GPU).createDefaultQueue())); //int size = 100; for (int size : new int[] { 10, 50, 100/*, 200, 400*/ }) { DefaultDenseFloatMatrix2D mJava = new DefaultDenseFloatMatrix2D(size, size); Matrix pJava = testPerf("Java(size = " + size +")", mJava).getValue(); for (DeviceFeature feat : new DeviceFeature[] { DeviceFeature.CPU, DeviceFeature.GPU }) { CLKernels.setInstance(new CLKernels(JavaCL.createBestContext(feat).createDefaultQueue())); CLDevice device = CLKernels.getInstance().getQueue().getDevice(); CLDenseFloatMatrix2D mCL = new CLDenseFloatMatrix2D(size, size); Matrix pCL = testPerf("OpenCL(size = " + size +", device = " + device + ")", mCL).getValue(); assertEquals(pJava, pCL); } } }
return null; return new Pair<Pointer<T>, CLEvent>( pointerToAddress(mappedPeer, io).validElements(length).order(queue.getDevice().getKernelsDefaultByteOrder()), CLEvent.createEventFromPointer(queue, eventOut) );
}), queue.getDevice().getByteOrder()); gl.glTexImage2D ( GL2.GL_TEXTURE_2D,