/**
 * Convenience overload that looks up a kernel for a binary function when a single
 * primitive type is used throughout.
 *
 * <p>Forwards to the five-argument {@code getKernel} overload, passing {@code prim}
 * for each of its three primitive-type arguments.
 *
 * @param op the binary function whose kernel is requested
 * @param prim the primitive type passed for all three primitive-type arguments
 * @param secondOperandIsScalar forwarded unchanged to the five-argument overload
 * @return the kernel returned by the five-argument overload
 * @throws CLBuildException propagated from the five-argument overload
 */
public synchronized CLKernel getKernel(Fun2 op, Primitive prim, boolean secondOperandIsScalar)
        throws CLBuildException {
    final CLKernel kernel = getKernel(op, prim, prim, prim, secondOperandIsScalar);
    return kernel;
}
/**
 * Returns the kernel for binary function {@code op} using one primitive type for
 * every primitive-type argument of the general lookup.
 *
 * <p>This is a thin delegate: it calls the five-argument {@code getKernel} overload
 * with {@code prim} repeated three times and {@code secondOperandIsScalar} passed through.
 *
 * @param op the binary function whose kernel is requested
 * @param prim the primitive type supplied three times to the general overload
 * @param secondOperandIsScalar passed through as-is to the general overload
 * @return whatever the five-argument overload returns
 * @throws CLBuildException if the five-argument overload throws it
 */
public synchronized CLKernel getKernel(Fun2 op, Primitive prim, boolean secondOperandIsScalar)
        throws CLBuildException {
    final CLKernel resolved = getKernel(op, prim, prim, prim, secondOperandIsScalar);
    return resolved;
}
public <T> CLEvent op1(Primitive prim, Fun1 fun, CLBuffer<T> a, long rows, long columns, long stride, CLBuffer<T> out, CLEvent... eventsToWaitFor) throws CLBuildException { long length = rows * stride; if (out == null || out.getElementCount() < length) throw new IllegalArgumentException("Expected buffer of length >= " + length + ", got " + out); //if (out != null) // out = (CLBuffer<T>)context.createBuffer(Usage.Output, prim.primitiveType, length); CLKernel kernel = math.getKernel(fun, prim); synchronized (kernel) { kernel.setArgs(a, out, length); CLEvent evt = kernel.enqueueNDRange(queue, new int [] { (int)length }, eventsToWaitFor); return evt; } }
public <T> CLEvent op2(Primitive prim, Fun2 fun, CLBuffer<T> a, CLBuffer<T> b, long rows, long columns, long stride, CLBuffer<T> out, CLEvent... eventsToWaitFor) throws CLBuildException { long length = rows * stride; if (out == null || out.getElementCount() < length) throw new IllegalArgumentException("Expected buffer of length >= " + length + ", got " + out.getElementCount()); //if (out != null) // out = (CLBuffer<T>)context.createBuffer(Usage.Output, prim.primitiveType, length); CLKernel kernel = math.getKernel(fun, prim, false); synchronized (kernel) { kernel.setArgs(a, b, out, length); CLEvent evt = kernel.enqueueNDRange(queue, new int [] { (int)length }, eventsToWaitFor); return evt; } }
public <T> CLEvent op2(Primitive prim, Fun2 fun, CLBuffer<T> a, T b, long rows, long columns, long stride, CLBuffer<T> out, CLEvent... eventsToWaitFor) throws CLBuildException { long length = rows * stride; if (out == null || out.getElementCount() < length) throw new IllegalArgumentException("Expected buffer of length >= " + length + ", got " + out.getElementCount()); //if (out != null) // out = (CLBuffer<T>)context.createBuffer(Usage.Output, prim.primitiveType, length); CLKernel kernel = math.getKernel(fun, prim, true); synchronized (kernel) { kernel.setArgs(a, b, out, length); CLEvent evt = kernel.enqueueNDRange(queue, new int [] { (int)length }, eventsToWaitFor); return evt; } }