/**
 * Writes a JSON value into a bytes column as its UTF-8 encoded string form.
 *
 * @param value the JSON element to convert; {@code null} or a JSON null marks the row null
 * @param vect  the target column; cast to {@link BytesColumnVector} for non-null values
 * @param row   the row index within the column to populate
 */
public void convert(JsonElement value, ColumnVector vect, int row) {
  final boolean missing = value == null || value.isJsonNull();
  if (missing) {
    // Record the null and clear the column-wide "no nulls" shortcut.
    vect.noNulls = false;
    vect.isNull[row] = true;
    return;
  }
  BytesColumnVector target = (BytesColumnVector) vect;
  byte[] utf8 = value.getAsString().getBytes(StandardCharsets.UTF_8);
  // The freshly allocated array is handed over by reference; no copy is made here.
  target.setRef(row, utf8, 0, utf8.length);
}
}
/**
 * Writes a JSON value holding a hexadecimal string into a bytes column,
 * decoding every two characters into one byte.
 *
 * @param value the JSON element to convert; {@code null} or a JSON null marks the row null
 * @param vect  the target column; cast to {@link BytesColumnVector} for non-null values
 * @param row   the row index within the column to populate
 */
public void convert(JsonElement value, ColumnVector vect, int row) {
  final boolean missing = value == null || value.isJsonNull();
  if (missing) {
    vect.noNulls = false;
    vect.isNull[row] = true;
    return;
  }
  BytesColumnVector target = (BytesColumnVector) vect;
  String hex = value.getAsString();
  // NOTE(review): integer division means a trailing unpaired character of an
  // odd-length string is ignored -- confirm this is intended.
  final int byteCount = hex.length() / 2;
  byte[] decoded = new byte[byteCount];
  int pos = 0;
  for (int i = 0; i < byteCount; i++, pos += 2) {
    String pair = hex.substring(pos, pos + 2);
    decoded[i] = (byte) Integer.parseInt(pair, 16);
  }
  target.setRef(row, decoded, 0, decoded.length);
}
}
outputColVector.setRef(0, arg2Scalar, 0, arg2Scalar.length); } else { outputColVector.setRef(0, arg3Scalar, 0, arg3Scalar.length); outputIsNull[i] = false; if (vector1[i] == 1) { outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); } else { outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); for(int i = 0; i != n; i++) { if (vector1[i] == 1) { outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); } else { outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); outputIsNull[i] = false; if (!arg1ColVector.isNull[i] && vector1[i] == 1) { outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); } else { outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); for(int i = 0; i != n; i++) { if (!arg1ColVector.isNull[i] && vector1[i] == 1) { outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); } else { outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length);
/**
 * Copies one bytes-column entry from the input batch to the output batch by reference.
 *
 * <p>When the input column is repeating, entry 0 is the source for every row;
 * otherwise the entry at {@code inBatchIndex} is used. A null source entry is
 * propagated through {@code VectorizedBatchUtil.setNullColIsNullValue}.
 *
 * @param inBatch       batch to read from (column {@code inColumnIndex})
 * @param inBatchIndex  row in the input batch
 * @param outBatch      batch to write to (column {@code outColumnIndex})
 * @param outBatchIndex row in the output batch
 */
@Override
void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch,
    int outBatchIndex) {
  BytesColumnVector source = (BytesColumnVector) inBatch.cols[inColumnIndex];
  BytesColumnVector target = (BytesColumnVector) outBatch.cols[outColumnIndex];

  // A repeating column keeps its single value (and null flag) at index 0.
  final int sourceRow = source.isRepeating ? 0 : inBatchIndex;

  if (!source.noNulls && source.isNull[sourceRow]) {
    VectorizedBatchUtil.setNullColIsNullValue(target, outBatchIndex);
    return;
  }

  target.setRef(outBatchIndex, source.vector[sourceRow], source.start[sourceRow],
      source.length[sourceRow]);
  target.isNull[outBatchIndex] = false;
}
}
/**
 * Transfers a single bytes-column value between batches without copying the bytes.
 *
 * <p>The source row is 0 for a repeating input column and {@code inBatchIndex}
 * otherwise. If that source row is null, the output row is marked null via
 * {@code VectorizedBatchUtil.setNullColIsNullValue}; otherwise the output row
 * references the input's buffer, start and length and is flagged non-null.
 *
 * @param inBatch       batch supplying the value (column {@code inColumnIndex})
 * @param inBatchIndex  row index in {@code inBatch}
 * @param outBatch      batch receiving the value (column {@code outColumnIndex})
 * @param outBatchIndex row index in {@code outBatch}
 */
@Override
void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBatch,
    int outBatchIndex) {
  BytesColumnVector inBytes = (BytesColumnVector) inBatch.cols[inColumnIndex];
  BytesColumnVector outBytes = (BytesColumnVector) outBatch.cols[outColumnIndex];

  int from = inBatchIndex;
  if (inBytes.isRepeating) {
    // Repeating columns only populate slot 0.
    from = 0;
  }

  final boolean hasValue = inBytes.noNulls || !inBytes.isNull[from];
  if (hasValue) {
    outBytes.setRef(outBatchIndex, inBytes.vector[from], inBytes.start[from],
        inBytes.length[from]);
    outBytes.isNull[outBatchIndex] = false;
  } else {
    VectorizedBatchUtil.setNullColIsNullValue(outBytes, outBatchIndex);
  }
}
}
/**
 * Builds a two-column bytes batch for trim tests: column 0 holds the inputs,
 * column 1 is an empty output column.
 *
 * @return the populated batch
 */
private VectorizedRowBatch makeTrimBatch() {
  BytesColumnVector input = new BytesColumnVector();
  BytesColumnVector output = new BytesColumnVector();

  VectorizedRowBatch batch = new VectorizedRowBatch(2);
  batch.cols[0] = input;
  batch.cols[1] = output;

  // Row 0 is given an explicit zero length rather than emptyString.length.
  input.setRef(0, emptyString, 0, 0);

  // Rows 1..5 reference each sample in full.
  byte[][] samples = {blanksLeft, blanksRight, blanksBoth, red, blankString};
  for (int k = 0; k < samples.length; k++) {
    input.setRef(k + 1, samples[k], 0, samples[k].length);
  }

  // NOTE(review): six rows (0..5) are populated but size is 5, so row 5
  // (blankString) lies outside the batch -- confirm whether this is intended.
  batch.size = 5;
  return batch;
}
/** Simplify vector by brute-force flattening noNulls and isRepeating * This can be used to reduce combinatorial explosion of code paths in VectorExpressions * with many arguments, at the expense of loss of some performance. */ public void flatten(boolean selectedInUse, int[] sel, int size) { flattenPush(); if (isRepeating) { isRepeating = false; // setRef is used below and this is safe, because the reference // is to data owned by this column vector. If this column vector // gets re-used, the whole thing is re-used together so there // is no danger of a dangling reference. // Only copy data values if entry is not null. The string value // at position 0 is undefined if the position 0 value is null. if (noNulls || !isNull[0]) { if (selectedInUse) { for (int j = 0; j < size; j++) { int i = sel[j]; this.setRef(i, vector[0], start[0], length[0]); } } else { for (int i = 0; i < size; i++) { this.setRef(i, vector[0], start[0], length[0]); } } } flattenRepeatingNulls(selectedInUse, sel, size); } flattenNoNulls(selectedInUse, sel, size); }
/**
 * Fills every slot of the column with sample string data taken from the
 * colorsBytes string table, cycling through the table when the column is
 * longer than the table.
 *
 * @param col the column whose entries are set by reference
 */
public static void setSampleStringCol(BytesColumnVector col) {
  initColors();
  final int rowCount = col.vector.length;
  for (int row = 0; row < rowCount; row++) {
    // Wrap around the colour table.
    byte[] color = colorsBytes[row % colorsBytes.length];
    col.setRef(row, color, 0, color.length);
  }
}
/**
 * Builds a three-column bytes batch with two identical input columns
 * (red, green, NULL) and one empty output column.
 *
 * @return a batch of size 3
 */
private VectorizedRowBatch makeStringBatch2In1Out() {
  VectorizedRowBatch result = new VectorizedRowBatch(3);
  BytesColumnVector first = new BytesColumnVector();
  result.cols[0] = first;
  BytesColumnVector second = new BytesColumnVector();
  result.cols[1] = second;
  result.cols[2] = new BytesColumnVector();

  // Both inputs receive exactly the same three rows.
  for (BytesColumnVector input : new BytesColumnVector[] {first, second}) {
    input.setRef(0, red, 0, red.length);
    input.isNull[0] = false;
    input.setRef(1, green, 0, green.length);
    input.isNull[1] = false;
    // Row 2 is null; it still references emptyString.
    input.setRef(2, emptyString, 0, emptyString.length);
    input.isNull[2] = true;
    input.noNulls = false;
  }

  result.size = 3;
  return result;
}
/**
 * Builds a four-column batch for column-vs-column string comparisons:
 * two bytes input columns, one bytes column and one long column.
 *
 * <p>Column 0 rows: blue, green, red, NULL. Column 1 rows: red, green, blue, red.
 *
 * @return a batch of size 4
 */
private VectorizedRowBatch makeStringBatchForColColCompare() {
  VectorizedRowBatch result = new VectorizedRowBatch(4);
  BytesColumnVector left = new BytesColumnVector();
  result.cols[0] = left;
  BytesColumnVector right = new BytesColumnVector();
  result.cols[1] = right;
  result.cols[2] = new BytesColumnVector();
  result.cols[3] = new LongColumnVector();

  final byte[][] leftRows = {blue, green, red, emptyString};
  final byte[][] rightRows = {red, green, blue, red};
  final int nullRowOnLeft = 3;

  for (int row = 0; row < leftRows.length; row++) {
    left.setRef(row, leftRows[row], 0, leftRows[row].length);
    left.isNull[row] = (row == nullRowOnLeft);
  }
  left.noNulls = false;

  for (int row = 0; row < rightRows.length; row++) {
    right.setRef(row, rightRows[row], 0, rightRows[row].length);
    right.isNull[row] = false;
  }
  // The right column has no null rows, yet noNulls is cleared as well.
  right.noNulls = false;

  result.size = 4;
  return result;
}
VectorizedRowBatch makeStringBatchMixedCharSize() { // create a new batch with one char column (for input) and one long column (for output) VectorizedRowBatch batch = new VectorizedRowBatch(2, VectorizedRowBatch.DEFAULT_SIZE); BytesColumnVector v = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); batch.cols[0] = v; LongColumnVector outV = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); batch.cols[1] = outV; /* * Add these 3 values: * * mixedUp * green * NULL * <4 char string with mult-byte chars> */ v.setRef(0, mixedUp, 0, mixedUp.length); v.isNull[0] = false; v.setRef(1, green, 0, green.length); v.isNull[1] = false; v.setRef(2, emptyString, 0, emptyString.length); v.isNull[2] = true; v.noNulls = false; v.setRef(3, multiByte, 0, 10); v.isNull[3] = false; batch.size = 4; return batch; }
@Test // set values by reference, copy the data out, and verify equality public void testLoadBytesColumnVectorByRef() { BytesColumnVector bcv = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); String s = "red"; byte[] b = s.getBytes(StandardCharsets.UTF_8); for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) { bcv.setRef(i, b, 0, b.length); } // verify byte[] v = new byte[b.length]; for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) { Assert.assertTrue(bcv.length[i] == b.length); System.arraycopy(bcv.vector[i], bcv.start[i], v, 0, b.length); Assert.assertTrue(Arrays.equals(b, v)); } }
VectorizedRowBatch makeStringBatch() { // create a batch with one string ("Bytes") column VectorizedRowBatch batch = new VectorizedRowBatch(3); BytesColumnVector v = new BytesColumnVector(); batch.cols[0] = v; batch.cols[1] = new BytesColumnVector(); // to hold output if needed batch.cols[2] = new LongColumnVector(batch.size); // to hold boolean output /* * Add these 3 values: * * red * green * NULL */ v.setRef(0, red, 0, red.length); v.isNull[0] = false; v.setRef(1, green, 0, green.length); v.isNull[1] = false; v.setRef(2, emptyString, 0, emptyString.length); v.isNull[2] = true; v.noNulls = false; batch.size = 3; return batch; }
VectorizedRowBatch makeStringBatchMixedCase() { // create a batch with two string ("Bytes") columns VectorizedRowBatch batch = new VectorizedRowBatch(2, VectorizedRowBatch.DEFAULT_SIZE); BytesColumnVector v = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); batch.cols[0] = v; BytesColumnVector outV = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); outV.initBuffer(); batch.cols[1] = outV; /* * Add these 3 values: * * mixedUp * green * NULL */ v.setRef(0, mixedUp, 0, mixedUp.length); v.isNull[0] = false; v.setRef(1, green, 0, green.length); v.isNull[1] = false; v.setRef(2, emptyString, 0, emptyString.length); v.isNull[2] = true; v.noNulls = false; batch.size = 3; return batch; }
String s4 = "27V63IL7jK3o"; v.isNull[0] = false; v.setRef(0, s1.getBytes(), 0, s1.getBytes().length); v.isNull[1] = true; v.vector[1] = null; v.isNull[2] = false; v.setRef(2, s2.getBytes(), 0, s2.getBytes().length); v.isNull[3] = false; v.setRef(3, s3.getBytes(), 0, s3.getBytes().length); v.isNull[4] = false; v.setRef(4, s4.getBytes(), 0, s4.getBytes().length);
for (int i = 0; i < valueList.size(); i++) { byte[] src = ((List<byte[]>) valueList).get(i); ((BytesColumnVector) lcv.child).setRef(i, src, 0, src.length);
((BytesColumnVector) targetColVector).setVal(0, bytes, start, length); } else { ((BytesColumnVector) targetColVector).setRef(0, bytes, start, length);
bv.setRef(0, b, 0, b.length); bv.flatten(true, sel, 2); Assert.assertEquals(bv.vector[0], bv.vector[2]);
@Override public Void call() throws Exception { int batchSize = 1024; VectorUDFDateString udf = new VectorUDFDateString(0, 1); VectorizedRowBatch batch = new VectorizedRowBatch(2, batchSize); BytesColumnVector in = new BytesColumnVector(batchSize); LongColumnVector out = new LongColumnVector(batchSize); batch.cols[0] = in; batch.cols[1] = out; for (int i = 0; i < batchSize; i++) { byte[] data = String.format("1999-%02d-%02d", 1 + (i % 12), 1 + (i % 15)).getBytes("UTF-8"); in.setRef(i, data, 0, data.length); in.isNull[i] = false; } udf.evaluate(batch); // bug if it throws an exception return (Void) null; } }