outputColVector.initBuffer(); arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { outputColVector.fill(arg3Scalar); arg2ColVector.flatten(batch.selectedInUse, sel, n); if (vector1[i] == 1) { if (!arg2ColVector.isNull[i]) { outputColVector.setVal( i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); if (vector1[i] == 1) { if (!arg2ColVector.isNull[i]) { outputColVector.setVal( i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); if (!arg1ColVector.isNull[i] && vector1[i] == 1) { if (!arg2ColVector.isNull[i]) { outputColVector.setVal( i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length);
public static ColumnVector createColumnVector(String typeName, DataTypePhysicalVariation dataTypePhysicalVariation) { typeName = typeName.toLowerCase(); // Allow undecorated CHAR and VARCHAR to support scratch column type names. if (typeName.equals("char") || typeName.equals("varchar")) { return new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); } TypeInfo typeInfo = (TypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); return createColumnVector(typeInfo, dataTypePhysicalVariation); }
/**
 * Copies {@code bytes[start .. start + length)} into row {@code i} of {@code outV} and
 * appends blank bytes on the right until the value holds {@code maxCharacterLength}
 * characters, as counted by {@code StringExpr.characterCount}. A value that already has
 * that many characters (or more) is copied without any padding.
 *
 * @param outV output vector that receives the padded value
 * @param i row of {@code outV} to set
 * @param bytes source buffer
 * @param start offset of the value inside {@code bytes}
 * @param length byte length of the value
 * @param maxCharacterLength target length in characters
 */
public static void padRight(BytesColumnVector outV, int i, byte[] bytes, int start, int length,
    int maxCharacterLength) {
  final int charCount = StringExpr.characterCount(bytes, start, length);
  final int missingChars = maxCharacterLength - charCount;
  final int padCount = missingChars > 0 ? missingChars : 0;
  final int totalLength = length + padCount;

  // Reserve room for the value plus its padding in the vector's own buffer.
  outV.ensureValPreallocated(totalLength);
  final byte[] dest = outV.getValPreallocatedBytes();
  final int destStart = outV.getValPreallocatedStart();

  System.arraycopy(bytes, start, dest, destStart, length);

  // Each pad character is a single blank byte.
  int pos = destStart + length;
  for (int remaining = padCount; remaining > 0; remaining--) {
    dest[pos++] = ' ';
  }

  outV.setValPreallocated(i, totalLength);
}
/**
 * Populates a bytes column vector with this object's constant as a repeating value:
 * the vector is flagged as repeating, its buffer is initialized, and row 0 is set either
 * to null (when {@code isNullValue}) or to the first {@code bytesValueLength} bytes of
 * {@code bytesValue}.
 *
 * @param colVector the vector to fill; must be a {@link BytesColumnVector}
 */
private void evaluateBytes(ColumnVector colVector) {
  final BytesColumnVector bytesVector = (BytesColumnVector) colVector;
  bytesVector.isRepeating = true;
  bytesVector.initBuffer();

  if (isNullValue) {
    bytesVector.isNull[0] = true;
    bytesVector.noNulls = false;
    return;
  }

  bytesVector.isNull[0] = false;
  bytesVector.setVal(0, bytesValue, 0, bytesValueLength);
}
/** Simplify vector by brute-force flattening noNulls and isRepeating * This can be used to reduce combinatorial explosion of code paths in VectorExpressions * with many arguments, at the expense of loss of some performance. */ public void flatten(boolean selectedInUse, int[] sel, int size) { flattenPush(); if (isRepeating) { isRepeating = false; // setRef is used below and this is safe, because the reference // is to data owned by this column vector. If this column vector // gets re-used, the whole thing is re-used together so there // is no danger of a dangling reference. // Only copy data values if entry is not null. The string value // at position 0 is undefined if the position 0 value is null. if (noNulls || !isNull[0]) { if (selectedInUse) { for (int j = 0; j < size; j++) { int i = sel[j]; this.setRef(i, vector[0], start[0], length[0]); } } else { for (int i = 0; i < size; i++) { this.setRef(i, vector[0], start[0], length[0]); } } } flattenRepeatingNulls(selectedInUse, sel, size); } flattenNoNulls(selectedInUse, sel, size); }
/**
 * Builds a three-row, three-column batch for string/math tests.
 *
 * <p>Column 0 is the string input ("00", "3232", "bad data"), column 1 is the long input
 * (0, 255, 0) and column 2 is an empty bytes vector for output.
 *
 * @return the populated batch with {@code size} set to 3
 */
public static VectorizedRowBatch getBatchForStringMath() {
  final VectorizedRowBatch result = new VectorizedRowBatch(3);
  final LongColumnVector longInput = new LongColumnVector();
  final BytesColumnVector stringInput = new BytesColumnVector();
  final BytesColumnVector stringOutput = new BytesColumnVector();

  final long[] numbers = {0, 255, 0};
  for (int row = 0; row < numbers.length; row++) {
    longInput.vector[row] = numbers[row];
  }

  stringInput.initBuffer();
  final String[] texts = {"00", "3232", "bad data"};
  for (int row = 0; row < texts.length; row++) {
    final byte[] utf8 = texts[row].getBytes(StandardCharsets.UTF_8);
    stringInput.setVal(row, utf8, 0, utf8.length);
  }

  // Note the column order: strings first, then longs, then the output column.
  result.cols[0] = stringInput;
  result.cols[1] = longInput;
  result.cols[2] = stringOutput;
  result.size = 3;
  return result;
}
bytesColVec.ensureValPreallocated(deserializeRead.currentExternalBufferNeededLen); deserializeRead.copyToExternalBuffer( bytesColVec.getValPreallocatedBytes(), bytesColVec.getValPreallocatedStart()); bytesColVec.setValPreallocated( batchIndex, deserializeRead.currentExternalBufferNeededLen); } else if (canRetainByteRef && inputBytes == deserializeRead.currentBytes) { bytesColVec.setRef( batchIndex, deserializeRead.currentBytes, deserializeRead.currentBytesLength); } else { bytesColVec.setVal( batchIndex, deserializeRead.currentBytes, bytesColVec.ensureValPreallocated(deserializeRead.currentExternalBufferNeededLen); byte[] convertBuffer = bytesColVec.getValPreallocatedBytes(); int convertBufferStart = bytesColVec.getValPreallocatedStart(); deserializeRead.copyToExternalBuffer( convertBuffer, convertBufferStart); bytesColVec.setValPreallocated( batchIndex, StringExpr.truncate( maxLengths[logicalColumnIndex])); } else if (canRetainByteRef && inputBytes == deserializeRead.currentBytes) {
/**
 * Builds the two-column batch used by the trim tests: column 0 is the input and
 * column 1 the output.
 *
 * <p>Input rows, in order: {@code emptyString} (with length 0), {@code blanksLeft},
 * {@code blanksRight}, {@code blanksBoth}, {@code red} and {@code blankString}.
 *
 * @return the batch, sized to cover all six populated rows
 */
private VectorizedRowBatch makeTrimBatch() {
  VectorizedRowBatch b = new VectorizedRowBatch(2);
  BytesColumnVector inV = new BytesColumnVector();
  BytesColumnVector outV = new BytesColumnVector();
  b.cols[0] = inV;
  b.cols[1] = outV;

  inV.setRef(0, emptyString, 0, 0);
  inV.setRef(1, blanksLeft, 0, blanksLeft.length);
  inV.setRef(2, blanksRight, 0, blanksRight.length);
  inV.setRef(3, blanksBoth, 0, blanksBoth.length);
  inV.setRef(4, red, 0, red.length);
  inV.setRef(5, blankString, 0, blankString.length);

  // Rows 0..5 are populated, so the batch holds six rows. A size of 5 would leave the
  // blankString row (index 5) outside the range an expression evaluates.
  b.size = 6;
  return b;
}
outputColVector.initBuffer(); arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); } else { arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); arg2ColVector.flatten(batch.selectedInUse, sel, n); arg3ColVector.flatten(batch.selectedInUse, sel, n); if (vector1[i] == 1) { if (!arg2ColVector.isNull[i]) { outputColVector.setVal( i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); outputColVector.setVal( i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); if (vector1[i] == 1) { if (!arg2ColVector.isNull[i]) { outputColVector.setVal( i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); outputColVector.setVal( i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); if (!arg1ColVector.isNull[i] && vector1[i] == 1) { if (!arg2ColVector.isNull[i]) { outputColVector.setVal( i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]);
public static void rightTrim(BytesColumnVector outV, int i, byte[] bytes, int start, int length) { // skip trailing blank characters int j = start + length - 1; while(j >= start && bytes[j] == 0x20) { j--; } // set output vector outV.setVal(i, bytes, start, (j - start) + 1); }
@Test // Load a BytesColumnVector by copying in large data, enough to force // the buffer to expand. public void testLoadBytesColumnVectorByValueLargeData() { BytesColumnVector bcv = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); bcv.initBuffer(10); // initialize with estimated element size 10 // Record initial buffer size int initialBufferSize = bcv.bufferSize(); String s = "0123456789"; while (s.length() < 500) { s += s; } byte[] b = s.getBytes(StandardCharsets.UTF_8); for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) { bcv.setVal(i, b, 0, b.length); } // Current buffer size should be larger than initial size Assert.assertTrue(bcv.bufferSize() > initialBufferSize); }
outV.initBuffer(); if (inputColVector.isRepeating) { outV.isRepeating = true; outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]); } else if (batch.selectedInUse) { for(int j = 0; j != n; j++) { int i = sel[j]; outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); outV.isNull[0] = inputColVector.isNull[0]; if (!inputColVector.isNull[0]) { outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]); int i = sel[j]; if (!inputColVector.isNull[i]) { outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); for(int i = 0; i != n; i++) { if (!inputColVector.isNull[i]) { outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
VectorizedRowBatch makeStringBatchMixedCase() { // create a batch with two string ("Bytes") columns VectorizedRowBatch batch = new VectorizedRowBatch(2, VectorizedRowBatch.DEFAULT_SIZE); BytesColumnVector v = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); batch.cols[0] = v; BytesColumnVector outV = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); outV.initBuffer(); batch.cols[1] = outV; /* * Add these 3 values: * * mixedUp * green * NULL */ v.setRef(0, mixedUp, 0, mixedUp.length); v.isNull[0] = false; v.setRef(1, green, 0, green.length); v.isNull[1] = false; v.setRef(2, emptyString, 0, emptyString.length); v.isNull[2] = true; v.noNulls = false; batch.size = 3; return batch; }
((LongColumnVector) batch.cols[0]).vector[r] = r * 42; lcv.vector[r] = r * 10001; ((BytesColumnVector) batch.cols[2]).setVal(r, Integer.toHexString(r).getBytes(StandardCharsets.UTF_8)); assertEquals("row " + r, r * 10001, lcv.vector[r]); assertEquals("row " + r, Integer.toHexString(r), ((BytesColumnVector) batch.cols[2]).toString(r));
/**
 * Writes a JSON value into row {@code row} of a bytes column vector.
 *
 * <p>A {@code null} reference or a JSON null marks the row as null and clears
 * {@code noNulls}. Any other value is read with {@code getAsString()}, encoded as UTF-8
 * and stored by reference.
 *
 * @param value the JSON element to convert; may be {@code null}
 * @param vect the destination vector; cast to {@link BytesColumnVector} for non-null values
 * @param row the row to set
 */
public void convert(JsonElement value, ColumnVector vect, int row) {
  final boolean missing = value == null || value.isJsonNull();
  if (missing) {
    vect.noNulls = false;
    vect.isNull[row] = true;
    return;
  }

  final BytesColumnVector target = (BytesColumnVector) vect;
  final byte[] utf8 = value.getAsString().getBytes(StandardCharsets.UTF_8);
  target.setRef(row, utf8, 0, utf8.length);
}
}
outputColVector.initBuffer(); outputColVector.setRef(0, arg2Scalar, 0, arg2Scalar.length); } else { outputColVector.setRef(0, arg3Scalar, 0, arg3Scalar.length); outputIsNull[i] = false; if (vector1[i] == 1) { outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); } else { outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); for(int i = 0; i != n; i++) { if (vector1[i] == 1) { outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); } else { outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); outputIsNull[i] = false; if (!arg1ColVector.isNull[i] && vector1[i] == 1) { outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); } else { outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); for(int i = 0; i != n; i++) { if (!arg1ColVector.isNull[i] && vector1[i] == 1) { outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); } else { outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length);
/**
 * Reads one varchar value out of a column vector.
 *
 * <p>Row 0 is used when the vector is repeating. If {@code previous} is exactly a
 * {@link HiveVarcharWritable} it is re-used as the result object; otherwise a new one
 * is allocated.
 *
 * @param vector the source vector; cast to {@link BytesColumnVector} for non-null rows
 * @param row the row to read (ignored when the vector is repeating)
 * @param size passed as the second argument to {@code HiveVarcharWritable.set}
 * @param previous an object that may be recycled for the result, or {@code null}
 * @return the writable holding the row's value, or {@code null} if the row is null
 */
public static HiveVarcharWritable nextVarchar(
    ColumnVector vector, int row, int size, Object previous) {
  final int index = vector.isRepeating ? 0 : row;

  if (!vector.noNulls && vector.isNull[index]) {
    return null;
  }

  final boolean reusable =
      previous != null && previous.getClass() == HiveVarcharWritable.class;
  final HiveVarcharWritable out =
      reusable ? (HiveVarcharWritable) previous : new HiveVarcharWritable();

  final BytesColumnVector source = (BytesColumnVector) vector;
  out.set(source.toString(index), size);
  return out;
}
/**
 * Prepares this expression's output column in {@code batch}: the column at
 * {@code outputColumnNum} is cast to {@link BytesColumnVector}, its repeating flag is
 * cleared and its buffer is initialized.
 *
 * @param batch the batch whose output column is prepared
 */
public void initBuffer(VectorizedRowBatch batch) {
  final BytesColumnVector output = (BytesColumnVector) batch.cols[outputColumnNum];
  output.isRepeating = false;
  output.initBuffer();
}