public static ColumnVector createColumnVector(String typeName, DataTypePhysicalVariation dataTypePhysicalVariation) { typeName = typeName.toLowerCase(); // Allow undecorated CHAR and VARCHAR to support scratch column type names. if (typeName.equals("char") || typeName.equals("varchar")) { return new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); } TypeInfo typeInfo = (TypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); return createColumnVector(typeInfo, dataTypePhysicalVariation); }
public static ColumnVector createColumnVector(String typeName) { typeName = typeName.toLowerCase(); // Allow undecorated CHAR and VARCHAR to support scratch column type names. if (typeName.equals("char") || typeName.equals("varchar")) { return new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); } TypeInfo typeInfo = (TypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); return createColumnVector(typeInfo); }
/**
 * Converts the first {@code size} entries of a date column to their byte-string form.
 *
 * <p>Null input rows are propagated as null output rows (and clear {@code noNulls});
 * every other row is converted with the single-value {@code toString} overload and stored
 * starting at offset 0.
 *
 * @param date the input column
 * @return a new bytes column holding one converted value per row
 */
private BytesColumnVector toString(LongColumnVector date) {
  final BytesColumnVector result = new BytesColumnVector(size);
  for (int row = 0; row < size; row++) {
    if (date.isNull[row]) {
      // Carry the null through and record that the output now contains nulls.
      result.isNull[row] = true;
      result.noNulls = false;
      continue;
    }
    final byte[] text = toString(date.vector[row]);
    result.vector[row] = text;
    result.start[row] = 0;
    result.length[row] = text.length;
  }
  return result;
}
@Test // set values by reference, copy the data out, and verify equality public void testLoadBytesColumnVectorByRef() { BytesColumnVector bcv = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); String s = "red"; byte[] b = s.getBytes(StandardCharsets.UTF_8); for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) { bcv.setRef(i, b, 0, b.length); } // verify byte[] v = new byte[b.length]; for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) { Assert.assertTrue(bcv.length[i] == b.length); System.arraycopy(bcv.vector[i], bcv.start[i], v, 0, b.length); Assert.assertTrue(Arrays.equals(b, v)); } }
/**
 * Builds a single-row, two-column batch: column 0 is a bytes column whose only entry
 * references the given slice, column 1 is an empty long column.
 *
 * @param vector backing byte array for row 0 (stored by reference, not copied)
 * @param start offset of the value inside {@code vector}
 * @param length number of bytes of the value
 * @return the batch, with {@code size} set to 1
 */
private VectorizedRowBatch getVectorizedRowBatchStringLong(byte[] vector, int start, int length) {
  final BytesColumnVector input = new BytesColumnVector(1);
  input.vector[0] = vector;
  input.start[0] = start;
  input.length[0] = length;

  final VectorizedRowBatch result = new VectorizedRowBatch(2, 1);
  result.cols[0] = input;
  result.cols[1] = new LongColumnVector(1);
  result.size = 1;
  return result;
}
/**
 * Builds a two-column batch with a bytes input column and a long output column.
 *
 * <p>The input column holds the UTF-8 bytes of eight strings: three spellings of "true",
 * three spellings of "false", the empty string and "Other". The batch size is set to the
 * number of strings.
 *
 * @return the populated batch
 */
public static VectorizedRowBatch getVectorizedRowBatchStringInLongOut() {
  final String[] values = {"true", "TRUE", "TrUe", "false", "FALSE", "FaLsE", "", "Other"};

  VectorizedRowBatch batch = new VectorizedRowBatch(2);
  BytesColumnVector inV = new BytesColumnVector();
  LongColumnVector outV = new LongColumnVector();
  inV.initBuffer();

  for (int i = 0; i < values.length; i++) {
    // getBytes yields an array of exactly the encoded length. The previous
    // Charset.encode(..).array() exposed the buffer's whole backing array, which is not
    // guaranteed to match the encoded length and only happened to for these short values.
    inV.setVal(i, values[i].getBytes(StandardCharsets.UTF_8));
  }

  batch.cols[0] = inV;
  batch.cols[1] = outV;
  batch.size = values.length;
  return batch;
}
/**
 * Builds a two-column batch of {@code size} rows: column 0 holds the encoded form of the
 * given timestamps, column 1 is an empty long column.
 *
 * <p>When {@code size} exceeds {@code inputs.length} the inputs are reused cyclically.
 *
 * @param inputs timestamps to encode with {@code encodeTime}
 * @param size number of rows to produce
 * @return the batch, with {@code size} rows
 */
private VectorizedRowBatch getVectorizedRowBatchStringLong(Timestamp[] inputs, int size) {
  final VectorizedRowBatch result = new VectorizedRowBatch(2, size);
  final BytesColumnVector encodedColumn = new BytesColumnVector(size);

  for (int row = 0; row < size; row++) {
    // Wrap around the input array so any row count can be filled.
    final Timestamp source = inputs[row % inputs.length];
    final byte[] bytes = encodeTime(source);
    encodedColumn.vector[row] = bytes;
    encodedColumn.start[row] = 0;
    encodedColumn.length[row] = bytes.length;
  }

  result.cols[0] = encodedColumn;
  result.cols[1] = new LongColumnVector(size);
  result.size = size;
  return result;
}
@Test // Load a BytesColumnVector by copying in large data, enough to force // the buffer to expand. public void testLoadBytesColumnVectorByValueLargeData() { BytesColumnVector bcv = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); bcv.initBuffer(10); // initialize with estimated element size 10 // Record initial buffer size int initialBufferSize = bcv.bufferSize(); String s = "0123456789"; while (s.length() < 500) { s += s; } byte[] b = s.getBytes(StandardCharsets.UTF_8); for (int i = 0; i != VectorizedRowBatch.DEFAULT_SIZE; i++) { bcv.setVal(i, b, 0, b.length); } // Current buffer size should be larger than initial size Assert.assertTrue(bcv.bufferSize() > initialBufferSize); }
@Override public Void call() throws Exception { int batchSize = 1024; VectorUDFDateString udf = new VectorUDFDateString(0, 1); VectorizedRowBatch batch = new VectorizedRowBatch(2, batchSize); BytesColumnVector in = new BytesColumnVector(batchSize); LongColumnVector out = new LongColumnVector(batchSize); batch.cols[0] = in; batch.cols[1] = out; for (int i = 0; i < batchSize; i++) { byte[] data = String.format("1999-%02d-%02d", 1 + (i % 12), 1 + (i % 15)).getBytes("UTF-8"); in.setRef(i, data, 0, data.length); in.isNull[i] = false; } udf.evaluate(batch); // bug if it throws an exception return (Void) null; } }
/**
 * Builds a two-column bytes batch whose input column references the trim test strings:
 * the empty string, strings with blanks on the left, right and both sides, "red", and a
 * blank string.
 *
 * @return the batch, with {@code size} set to 5
 */
private VectorizedRowBatch makeTrimBatch() {
  final BytesColumnVector input = new BytesColumnVector();
  final BytesColumnVector output = new BytesColumnVector();

  final VectorizedRowBatch batch = new VectorizedRowBatch(2);
  batch.cols[0] = input;
  batch.cols[1] = output;

  // Row 0 is registered with an explicit length of zero.
  input.setRef(0, emptyString, 0, 0);

  // Rows 1..5 reference each remaining array in full.
  final byte[][] remaining = {blanksLeft, blanksRight, blanksBoth, red, blankString};
  for (int i = 0; i < remaining.length; i++) {
    input.setRef(i + 1, remaining[i], 0, remaining[i].length);
  }

  // NOTE(review): six rows are populated above but size is 5, so row 5 (blankString) lies
  // outside the batch -- confirm with the callers whether that is intended.
  batch.size = 5;
  return batch;
}
/**
 * Builds a two-column batch of {@code size} rows: column 0 holds encoded random timestamps,
 * column 1 is an empty long column.
 *
 * @param seed seed for the {@link Random} that drives timestamp generation
 * @param size number of rows to produce
 * @return the batch, with {@code size} rows
 */
private VectorizedRowBatch getVectorizedRandomRowBatchStringLong(int seed, int size) {
  final VectorizedRowBatch result = new VectorizedRowBatch(2, size);
  final BytesColumnVector encodedColumn = new BytesColumnVector(size);
  final Random generator = new Random(seed);

  for (int row = 0; row < size; row++) {
    // Draw a random timestamp, convert it to its SQL form and encode it.
    final byte[] bytes =
        encodeTime(RandomTypeUtil.getRandTimestamp(generator).toSqlTimestamp());
    encodedColumn.vector[row] = bytes;
    encodedColumn.start[row] = 0;
    encodedColumn.length[row] = bytes.length;
  }

  result.cols[0] = encodedColumn;
  result.cols[1] = new LongColumnVector(size);
  result.size = size;
  return result;
}
/**
 * Builds a three-column batch (long, double, bytes). The bytes column is filled by
 * {@code setSampleStringCol}, and the finished batch is passed to {@code addRandomNulls}.
 *
 * @return the prepared batch
 */
private VectorizedRowBatch makeBatch() {
  final VectorizedRowBatch result = new VectorizedRowBatch(3);

  final LongColumnVector longs = new LongColumnVector();
  final DoubleColumnVector doubles = new DoubleColumnVector();
  final BytesColumnVector strings = new BytesColumnVector();
  setSampleStringCol(strings);

  result.cols[0] = longs;
  result.cols[1] = doubles;
  result.cols[2] = strings;

  addRandomNulls(result);
  return result;
}
/**
 * Runs {@code verifyFlatten} against a freshly constructed long, double and bytes column
 * vector, in that order.
 */
@Test
public void testFlatten() {
  final LongColumnVector longs = new LongColumnVector();
  verifyFlatten(longs);

  final DoubleColumnVector doubles = new DoubleColumnVector();
  verifyFlatten(doubles);

  final BytesColumnVector strings = new BytesColumnVector();
  verifyFlatten(strings);
}
/**
 * Builds a three-row, two-column batch: column 0 is a decimal(18, 2) column holding
 * 1.1, -2.2 and 9999999999999999.00; column 1 is an empty bytes column.
 *
 * @return the populated batch
 */
private VectorizedRowBatch getBatchDecimalString() {
  final String[] literals = {"1.1", "-2.2", "9999999999999999.00"};
  final short scale = 2;

  final VectorizedRowBatch batch = new VectorizedRowBatch(2);
  final DecimalColumnVector decimals = new DecimalColumnVector(18, scale);
  batch.cols[0] = decimals;
  batch.cols[1] = new BytesColumnVector();
  batch.size = 3;

  for (int row = 0; row < literals.length; row++) {
    decimals.vector[row].set(HiveDecimal.create(literals[row]));
  }
  return batch;
}
/**
 * Runs the column/scalar date-add checks for every type in
 * {@code dateTimestampStringTypes}, then verifies that a string input that is not a date
 * ("error") produces a null output row.
 *
 * @throws HiveException if initializing or evaluating the expression fails
 */
@Test
public void testDateAddColScalar() throws HiveException {
  for (PrimitiveCategory colType1 : dateTimestampStringTypes) {
    testDateAddColScalar(colType1, true);
  }

  VectorExpression udf = new VectorUDFDateAddColScalar(0, 0, 1);
  udf.setInputTypeInfos(
      new TypeInfo[] {TypeInfoFactory.stringTypeInfo, TypeInfoFactory.timestampTypeInfo});
  udf.transientInit();

  // Single-row batch: bytes input in column 0, long output in column 1.
  VectorizedRowBatch batch = new VectorizedRowBatch(2, 1);
  BytesColumnVector bcv = new BytesColumnVector(1);
  batch.cols[0] = bcv;
  batch.cols[1] = new LongColumnVector(1);

  byte[] bytes = "error".getBytes(utf8);
  bcv.vector[0] = bytes;
  bcv.start[0] = 0;
  bcv.length[0] = bytes.length;

  udf.evaluate(batch);

  // assertTrue replaces assertEquals(actual, true), whose arguments were in the wrong
  // order and would have produced a misleading failure message.
  Assert.assertTrue(batch.cols[1].isNull[0]);
}
/**
 * Runs the column/scalar date-sub checks for every type in
 * {@code dateTimestampStringTypes}, then verifies that a string input that is not a date
 * ("error") produces a null output row.
 *
 * @throws HiveException if initializing or evaluating the expression fails
 */
@Test
public void testDateSubColScalar() throws HiveException {
  for (PrimitiveCategory colType1 : dateTimestampStringTypes) {
    testDateAddColScalar(colType1, false);
  }

  VectorExpression udf = new VectorUDFDateSubColScalar(0, 0, 1);
  udf.setInputTypeInfos(
      new TypeInfo[] {TypeInfoFactory.stringTypeInfo, TypeInfoFactory.timestampTypeInfo});
  udf.transientInit();

  // Single-row batch: bytes input in column 0, long output in column 1.
  VectorizedRowBatch batch = new VectorizedRowBatch(2, 1);
  BytesColumnVector bcv = new BytesColumnVector(1);
  batch.cols[0] = bcv;
  batch.cols[1] = new LongColumnVector(1);

  byte[] bytes = "error".getBytes(utf8);
  bcv.vector[0] = bytes;
  bcv.start[0] = 0;
  bcv.length[0] = bytes.length;

  udf.evaluate(batch);

  // assertTrue replaces assertEquals(actual, true), whose arguments were in the wrong
  // order and would have produced a misleading failure message.
  Assert.assertTrue(batch.cols[1].isNull[0]);
}
/**
 * Runs the column/column date-sub checks for every type in
 * {@code dateTimestampStringTypes}, then verifies that a string input that is not a date
 * ("error") produces a null output row.
 *
 * @throws HiveException if initializing or evaluating the expression fails
 */
@Test
public void testDateSubColCol() throws HiveException {
  for (PrimitiveCategory colType1 : dateTimestampStringTypes) {
    testDateAddColCol(colType1, false);
  }

  VectorExpression udf = new VectorUDFDateSubColCol(0, 1, 2);
  VectorizedRowBatch batch = new VectorizedRowBatch(3, 1);
  byte[] bytes = "error".getBytes(utf8);

  udf.setInputTypeInfos(
      new TypeInfo[] {TypeInfoFactory.stringTypeInfo, TypeInfoFactory.timestampTypeInfo});
  udf.transientInit();

  // Single-row batch: bytes input in column 0, long columns 1 and 2 (2 is checked below).
  BytesColumnVector bcv = new BytesColumnVector(1);
  batch.cols[0] = bcv;
  batch.cols[1] = new LongColumnVector(1);
  batch.cols[2] = new LongColumnVector(1);

  bcv.vector[0] = bytes;
  bcv.start[0] = 0;
  bcv.length[0] = bytes.length;

  udf.evaluate(batch);

  // assertTrue replaces assertEquals(actual, true), whose arguments were in the wrong
  // order and would have produced a misleading failure message.
  Assert.assertTrue(batch.cols[2].isNull[0]);
}
/**
 * Runs the column/column date-add checks for every type in
 * {@code dateTimestampStringTypes}, then verifies that a string input that is not a date
 * ("error") produces a null output row.
 *
 * @throws HiveException if initializing or evaluating the expression fails
 */
@Test
public void testDateAddColCol() throws HiveException {
  for (PrimitiveCategory colType1 : dateTimestampStringTypes) {
    testDateAddColCol(colType1, true);
  }

  VectorExpression udf = new VectorUDFDateAddColCol(0, 1, 2);
  VectorizedRowBatch batch = new VectorizedRowBatch(3, 1);
  byte[] bytes = "error".getBytes(utf8);

  udf.setInputTypeInfos(
      new TypeInfo[] {TypeInfoFactory.stringTypeInfo, TypeInfoFactory.timestampTypeInfo});
  udf.transientInit();

  // Single-row batch: bytes input in column 0, long columns 1 and 2 (2 is checked below).
  BytesColumnVector bcv = new BytesColumnVector(1);
  batch.cols[0] = bcv;
  batch.cols[1] = new LongColumnVector(1);
  batch.cols[2] = new LongColumnVector(1);

  bcv.vector[0] = bytes;
  bcv.start[0] = 0;
  bcv.length[0] = bytes.length;

  udf.evaluate(batch);

  // assertTrue replaces assertEquals(actual, true), whose arguments were in the wrong
  // order and would have produced a misleading failure message.
  Assert.assertTrue(batch.cols[2].isNull[0]);
}
/**
 * Builds a two-column batch: column 0 is a bytes column holding the strings "1.10",
 * "-2.20" and "99999999999999.0" (copied by value), column 1 is an empty decimal(18, 2)
 * column. The batch size is left at its constructor default.
 *
 * @return the populated batch
 */
private VectorizedRowBatch getBatchStringDecimal() {
  final VectorizedRowBatch batch = new VectorizedRowBatch(2);
  final BytesColumnVector strings = new BytesColumnVector();
  batch.cols[0] = strings;
  batch.cols[1] = new DecimalColumnVector(18, 2);
  strings.initBuffer();

  // Encode all literals first, then copy them into the column.
  final String[] literals = {"1.10", "-2.20", "99999999999999.0"};
  final byte[][] encoded = new byte[literals.length][];
  for (int row = 0; row < literals.length; row++) {
    encoded[row] = toBytes(literals[row]);
  }
  for (int row = 0; row < encoded.length; row++) {
    strings.setVal(row, encoded[row], 0, encoded[row].length);
  }
  return batch;
}
@Test public void testFilterStringNotBetween() throws HiveException { // Spot check only. Non-standard cases are checked for the same template in another test. int seed = 17; VectorizedRowBatch vrb = VectorizedRowGroupGenUtil.getVectorizedRowBatch( 3, 2, seed); vrb.cols[0] = new BytesColumnVector(); BytesColumnVector bcv = (BytesColumnVector) vrb.cols[0]; bcv.initBuffer(); bcv.setVal(0, a, 0, 1); bcv.setVal(1, b, 0, 1); bcv.setVal(2, c, 0, 1); VectorExpression expr = new FilterStringColumnNotBetween(0, b, c); expr.evaluate(vrb); assertEquals(1, vrb.size); assertTrue(vrb.selectedInUse); assertEquals(0, vrb.selected[0]); }