/**
 * Produces a zero-row Arrow batch: every configured field is (re)added to the
 * root struct vector with zero capacity, so readers see the full schema but
 * no data.
 *
 * @return a writable wrapping a {@link VectorSchemaRoot} over the empty root vector
 */
public ArrowWrapperWritable emptyBatch() {
  rootVector.setValueCount(0);
  final int fieldCount = fieldTypeInfos.size();
  for (int i = 0; i < fieldCount; i++) {
    final TypeInfo typeInfo = fieldTypeInfos.get(i);
    final FieldType fieldType = toFieldType(typeInfo);
    // addOrGet keeps the schema stable across successive batches
    final FieldVector childVector = rootVector.addOrGet(fieldNames.get(i), fieldType, FieldVector.class);
    childVector.setInitialCapacity(0);
    childVector.allocateNew();
  }
  return new ArrowWrapperWritable(new VectorSchemaRoot(rootVector), allocator, rootVector);
}
/**
 * Serializes a Hive struct column into an Arrow struct vector.
 *
 * Each child field is recursively written via {@code write}, then the struct's
 * own validity buffer is stamped from the Hive vector's {@code isNull} flags.
 *
 * @param arrowVector        target Arrow struct vector
 * @param hiveVector         source Hive struct column
 * @param typeInfo           struct type metadata (field names/types)
 * @param size               number of rows to serialize
 * @param vectorizedRowBatch enclosing batch, forwarded to child writers
 * @param isNative           whether the batch came from native vectorization
 */
private void writeStruct(NonNullableStructVector arrowVector, StructColumnVector hiveVector,
    StructTypeInfo typeInfo, int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative) {
  final List<String> fieldNames = typeInfo.getAllStructFieldNames();
  final List<TypeInfo> fieldTypeInfos = typeInfo.getAllStructFieldTypeInfos();
  final ColumnVector[] hiveFieldVectors = hiveVector.fields;
  final int fieldSize = fieldTypeInfos.size();
  for (int fieldIndex = 0; fieldIndex < fieldSize; fieldIndex++) {
    final TypeInfo fieldTypeInfo = fieldTypeInfos.get(fieldIndex);
    final ColumnVector hiveFieldVector = hiveFieldVectors[fieldIndex];
    final String fieldName = fieldNames.get(fieldIndex);
    // Fix: reuse the already-fetched fieldTypeInfo instead of re-looking it up
    final FieldVector arrowFieldVector =
        arrowVector.addOrGet(fieldName, toFieldType(fieldTypeInfo), FieldVector.class);
    arrowFieldVector.setInitialCapacity(size);
    arrowFieldVector.allocateNew();
    write(arrowFieldVector, hiveFieldVector, fieldTypeInfo, size, vectorizedRowBatch, isNative);
  }
  // Stamp struct-level nulls after children are populated.
  final ArrowBuf validityBuffer = arrowVector.getValidityBuffer();
  for (int rowIndex = 0; rowIndex < size; rowIndex++) {
    if (hiveVector.isNull[rowIndex]) {
      BitVectorHelper.setValidityBit(validityBuffer, rowIndex, 0);
    } else {
      BitVectorHelper.setValidityBitToOne(validityBuffer, rowIndex);
    }
  }
}
public ArrowWrapperWritable serializeBatch(VectorizedRowBatch vectorizedRowBatch, boolean isNative) { rootVector.setValueCount(0); if(rootVector.getChild(fieldName) != null) { fieldExists = true; final FieldVector arrowVector = rootVector.addOrGet(fieldName, fieldType, FieldVector.class); if(fieldExists) { arrowVector.setValueCount(isNative ? vectorizedRowBatch.size : batchSize); } else { arrowVector.setInitialCapacity(isNative ? vectorizedRowBatch.size : batchSize); arrowVector.allocateNew(); rootVector.setValueCount(batchSize); } else { rootVector.setValueCount(vectorizedRowBatch.size); VectorSchemaRoot vectorSchemaRoot = new VectorSchemaRoot(rootVector); return new ArrowWrapperWritable(vectorSchemaRoot, allocator, rootVector);
/**
 * Serializes a Hive list column into an Arrow list vector.
 *
 * The flattened child elements are written first, then the Arrow offset buffer
 * is rebuilt row by row: a null row contributes zero length (its offset equals
 * the next row's), while a non-null row advances the running offset by its
 * length and is marked valid.
 */
private void writeList(ListVector arrowVector, ListColumnVector hiveVector, ListTypeInfo typeInfo,
    int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative) {
  final int OFFSET_WIDTH = 4;
  final TypeInfo elementTypeInfo = typeInfo.getListElementTypeInfo();
  final ColumnVector hiveElementVector = hiveVector.child;
  final FieldVector arrowElementVector =
      (FieldVector) arrowVector.addOrGetVector(toFieldType(elementTypeInfo)).getVector();
  arrowElementVector.setInitialCapacity(hiveVector.childCount);
  arrowElementVector.allocateNew();
  write(arrowElementVector, hiveElementVector, elementTypeInfo, hiveVector.childCount,
      vectorizedRowBatch, isNative);

  final ArrowBuf offsetBuffer = arrowVector.getOffsetBuffer();
  int nextOffset = 0;
  for (int rowIndex = 0; rowIndex < size; rowIndex++) {
    // Every row records the running offset; only non-null rows advance it.
    offsetBuffer.setInt(rowIndex * OFFSET_WIDTH, nextOffset);
    if (!hiveVector.isNull[rowIndex]) {
      nextOffset += (int) hiveVector.lengths[rowIndex];
      arrowVector.setNotNull(rowIndex);
    }
  }
  // Trailing offset closes the last list.
  offsetBuffer.setInt(size * OFFSET_WIDTH, nextOffset);
}
@Override public boolean next(NullWritable key, ArrowWrapperWritable value) throws IOException { try { // Need a way to know what thread to interrupt, since this is a blocking thread. setReaderThread(Thread.currentThread()); boolean hasInput = arrowStreamReader.loadNextBatch(); if (hasInput) { VectorSchemaRoot vectorSchemaRoot = arrowStreamReader.getVectorSchemaRoot(); //There must be at least one column vector Preconditions.checkState(vectorSchemaRoot.getFieldVectors().size() > 0); if(vectorSchemaRoot.getFieldVectors().get(0).getValueCount() == 0) { //An empty batch will appear at the end of the stream return false; } value.setVectorSchemaRoot(arrowStreamReader.getVectorSchemaRoot()); return true; } else { processReaderEvent(); return false; } } catch (IOException io) { failOnInterruption(io); return false; } }
/**
 * Deserializes an Arrow list vector into a Hive list column: the child data is
 * read recursively, then per-row offsets/lengths are derived from Arrow's
 * 4-byte offset buffer (length = next offset - this offset).
 */
private void readList(FieldVector arrowVector, ListColumnVector hiveVector, ListTypeInfo typeInfo) {
  final int OFFSET_WIDTH = 4;
  final int rowCount = arrowVector.getValueCount();
  final ArrowBuf offsetBuf = arrowVector.getOffsetBuffer();
  read(arrowVector.getChildrenFromFields().get(0), hiveVector.child,
      typeInfo.getListElementTypeInfo());
  for (int row = 0; row < rowCount; row++) {
    if (arrowVector.isNull(row)) {
      VectorizedBatchUtil.setNullColIsNullValue(hiveVector, row);
      continue;
    }
    hiveVector.isNull[row] = false;
    final int start = offsetBuf.getInt(row * OFFSET_WIDTH);
    final int end = offsetBuf.getInt((row + 1) * OFFSET_WIDTH);
    hiveVector.offsets[row] = start;
    hiveVector.lengths[row] = end - start;
  }
}
/**
 * Deserializes an Arrow struct vector into a Hive struct column: each child
 * field is read recursively, then struct-level null flags are copied per row.
 */
private void readStruct(FieldVector arrowVector, StructColumnVector hiveVector, StructTypeInfo typeInfo) {
  final List<TypeInfo> childTypes = typeInfo.getAllStructFieldTypeInfos();
  final int childCount = arrowVector.getChildrenFromFields().size();
  for (int child = 0; child < childCount; child++) {
    read(arrowVector.getChildrenFromFields().get(child), hiveVector.fields[child], childTypes.get(child));
  }
  final int rowCount = arrowVector.getValueCount();
  for (int row = 0; row < rowCount; row++) {
    if (arrowVector.isNull(row)) {
      VectorizedBatchUtil.setNullColIsNullValue(hiveVector, row);
    } else {
      hiveVector.isNull[row] = false;
    }
  }
}
@Test public void testPrimitiveCharPadding() throws SerDeException { String[][] schema = { {"char1", "char(10)"}, }; HiveCharWritable[][] rows = new HiveCharWritable[][] { {charW("Hello", 10)}, {charW("world!", 10)}}; ArrowColumnarBatchSerDe serDe = new ArrowColumnarBatchSerDe(); StructObjectInspector rowOI = initSerDe(serDe, schema); ArrowWrapperWritable serialized = null; for (Object[] row : rows) { serialized = serDe.serialize(row, rowOI); } // Pass null to complete a batch if (serialized == null) { serialized = serDe.serialize(null, rowOI); } VarCharVector varCharVector = (VarCharVector) serialized.getVectorSchemaRoot().getFieldVectors().get(0); for (int i = 0; i < rows.length; i++) { assertEquals(rows[i][0].getPaddedValue().toString(), new String(varCharVector.get(i))); } }
/**
 * Serializes a Hive map column into an Arrow list-of-struct vector by
 * converting the map to its list&lt;struct&lt;key,value&gt;&gt; representation,
 * delegating to the generic writer, then stamping the map's own null flags.
 */
private void writeMap(ListVector arrowVector, MapColumnVector hiveVector, MapTypeInfo typeInfo,
    int size, VectorizedRowBatch vectorizedRowBatch, boolean isNative) {
  final ListTypeInfo listTypeInfo = toStructListTypeInfo(typeInfo);
  final ListColumnVector listVector = toStructListVector(hiveVector);

  write(arrowVector, listVector, listTypeInfo, size, vectorizedRowBatch, isNative);

  // Validity comes from the original map vector, not the derived list view.
  final ArrowBuf validity = arrowVector.getValidityBuffer();
  for (int row = 0; row < size; row++) {
    if (hiveVector.isNull[row]) {
      BitVectorHelper.setValidityBit(validity, row, 0);
    } else {
      BitVectorHelper.setValidityBitToOne(validity, row);
    }
  }
}
// Fragment of an enclosing method (header outside this view): loads the next
// Arrow batch into row form.
// NOTE(review): assumes batchData holds at least one field vector — get(0)
// would throw on an empty schema; confirm against the producer.
List<FieldVector> vectors = batchData.getVectorSchemaRoot().getFieldVectors();
// Row count of the batch is taken from the first column's value count.
batchSize = vectors.get(0).getValueCount();
ArrowWrapperWritable wrapper = new ArrowWrapperWritable(batchData.getVectorSchemaRoot());
// The serde's deserialize returns all rows of the batch as Object[row][field].
currentBatch = (Object[][]) serde.deserialize(wrapper);
@Override final long getLong(int rowId) { return accessor.get(rowId); } }
/**
 * Deserializes an Arrow batch into an Object[row][field] matrix.
 *
 * Each Arrow field vector is read into its projected Hive column vector, then
 * rows are extracted one by one via {@code vectorExtractRow}. The row array is
 * reused across calls and only reallocated when the batch grows.
 *
 * @param writable an {@link ArrowWrapperWritable} holding the batch
 * @return the reused Object[][] of extracted rows
 */
public Object deserialize(Writable writable) {
  final ArrowWrapperWritable wrapper = (ArrowWrapperWritable) writable;
  final VectorSchemaRoot schemaRoot = wrapper.getVectorSchemaRoot();
  final List<FieldVector> columns = schemaRoot.getFieldVectors();
  final int columnCount = columns.size();
  final int rowCount = schemaRoot.getRowCount();

  vectorizedRowBatch.ensureSize(rowCount);

  // Reuse the row buffer across batches; grow only when needed.
  if (rows == null || rows.length < rowCount) {
    rows = new Object[rowCount][];
    for (int row = 0; row < rowCount; row++) {
      rows[row] = new Object[columnCount];
    }
  }

  for (int col = 0; col < columnCount; col++) {
    final FieldVector fieldVector = columns.get(col);
    final int projectedCol = vectorizedRowBatch.projectedColumns[col];
    final ColumnVector columnVector = vectorizedRowBatch.cols[projectedCol];
    final TypeInfo typeInfo = serDe.rowTypeInfo.getAllStructFieldTypeInfos().get(col);
    read(fieldVector, columnVector, typeInfo);
  }

  for (int row = 0; row < rowCount; row++) {
    vectorExtractRow.extractRow(vectorizedRowBatch, row, rows[row]);
  }

  vectorizedRowBatch.reset();
  return rows;
}
@Override final int getInt(int rowId) { return accessor.get(rowId); } }
/**
 * Deserializes an Arrow list-of-struct vector into a Hive map column.
 *
 * The Arrow data is first read into a derived list&lt;struct&lt;key,value&gt;&gt;
 * view, then the map vector's fields are wired to the struct's key/value
 * children and per-row metadata (offsets, lengths, null flags) is copied over.
 */
private void readMap(FieldVector arrowVector, MapColumnVector hiveVector, MapTypeInfo typeInfo) {
  final int rowCount = arrowVector.getValueCount();
  final ListTypeInfo listViewTypeInfo = toStructListTypeInfo(typeInfo);
  final ListColumnVector listView = toStructListVector(hiveVector);
  final StructColumnVector entryStruct = (StructColumnVector) listView.child;

  read(arrowVector, listView, listViewTypeInfo);

  hiveVector.isRepeating = listView.isRepeating;
  hiveVector.childCount = listView.childCount;
  hiveVector.noNulls = listView.noNulls;
  hiveVector.keys = entryStruct.fields[0];
  hiveVector.values = entryStruct.fields[1];
  System.arraycopy(listView.offsets, 0, hiveVector.offsets, 0, rowCount);
  System.arraycopy(listView.lengths, 0, hiveVector.lengths, 0, rowCount);
  System.arraycopy(listView.isNull, 0, hiveVector.isNull, 0, rowCount);
}
@Override final boolean getBoolean(int rowId) { return accessor.get(rowId) == 1; } }
@Override final byte[] getBinary(int rowId) { return accessor.getObject(rowId); } }
@Override final short getShort(int rowId) { return accessor.get(rowId); } }
@Override final long getLong(int rowId) { return accessor.get(rowId); } }
@Override final int getInt(int rowId) { return accessor.get(rowId); } }
@Override final long getLong(int rowId) { return accessor.get(rowId); } }