/**
 * Reads an Arrow map vector into a Hive {@link MapColumnVector}.
 *
 * A map is physically a list of key/value structs, so the work is delegated
 * to the list reader and the results are copied back onto the map vector.
 *
 * @param arrowVector source Arrow vector
 * @param hiveVector destination Hive map vector
 * @param typeInfo Hive type information for the map
 */
private void readMap(FieldVector arrowVector, MapColumnVector hiveVector, MapTypeInfo typeInfo) {
  final int rowCount = arrowVector.getValueCount();
  final ListTypeInfo structListTypeInfo = toStructListTypeInfo(typeInfo);
  final ListColumnVector structListVector = toStructListVector(hiveVector);
  final StructColumnVector entryVector = (StructColumnVector) structListVector.child;

  // Populate the list-of-structs view, then mirror its state onto the map vector.
  read(arrowVector, structListVector, structListTypeInfo);

  hiveVector.isRepeating = structListVector.isRepeating;
  hiveVector.childCount = structListVector.childCount;
  hiveVector.noNulls = structListVector.noNulls;
  // Entry struct field 0 holds the keys, field 1 holds the values.
  hiveVector.keys = entryVector.fields[0];
  hiveVector.values = entryVector.fields[1];
  System.arraycopy(structListVector.offsets, 0, hiveVector.offsets, 0, rowCount);
  System.arraycopy(structListVector.lengths, 0, hiveVector.lengths, 0, rowCount);
  System.arraycopy(structListVector.isNull, 0, hiveVector.isNull, 0, rowCount);
}
/**
 * Reads an Arrow struct vector into a Hive {@link StructColumnVector}.
 *
 * Child fields are read recursively first, then struct-level null flags are
 * applied row by row.
 *
 * @param arrowVector source Arrow vector (one child per struct field)
 * @param hiveVector destination Hive vector ({@code fields[]} parallel to the Arrow children)
 * @param typeInfo Hive type information for the struct
 */
private void readStruct(FieldVector arrowVector, StructColumnVector hiveVector, StructTypeInfo typeInfo) {
  final int size = arrowVector.getValueCount();
  final List<TypeInfo> fieldTypeInfos = typeInfo.getAllStructFieldTypeInfos();
  // Hoisted out of the loop: the original called getChildrenFromFields() on
  // every iteration even though the child list does not change while iterating.
  final List<FieldVector> children = arrowVector.getChildrenFromFields();
  final int fieldSize = children.size();
  for (int i = 0; i < fieldSize; i++) {
    read(children.get(i), hiveVector.fields[i], fieldTypeInfos.get(i));
  }
  for (int i = 0; i < size; i++) {
    if (arrowVector.isNull(i)) {
      VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
    } else {
      hiveVector.isNull[i] = false;
    }
  }
}
// Fragment of a larger method (enclosing definition not visible in this view):
// derive the batch size from the first column, wrap the schema root for the
// serde, and deserialize the whole batch into row-major form.
// NOTE(review): assumes every vector shares the first vector's row count and
// that serde.deserialize yields Object[][] — confirm against the caller.
batchSize = vectors.get(0).getValueCount(); ArrowWrapperWritable wrapper = new ArrowWrapperWritable(batchData.getVectorSchemaRoot()); currentBatch = (Object[][]) serde.deserialize(wrapper);
@Override public boolean next(NullWritable key, ArrowWrapperWritable value) throws IOException { try { // Need a way to know what thread to interrupt, since this is a blocking thread. setReaderThread(Thread.currentThread()); boolean hasInput = arrowStreamReader.loadNextBatch(); if (hasInput) { VectorSchemaRoot vectorSchemaRoot = arrowStreamReader.getVectorSchemaRoot(); //There must be at least one column vector Preconditions.checkState(vectorSchemaRoot.getFieldVectors().size() > 0); if(vectorSchemaRoot.getFieldVectors().get(0).getValueCount() == 0) { //An empty batch will appear at the end of the stream return false; } value.setVectorSchemaRoot(arrowStreamReader.getVectorSchemaRoot()); return true; } else { processReaderEvent(); return false; } } catch (IOException io) { failOnInterruption(io); return false; } }
/**
 * Reads an Arrow list vector into a Hive {@link ListColumnVector}.
 *
 * Element data is read recursively into the child vector; per-row offsets and
 * lengths are derived from Arrow's offset buffer.
 *
 * @param arrowVector source Arrow vector
 * @param hiveVector destination Hive list vector
 * @param typeInfo Hive type information for the list
 */
private void readList(FieldVector arrowVector, ListColumnVector hiveVector, ListTypeInfo typeInfo) {
  // Arrow list offsets are 32-bit ints laid out contiguously in the offset buffer.
  final int OFFSET_WIDTH = 4;
  final int rowCount = arrowVector.getValueCount();
  final ArrowBuf offsetBuffer = arrowVector.getOffsetBuffer();

  // All element data lives in the single child vector.
  read(arrowVector.getChildrenFromFields().get(0), hiveVector.child, typeInfo.getListElementTypeInfo());

  for (int row = 0; row < rowCount; row++) {
    if (arrowVector.isNull(row)) {
      VectorizedBatchUtil.setNullColIsNullValue(hiveVector, row);
      continue;
    }
    hiveVector.isNull[row] = false;
    final int start = offsetBuffer.getInt(row * OFFSET_WIDTH);
    final int end = offsetBuffer.getInt((row + 1) * OFFSET_WIDTH);
    hiveVector.offsets[row] = start;
    hiveVector.lengths[row] = end - start;
  }
}
/**
 * Returns the number of values held by the wrapped vector.
 *
 * @return the wrapped vector's value count
 */
public int getInnerValueCount() {
  return this.vector.getValueCount();
}
/**
 * Creates a record batch over the given field vectors, spanning all rows:
 * from row 0 up to the value count of the first vector (all vectors are
 * expected to have the same number of rows).
 *
 * @param list the list of field vectors to use; must be non-null and non-empty
 * @param schema the schema to use
 * @throws IllegalArgumentException if {@code list} is null or empty
 */
public ArrowWritableRecordBatch(List<FieldVector> list, Schema schema) {
  this(list, schema, 0, rowCount(list));
}

/** Returns the row count of the first vector, failing fast on a null/empty list. */
private static int rowCount(List<FieldVector> list) {
  if (list == null || list.isEmpty()) {
    // Previously an empty list surfaced as an opaque IndexOutOfBoundsException.
    throw new IllegalArgumentException("Field vector list must be non-null and non-empty");
  }
  return list.get(0).getValueCount();
}
/**
 * Creates a time-series record batch over the given field vectors.
 *
 * The overall size is the total number of values across all columns divided
 * by the stride (rows per time step).
 *
 * @param list the list of field vectors to use; must be non-null and non-empty
 * @param schema the schema to use
 * @param timeSeriesStride number of rows per time step; must be positive
 * @throws IllegalArgumentException if {@code timeSeriesStride} is not positive
 *         or {@code list} is null or empty
 */
public ArrowWritableRecordTimeSeriesBatch(List<FieldVector> list, Schema schema, int timeSeriesStride) {
  if (timeSeriesStride <= 0) {
    // Previously a zero stride surfaced as a raw ArithmeticException (divide by zero).
    throw new IllegalArgumentException("timeSeriesStride must be positive, got " + timeSeriesStride);
  }
  if (list == null || list.isEmpty()) {
    throw new IllegalArgumentException("Field vector list must be non-null and non-empty");
  }
  this.list = list;
  this.schema = schema;
  // Each column should have the same number of rows.
  this.timeSeriesStride = timeSeriesStride;
  this.size = list.size() * list.get(0).getValueCount() / timeSeriesStride;
}
/**
 * Writes the JSON "dictionaries" array: one object per used dictionary id,
 * each containing the id and its data serialized as a record batch.
 *
 * @param generator JSON output
 * @param dictionaryIdsUsed ids of dictionaries referenced by the schema
 * @param provider resolves ids to dictionaries
 * @throws IOException on write failure
 */
private void writeDictionaryBatches(JsonGenerator generator, Set<Long> dictionaryIdsUsed, DictionaryProvider provider) throws IOException {
  generator.writeArrayFieldStart("dictionaries");
  for (Long id : dictionaryIdsUsed) {
    generator.writeStartObject();
    generator.writeObjectField("id", id);
    generator.writeFieldName("data");
    // Wrap the dictionary's backing vector in a single-column root so the
    // regular batch writer can serialize it.
    final Dictionary dictionary = provider.lookup(id);
    final FieldVector dictionaryVector = dictionary.getVector();
    final VectorSchemaRoot dictionaryRoot = new VectorSchemaRoot(
        Collections.singletonList(dictionaryVector.getField()),
        Collections.singletonList(dictionaryVector),
        dictionaryVector.getValueCount());
    writeBatch(dictionaryRoot);
    generator.writeEndObject();
  }
  generator.writeEndArray();
}
/**
 * Recursively appends this vector's field node and data buffers, then those of
 * its children (pre-order), building the flattened record-batch body.
 *
 * @param vector vector to append
 * @param nodes accumulates one ArrowFieldNode per vector
 * @param buffers accumulates each vector's field buffers in layout order
 * @throws IllegalArgumentException if the vector's buffer count does not match
 *         the expected type layout
 */
private void appendNodes(FieldVector vector, List<ArrowFieldNode> nodes, List<ArrowBuf> buffers) {
  // Null count is only reported when requested; -1 signals "not computed".
  final int nullCount = includeNullCount ? vector.getNullCount() : -1;
  nodes.add(new ArrowFieldNode(vector.getValueCount(), nullCount));
  final List<ArrowBuf> fieldBuffers = vector.getFieldBuffers();
  final List<BufferType> expectedBuffers = TypeLayout.getTypeLayout(vector.getField().getType()).getBufferTypes();
  if (fieldBuffers.size() != expectedBuffers.size()) {
    throw new IllegalArgumentException(String.format(
        "wrong number of buffers for field %s in vector %s. found: %s",
        vector.getField(), vector.getClass().getSimpleName(), fieldBuffers));
  }
  buffers.addAll(fieldBuffers);
  // Children follow their parent so node order matches the IPC layout.
  for (FieldVector child : vector.getChildrenFromFields()) {
    appendNodes(child, nodes, buffers);
  }
}
/**
 * Constructs a root from a parent vector: the root's columns are the parent's
 * child fields backed by the parent's child vectors, and the row count is the
 * parent's value count.
 *
 * @param parent the vector whose children become this root's columns
 */
public VectorSchemaRoot(FieldVector parent) { this(parent.getField().getChildren(), parent.getChildrenFromFields(), parent.getValueCount()); }
/**
 * Validates that two Arrow field vectors are equal: same field metadata, same
 * value count, and element-wise equal values.
 *
 * @param vector1 the 1st VectorField to compare
 * @param vector2 the 2nd VectorField to compare
 * @throws IllegalArgumentException if they are different
 */
public static void compareFieldVectors(FieldVector vector1, FieldVector vector2) {
  final Field field1 = vector1.getField();
  final Field field2 = vector2.getField();
  if (!field1.equals(field2)) {
    throw new IllegalArgumentException("Different Fields:\n" + field1 + "\n!=\n" + field2);
  }
  final int valueCount = vector1.getValueCount();
  if (valueCount != vector2.getValueCount()) {
    throw new IllegalArgumentException("Different value count for field " + field1 + " : "
        + valueCount + " != " + vector2.getValueCount());
  }
  // Element-wise comparison via the boxed object representation.
  for (int index = 0; index < valueCount; index++) {
    final Object value1 = vector1.getObject(index);
    final Object value2 = vector2.getObject(index);
    if (!equals(field1.getType(), value1, value2)) {
      throw new IllegalArgumentException(
          "Different values in column:\n" + field1 + " at index " + index + ": " + value1 + " != " + value2);
    }
  }
}
// Fragment of a larger method (enclosing definition not visible in this view):
// emit the vector's value count as the JSON "count" field.
int valueCount = vector.getValueCount(); generator.writeObjectField("count", valueCount);
private void readDictionaryBatches() throws JsonParseException, IOException { readToken(START_ARRAY); JsonToken token = parser.nextToken(); boolean haveDictionaryBatch = token == START_OBJECT; while (haveDictionaryBatch) { // Lookup what dictionary for the batch about to be read long id = readNextField("id", Long.class); Dictionary dict = dictionaries.get(id); if (dict == null) { throw new IllegalArgumentException("Dictionary with id: " + id + " missing encoding from schema Field"); } // Read the dictionary record batch nextFieldIs("data"); FieldVector vector = dict.getVector(); List<Field> fields = Collections.singletonList(vector.getField()); List<FieldVector> vectors = Collections.singletonList(vector); VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, vector.getValueCount()); read(root); readToken(END_OBJECT); token = parser.nextToken(); haveDictionaryBatch = token == START_OBJECT; } if (token != END_ARRAY) { throw new IllegalArgumentException("Invalid token: " + token + " expected end of array at " + parser.getTokenLocation()); } }
@Override public boolean next(NullWritable key, ArrowWrapperWritable value) throws IOException { try { // Need a way to know what thread to interrupt, since this is a blocking thread. setReaderThread(Thread.currentThread()); boolean hasInput = arrowStreamReader.loadNextBatch(); if (hasInput) { VectorSchemaRoot vectorSchemaRoot = arrowStreamReader.getVectorSchemaRoot(); //There must be at least one column vector Preconditions.checkState(vectorSchemaRoot.getFieldVectors().size() > 0); if(vectorSchemaRoot.getFieldVectors().get(0).getValueCount() == 0) { //An empty batch will appear at the end of the stream return false; } value.setVectorSchemaRoot(arrowStreamReader.getVectorSchemaRoot()); return true; } else { processReaderEvent(); return false; } } catch (IOException io) { failOnInterruption(io); return false; } }
/**
 * Convert an Arrow field vector to an ND4J column vector.
 *
 * @param fieldVector the field vector to convert
 * @param type the numeric column type describing the vector's data
 * @return a [cols, 1] INDArray over a native-order copy of the vector's data buffer
 * @throws IllegalArgumentException if {@code type} is not Integer, Float, Double or Long
 */
public static INDArray convertArrowVector(FieldVector fieldVector, ColumnType type) {
  final int cols = fieldVector.getValueCount();
  // Copy the Arrow data buffer into a direct, native-order ByteBuffer that ND4J can wrap.
  final ByteBuffer direct = ByteBuffer.allocateDirect(fieldVector.getDataBuffer().capacity());
  direct.order(ByteOrder.nativeOrder());
  fieldVector.getDataBuffer().getBytes(0, direct);
  direct.rewind();
  final DataBuffer buffer;
  switch (type) {
    case Integer:
      buffer = Nd4j.createBuffer(direct, DataBuffer.Type.INT, cols, 0);
      break;
    case Float:
      buffer = Nd4j.createBuffer(direct, DataBuffer.Type.FLOAT, cols);
      break;
    case Double:
      buffer = Nd4j.createBuffer(direct, DataBuffer.Type.DOUBLE, cols);
      break;
    case Long:
      buffer = Nd4j.createBuffer(direct, DataBuffer.Type.LONG, cols);
      break;
    default:
      // Previously an unsupported type fell through with a null buffer and
      // failed later with an opaque NPE; fail fast and name the type instead.
      throw new IllegalArgumentException("Cannot convert Arrow vector of column type " + type);
  }
  return Nd4j.create(buffer, new int[] {cols, 1});
}
// Fragment of a larger method (enclosing definition not visible in this view):
// take the first column's value count as the batch size, then deserialize the
// wrapped schema root into row-major Object[][] form via the serde.
// NOTE(review): assumes all vectors share the same row count — confirm upstream.
batchSize = vectors.get(0).getValueCount(); ArrowWrapperWritable wrapper = new ArrowWrapperWritable(batchData.getVectorSchemaRoot()); currentBatch = (Object[][]) serde.deserialize(wrapper);
// Fragment of a larger method: resolve the dictionary's backing vector and
// start building a single-column VectorSchemaRoot around it.
// NOTE(review): the constructor call is truncated here — its remaining
// arguments lie outside this excerpt.
Dictionary dictionary = provider.lookup(id); FieldVector vector = dictionary.getVector(); int count = vector.getValueCount(); VectorSchemaRoot dictRoot = new VectorSchemaRoot( Collections.singletonList(vector.getField()),
// Fragment of a larger method (enclosing definition not visible in this view).
// NOTE(review): this bounds check looks off-by-one — if `item` is a zero-based
// index, valid values are 0..getValueCount()-1, so item == getValueCount()
// should also be rejected (i.e. `<=` rather than `<`). Confirm whether `item`
// is an index or a count before changing the condition.
if(from.getValueCount() < item) { throw new IllegalArgumentException("Index specified greater than the number of items in the vector with length " + from.getValueCount());
// Fragment of a larger method (enclosing definition not visible in this view):
// capture the reallocator field locally, then validate the variable-width
// source vector across its full value count before it is consumed.
final Reallocator realloc = this.realloc; VariableLengthValidator.validateVariable(source, source.getValueCount());