/**
 * Builds a zero-row batch around {@code rootVector}.
 *
 * <p>The root vector's value count is set to 0, then for every entry in
 * {@code fieldTypeInfos} a child vector is added to (or fetched from) the root under the
 * matching name from {@code fieldNames}, given an initial capacity of 0 and allocated.
 *
 * @return a new {@link ArrowWrapperWritable} holding a {@link VectorSchemaRoot} built from
 *     {@code rootVector}, together with {@code allocator} and {@code rootVector} themselves
 */
public ArrowWrapperWritable emptyBatch() {
  rootVector.setValueCount(0);

  for (int i = 0; i < fieldTypeInfos.size(); i++) {
    final TypeInfo typeInfo = fieldTypeInfos.get(i);
    final String name = fieldNames.get(i);
    final FieldVector child =
        rootVector.addOrGet(name, toFieldType(typeInfo), FieldVector.class);
    child.setInitialCapacity(0);
    child.allocateNew();
  }

  return new ArrowWrapperWritable(new VectorSchemaRoot(rootVector), allocator, rootVector);
}
VectorSchemaRoot vectorSchemaRoot = new VectorSchemaRoot(rootVector); return new ArrowWrapperWritable(vectorSchemaRoot, allocator, rootVector);
/**
 * Creates a {@link VectorSchemaRoot} from the given vectors.
 *
 * @param vectors the vectors to place in the root, in argument order
 * @return a new root constructed from a list copy of {@code vectors}
 */
public static VectorSchemaRoot of(FieldVector... vectors) {
  final List<FieldVector> vectorList = Arrays.stream(vectors).collect(Collectors.toList());
  return new VectorSchemaRoot(vectorList);
}
/**
 * Creates a {@link VectorSchemaRoot} with one freshly created vector per field of the schema
 * and a row count of 0.
 *
 * @param schema the schema whose fields determine the vectors to create
 * @param allocator the allocator handed to {@code Field.createVector} for every field
 * @return a new root built from {@code schema} and the created vectors
 * @throws IllegalArgumentException if the number of created vectors differs from the number
 *     of schema fields
 */
public static VectorSchemaRoot create(Schema schema, BufferAllocator allocator) {
  final List<Field> schemaFields = schema.getFields();
  final List<FieldVector> createdVectors = schemaFields.stream()
      .map(schemaField -> schemaField.createVector(allocator))
      .collect(Collectors.toList());

  // Sanity check: exactly one vector must exist per schema field.
  final int found = createdVectors.size();
  final int expected = schemaFields.size();
  if (found != expected) {
    throw new IllegalArgumentException("The root vector did not create the right number of children. found " + found + " expected " + expected);
  }

  return new VectorSchemaRoot(schema, createdVectors, 0);
}
/**
 * Wraps the value vectors of {@code batch} in a {@link VectorSchemaRoot}.
 *
 * <p>Every wrapper's value vector is cast to {@link FieldVector}; the root is built from the
 * fields of the batch's schema, those vectors, and the batch's record count.
 *
 * @param batch the container to read vectors, schema fields and record count from
 * @return the newly constructed root
 */
public static VectorSchemaRoot getVectorSchemaRoot(final VectorAccessible batch) {
  final List<FieldVector> vectors = FluentIterable.from(batch)
      .transform(wrapper -> (FieldVector) wrapper.getValueVector())
      .toList();
  final int recordCount = batch.getRecordCount();
  final List<Field> schemaFields = batch.getSchema().getFields();
  return new VectorSchemaRoot(schemaFields, vectors, recordCount);
}
/**
 * Creates a vector schema root containing only those vectors of the incoming container whose
 * field is present in {@code referencedFields}.
 *
 * <p>The root's schema is built from the fields of the retained vectors, and the root is
 * constructed with a row count of 0.
 *
 * @param input container whose value vectors are examined; each one is cast to
 *     {@link FieldVector}
 * @param referencedFields set tested with {@code contains} against each vector's field
 * @return the vector schema root.
 */
public static VectorSchemaRoot getSchemaRoot(VectorAccessible input, Set referencedFields) {
  final List<FieldVector> referencedVectors = ImmutableList.copyOf(input).stream()
      .map(wrapper -> (FieldVector) wrapper.getValueVector())
      .filter(vector -> referencedFields.contains(vector.getField()))
      .collect(Collectors.toList());

  final List<Field> retainedFields = referencedVectors.stream()
      .map(FieldVector::getField)
      .collect(Collectors.toList());

  return new VectorSchemaRoot(new Schema(retainedFields), referencedVectors, 0);
}
}
/**
 * Writes the {@code "dictionaries"} array field.
 *
 * <p>For each id in {@code dictionaryIdsUsed} an object is emitted with an {@code "id"} field
 * followed by a {@code "data"} field name; the dictionary's vector is then wrapped in a
 * single-column {@link VectorSchemaRoot} and passed to {@code writeBatch}.
 *
 * @param generator the JSON generator receiving the array, object and field tokens
 * @param dictionaryIdsUsed ids of the dictionaries to write, in iteration order
 * @param provider used to look up each dictionary by id
 * @throws IOException if the generator or {@code writeBatch} fails
 */
private void writeDictionaryBatches(JsonGenerator generator, Set<Long> dictionaryIdsUsed, DictionaryProvider provider) throws IOException {
  generator.writeArrayFieldStart("dictionaries");
  for (Long dictionaryId : dictionaryIdsUsed) {
    generator.writeStartObject();
    generator.writeObjectField("id", dictionaryId);
    generator.writeFieldName("data");

    final FieldVector dictionaryVector = provider.lookup(dictionaryId).getVector();
    // Single-column root whose row count is the dictionary vector's value count.
    writeBatch(new VectorSchemaRoot(
        Collections.singletonList(dictionaryVector.getField()),
        Collections.singletonList(dictionaryVector),
        dictionaryVector.getValueCount()));

    generator.writeEndObject();
  }
  generator.writeEndArray();
}
/** * Reads the schema and initializes the vectors. */ private void initialize() throws IOException { Schema originalSchema = readSchema(); List<Field> fields = new ArrayList<>(); List<FieldVector> vectors = new ArrayList<>(); Map<Long, Dictionary> dictionaries = new HashMap<>(); // Convert fields with dictionaries to have the index type for (Field field : originalSchema.getFields()) { Field updated = DictionaryUtility.toMemoryFormat(field, allocator, dictionaries); fields.add(updated); vectors.add(updated.createVector(allocator)); } Schema schema = new Schema(fields, originalSchema.getCustomMetadata()); this.root = new VectorSchemaRoot(schema, vectors, 0); this.loader = new VectorLoader(root); this.dictionaries = Collections.unmodifiableMap(dictionaries); // Read and load all dictionaries from schema for (int i = 0; i < dictionaries.size(); i++) { ArrowDictionaryBatch dictionaryBatch = readDictionary(); loadDictionary(dictionaryBatch); } }
/**
 * Load an ArrowDictionaryBatch to the readers dictionary vectors.
 *
 * <p>The batch is closed before this method returns on every path: after a successful load,
 * when loading throws, and when the batch's dictionary id is unknown.
 *
 * @param dictionaryBatch dictionary batch to load
 * @throws IllegalArgumentException if the batch's dictionary id has no entry in
 *     {@code dictionaries}
 */
protected void loadDictionary(ArrowDictionaryBatch dictionaryBatch) {
  // The lookup lives inside the try so the batch is also released when the id is unknown;
  // previously that early throw skipped the close.
  try {
    long id = dictionaryBatch.getDictionaryId();
    Dictionary dictionary = dictionaries.get(id);
    if (dictionary == null) {
      throw new IllegalArgumentException("Dictionary ID " + id + " not defined in schema");
    }
    FieldVector vector = dictionary.getVector();
    // Single-column root around the dictionary's own vector, created with row count 0.
    VectorSchemaRoot root = new VectorSchemaRoot(
        Collections.singletonList(vector.getField()),
        Collections.singletonList(vector),
        0);
    VectorLoader loader = new VectorLoader(root);
    loader.load(dictionaryBatch.getDictionary());
  } finally {
    dictionaryBatch.close();
  }
}
}
private void readDictionaryBatches() throws JsonParseException, IOException { readToken(START_ARRAY); JsonToken token = parser.nextToken(); boolean haveDictionaryBatch = token == START_OBJECT; while (haveDictionaryBatch) { // Lookup what dictionary for the batch about to be read long id = readNextField("id", Long.class); Dictionary dict = dictionaries.get(id); if (dict == null) { throw new IllegalArgumentException("Dictionary with id: " + id + " missing encoding from schema Field"); } // Read the dictionary record batch nextFieldIs("data"); FieldVector vector = dict.getVector(); List<Field> fields = Collections.singletonList(vector.getField()); List<FieldVector> vectors = Collections.singletonList(vector); VectorSchemaRoot root = new VectorSchemaRoot(fields, vectors, vector.getValueCount()); read(root); readToken(END_OBJECT); token = parser.nextToken(); haveDictionaryBatch = token == START_OBJECT; } if (token != END_ARRAY) { throw new IllegalArgumentException("Invalid token: " + token + " expected end of array at " + parser.getTokenLocation()); } }
FieldVector vector = dictionary.getVector(); int count = vector.getValueCount(); VectorSchemaRoot dictRoot = new VectorSchemaRoot( Collections.singletonList(vector.getField()), Collections.singletonList(vector),
List<FieldVector> columns = toArrowColumns(bufferAllocator,inputSchema,recordBatch); try { VectorSchemaRoot root = new VectorSchemaRoot(convertedSchema,columns,recordBatch.size()); val pair = toArrowColumns(bufferAllocator,inputSchema,recordBatch); try { VectorSchemaRoot root = new VectorSchemaRoot(convertedSchema,pair,recordBatch.size());