/**
 * Constructs a root from a schema and its corresponding field vectors.
 *
 * @param schema       schema describing the vectors
 * @param fieldVectors the vectors, one per schema field, in schema order
 * @param rowCount     number of rows held by the vectors
 * @throws IllegalArgumentException if the vector count differs from the schema's field count
 */
public VectorSchemaRoot(Schema schema, List<FieldVector> fieldVectors, int rowCount) {
  int fieldCount = schema.getFields().size();
  if (fieldCount != fieldVectors.size()) {
    throw new IllegalArgumentException("Fields must match field vectors. Found "
        + fieldVectors.size() + " vectors and " + fieldCount + " fields");
  }
  this.schema = schema;
  this.rowCount = rowCount;
  this.fieldVectors = fieldVectors;
  // Index each vector by its field name for name-based lookup.
  for (int idx = 0; idx < fieldCount; idx++) {
    fieldVectorsMap.put(schema.getFields().get(idx).getName(), fieldVectors.get(idx));
  }
}
/**
 * Creates a {@link VectorSchemaRoot} with freshly allocated vectors, one per field in
 * {@code schema}, and a row count of zero.
 *
 * @param schema    schema to create vectors for
 * @param allocator allocator used to create each field vector
 * @return a new root with empty vectors
 * @throws IllegalArgumentException if the number of created vectors does not match the schema
 */
public static VectorSchemaRoot create(Schema schema, BufferAllocator allocator) {
  // Pre-size: exactly one vector is created per field, so no resizing is needed.
  List<FieldVector> fieldVectors = new ArrayList<>(schema.getFields().size());
  for (Field field : schema.getFields()) {
    FieldVector vector = field.createVector(allocator);
    fieldVectors.add(vector);
  }
  // Defensive check retained from the original contract.
  if (fieldVectors.size() != schema.getFields().size()) {
    throw new IllegalArgumentException("The root vector did not create the right number of children. found "
        + fieldVectors.size() + " expected " + schema.getFields().size());
  }
  return new VectorSchemaRoot(schema, fieldVectors, 0);
}
/**
 * Resolves {@code path} against the schema's fields, returning the first matching id.
 *
 * @param schema  schema whose fields are searched in order
 * @param path    path to resolve
 * @param isHyper whether the lookup is against a hyper container
 * @return the matching field id, or null if no field matches
 */
public static TypedFieldId getFieldId(Schema schema, SchemaPath path, boolean isHyper) {
  List<Field> fields = schema.getFields();
  for (int index = 0; index < fields.size(); index++) {
    TypedFieldId id = getFieldId(fields.get(index), index, path, isHyper);
    if (id != null) {
      return id;
    }
  }
  return null;
}
/**
 * Search for a field by name in this Schema.
 *
 * <p>Delegates to the static {@code findField(List, String)} overload over this
 * schema's own field list.
 *
 * @param name the name of the field to return
 * @return the corresponding field
 * @throws IllegalArgumentException if the field was not found
 */
public Field findField(String name) { return findField(getFields(), name); }
/**
 * Convert an {@link org.apache.arrow.vector.types.pojo.Schema}
 * to a datavec {@link Schema}.
 *
 * @param schema the input arrow schema
 * @return the equivalent datavec schema
 */
public static Schema toDatavecSchema(org.apache.arrow.vector.types.pojo.Schema schema) {
  Schema.Builder builder = new Schema.Builder();
  int numFields = schema.getFields().size();
  // Map each arrow field to a datavec column, preserving order.
  for (int col = 0; col < numFields; col++) {
    builder.addColumn(metaDataFromField(schema.getFields().get(col)));
  }
  return builder.build();
}
/**
 * Renders this root's contents as a tab-separated table: a header row of field
 * names followed by one row per record.
 *
 * @return the TSV representation of the data
 */
public String contentToTSVString() {
  StringBuilder out = new StringBuilder();
  List<Object> cells = new ArrayList<>(schema.getFields().size());
  // Header row: field names in schema order.
  for (Field field : schema.getFields()) {
    cells.add(field.getName());
  }
  printRow(out, cells);
  // Data rows, reusing the same cell buffer for each record.
  for (int rowIndex = 0; rowIndex < rowCount; rowIndex++) {
    cells.clear();
    for (FieldVector vector : fieldVectors) {
      cells.add(vector.getObject(rowIndex));
    }
    printRow(out, cells);
  }
  return out.toString();
}
}
public void start(Schema schema, DictionaryProvider provider) throws IOException { List<Field> fields = new ArrayList<>(schema.getFields().size()); Set<Long> dictionaryIdsUsed = new HashSet<>(); this.schema = schema; // Store original Schema to ensure batches written match // Convert fields with dictionaries to have dictionary type for (Field field : schema.getFields()) { fields.add(DictionaryUtility.toMessageFormat(field, provider, dictionaryIdsUsed)); } Schema updatedSchema = new Schema(fields, schema.getCustomMetadata()); generator.writeStartObject(); generator.writeObjectField("schema", updatedSchema); // Write all dictionaries that were used if (!dictionaryIdsUsed.isEmpty()) { writeDictionaryBatches(generator, dictionaryIdsUsed, provider); } // Start writing of record batches generator.writeArrayFieldStart("batches"); }
/**
 * Builds a hyper container from {@code schema}, skipping key fields.
 *
 * @param allocator allocator backing the container
 * @param schema    schema whose non-key fields are added
 * @param isKeyBits bit i set means field i is a key field and is NOT added
 */
public ExpandableHyperContainer(BufferAllocator allocator, Schema schema, BitSet isKeyBits) {
  super(allocator);
  this.isKeyBits = isKeyBits;
  List<Field> fields = schema.getFields();
  // Only fields whose key bit is clear become hyper vectors; key fields are ignored.
  for (int index = 0; index < fields.size(); index++) {
    if (!this.isKeyBits.get(index)) {
      this.addEmptyHyper(fields.get(index));
    }
  }
  this.buildSchema(SelectionVectorMode.FOUR_BYTE);
}
/**
 * Deserializes a flatbuffer-encoded single-field schema into a {@link CompleteType}.
 *
 * @param bytes flatbuffer bytes of a Schema whose first field carries the type
 * @return the CompleteType of the schema's first field
 */
public static CompleteType deserialize(byte[] bytes) {
  ByteBuffer buffer = ByteBuffer.wrap(bytes);
  org.apache.arrow.vector.types.pojo.Schema pojoSchema =
      org.apache.arrow.vector.types.pojo.Schema.convertSchema(Schema.getRootAsSchema(buffer));
  return CompleteType.fromField(pojoSchema.getFields().get(0));
}
/**
 * Builds a hyper container holding every field of {@code schema}.
 *
 * <p>Used for VECTORIZED_BIGINT mode, where all fields (including keys) are added,
 * so no key-bit filtering applies.
 *
 * @param allocator allocator backing the container
 * @param schema    schema whose fields are all added as hyper vectors
 */
public ExpandableHyperContainer(BufferAllocator allocator, Schema schema) {
  super(allocator);
  // Add all key fields for VECTORIZED_BIGINT mode; no filtering bit set is needed.
  this.isKeyBits = null;
  // Fix: removed dead local counter ("int i=0;") that was declared but never used.
  for (Field f : schema.getFields()) {
    this.addEmptyHyper(f);
  }
  this.buildSchema(SelectionVectorMode.FOUR_BYTE);
}
/**
 * Replaces the current schema contents with the fields of {@code schema}.
 *
 * @param schema schema whose fields are added after clearing existing ones
 */
public void addSchema(Schema schema) {
  clearSchema();
  schema.getFields().forEach(this::addOrGet);
}
/**
 * Materializes Parquet records into the given complex writer.
 *
 * <p>Builds the root {@link StructGroupConverter} over the writer's root struct.
 * When {@code arrowSchema} is null, no arrow field hints are passed down.
 *
 * @param mutator       output mutator receiving materialized vectors
 * @param complexWriter writer the records are materialized into
 * @param schema        the Parquet message type being read
 * @param columns       the projected columns
 * @param options       option manager consulted by the converters
 * @param arrowSchema   optional arrow schema providing field-level type hints; may be null
 * @param schemaHelper  helper guiding schema derivation
 */
public ParquetRecordMaterializer(OutputMutator mutator, ComplexWriter complexWriter, MessageType schema, Collection<SchemaPath> columns, OptionManager options, Schema arrowSchema, SchemaDerivationHelper schemaHelper) { this.complexWriter = complexWriter; root = new StructGroupConverter( mutator, complexWriter.rootAsStruct(), schema, columns, options, arrowSchema == null ? null : arrowSchema.getFields(), schemaHelper ); }
/**
 * Deserializes flatbuffer schema bytes into a {@link BatchSchema} with no selection vector.
 *
 * @param bytes flatbuffer-encoded schema
 * @return a BatchSchema over the decoded fields, SelectionVectorMode.NONE
 */
public static BatchSchema deserialize(byte[] bytes) {
  Schema flatbufSchema = Schema.getRootAsSchema(ByteBuffer.wrap(bytes));
  return new BatchSchema(SelectionVectorMode.NONE,
      org.apache.arrow.vector.types.pojo.Schema.convertSchema(flatbufSchema).getFields());
}
/**
 * Writes one record batch as a JSON object: its row count and, per schema field,
 * the corresponding vector's contents under "columns".
 *
 * @param recordBatch the batch to serialize
 * @throws IOException if the underlying generator fails
 */
private void writeBatch(VectorSchemaRoot recordBatch) throws IOException {
  generator.writeStartObject();
  generator.writeObjectField("count", recordBatch.getRowCount());
  generator.writeArrayFieldStart("columns");
  // One column entry per schema field, looked up by name.
  for (Field field : recordBatch.getSchema().getFields()) {
    writeFromVectorIntoJson(field, recordBatch.getVector(field.getName()));
  }
  generator.writeEndArray();
  generator.writeEndObject();
}
/**
 * Maps an Arrow schema onto a Parquet message type, field by field.
 * For now does not validate primitive type compatibility.
 *
 * @param arrowSchema   an Arrow schema
 * @param parquetSchema a Parquet message type
 * @return the mapping between the two schemas
 */
public SchemaMapping map(Schema arrowSchema, MessageType parquetSchema) {
  return new SchemaMapping(arrowSchema, parquetSchema,
      map(arrowSchema.getFields(), parquetSchema.getFields()));
}
/**
 * Creates a {@link VectorContainer} populated with one vector per field of {@code schema}.
 *
 * @param allocator allocator backing the container
 * @param schema    schema whose fields are added
 * @return a container with SelectionVectorMode.NONE and its schema built
 */
public static VectorContainer create(BufferAllocator allocator, Schema schema) {
  VectorContainer result = new VectorContainer(allocator);
  schema.getFields().forEach(result::addOrGet);
  result.buildSchema(SelectionVectorMode.NONE);
  return result;
}
public static String[] getColumnsLowerCase(DatasetConfig datasetConfig) { final ByteString schemaBytes = DatasetHelper.getSchemaBytes(datasetConfig); if (schemaBytes != null) { Schema schema = Schema.getRootAsSchema(schemaBytes.asReadOnlyByteBuffer()); org.apache.arrow.vector.types.pojo.Schema s = org.apache.arrow.vector.types.pojo.Schema.convertSchema(schema); return s.getFields().stream().map(input -> input.getName().toLowerCase()).toArray(String[]::new); } else { // If virtual dataset was created with view fields if (datasetConfig.getType() == DatasetType.VIRTUAL_DATASET) { final List<ViewFieldType> viewFieldTypes = datasetConfig.getVirtualDataset().getSqlFieldsList(); if (notEmpty(viewFieldTypes)) { return viewFieldTypes.stream().map(input -> input.getName().toLowerCase()).toArray(String[]::new); } } } return new String[0]; }
/**
 * Deserializes a {@link ByteString}-wrapped flatbuffer schema into a {@link BatchSchema}.
 *
 * @param bytes serialized schema bytes
 * @return a BatchSchema over the decoded fields, SelectionVectorMode.NONE
 */
public static BatchSchema deserialize(ByteString bytes) {
  final Schema serializedSchema = Schema.getRootAsSchema(bytes.asReadOnlyByteBuffer());
  final org.apache.arrow.vector.types.pojo.Schema arrowSchema =
      org.apache.arrow.vector.types.pojo.Schema.convertSchema(serializedSchema);
  return new BatchSchema(SelectionVectorMode.NONE, arrowSchema.getFields());
}
/**
 * Creates a Parquet schema from an Arrow one and returns the mapping between the two.
 *
 * @param arrowSchema the provided Arrow schema
 * @return the mapping between the Arrow schema and the generated Parquet "root" message
 */
public SchemaMapping fromArrow(Schema arrowSchema) {
  List<TypeMapping> mappedFields = fromArrow(arrowSchema.getFields());
  MessageType messageType = addToBuilder(mappedFields, Types.buildMessage()).named("root");
  return new SchemaMapping(arrowSchema, messageType, mappedFields);
}
/**
 * Reads the next record batch object from the JSON stream into {@code root}.
 *
 * @param root destination root; its row count and vectors are populated
 * @return true if a batch was read; false when the enclosing batch array ended
 * @throws IOException              if the parser fails
 * @throws IllegalArgumentException on an unexpected token
 */
public boolean read(VectorSchemaRoot root) throws IOException {
  JsonToken t = parser.nextToken();
  // Guard: end of the "batches" array means there is nothing left to read.
  if (t == END_ARRAY) {
    root.setRowCount(0);
    return false;
  }
  if (t != START_OBJECT) {
    throw new IllegalArgumentException("Invalid token: " + t);
  }
  root.setRowCount(readNextField("count", Integer.class));
  nextFieldIs("columns");
  readToken(START_ARRAY);
  // One column entry per schema field, in schema order.
  for (Field field : root.getSchema().getFields()) {
    readFromJsonIntoVector(field, root.getVector(field.getName()));
  }
  readToken(END_ARRAY);
  readToken(END_OBJECT);
  return true;
}