/**
 * Creates a root over the given vectors, wrapping the field list in a new {@link Schema}.
 *
 * @param fields       the fields describing each vector, in order
 * @param fieldVectors the vectors holding the data
 * @param rowCount     the number of rows the vectors contain
 */
public VectorSchemaRoot(List<Field> fields, List<FieldVector> fieldVectors, int rowCount) {
  this(new Schema(fields), fieldVectors, rowCount);
}
/**
 * Deserializes a {@link Schema} from its JSON tree representation.
 *
 * <p>Expects an object with an optional {@code "metadata"} map and a required
 * {@code "fields"} array.
 *
 * @param jsonParser the parser positioned at the schema object
 * @param deserializationContext the Jackson deserialization context (unused)
 * @return the reconstructed schema
 * @throws IOException if the tree cannot be read, or if the required
 *         {@code "fields"} property is missing
 */
@Override
public Schema deserialize(JsonParser jsonParser, DeserializationContext deserializationContext)
    throws IOException, JsonProcessingException {
  JsonNode node = jsonParser.getCodec().readTree(jsonParser);
  // "metadata" may be absent; convertValue(null, ...) simply yields null.
  JsonNode metadataNode = node.get("metadata");
  Map<String, String> metadata = mapper.convertValue(metadataNode, Map.class);
  JsonNode fieldsNode = node.get("fields");
  // Fail with a clear message instead of an opaque NPE inside fieldsReader.
  if (fieldsNode == null) {
    throw new IOException("Schema JSON is missing the required \"fields\" property");
  }
  Iterable<Field> fields = fieldsReader.readValue(fieldsNode);
  return new Schema(fields, metadata);
}
/**
 * Serializes this object as a one-field Arrow schema into the given builder.
 *
 * @param builder the FlatBuffer builder to write into
 * @return the offset of the serialized schema within the builder
 */
public int serialize(FlatBufferBuilder builder) {
  final org.apache.arrow.vector.types.pojo.Schema arrowSchema =
      new org.apache.arrow.vector.types.pojo.Schema(Collections.singletonList(this.toField("f")));
  return arrowSchema.getSchema(builder);
}
/**
 * Creates an Arrow Schema from a Parquet one and returns the mapping.
 *
 * @param parquetSchema the provided Parquet Schema
 * @return a {@link SchemaMapping} tying together the Arrow schema, the
 *         Parquet schema, and the per-field type mappings
 */
public SchemaMapping fromParquet(MessageType parquetSchema) {
  final List<Type> parquetFields = parquetSchema.getFields();
  final List<TypeMapping> typeMappings = fromParquet(parquetFields);
  final Schema arrowSchema = new Schema(fields(typeMappings));
  return new SchemaMapping(arrowSchema, parquetSchema, typeMappings);
}
/**
 * Creates the vector schema from the incoming container, restricted to the
 * referenced fields.
 *
 * @param input            the container to pull vectors from
 * @param referencedFields the fields to keep; vectors whose {@code Field} is
 *                         not in this set are dropped
 * @return a vector schema root over the referenced vectors, with row count 0
 */
public static VectorSchemaRoot getSchemaRoot(VectorAccessible input, Set<?> referencedFields) {
  // Keep only vectors whose Field is referenced, preserving the input's order.
  List<FieldVector> referencedVectors = ImmutableList.copyOf(input)
      .stream()
      .map(vw -> (FieldVector) vw.getValueVector())
      .filter(vec -> referencedFields.contains(vec.getField()))
      .collect(Collectors.toList());
  List<Field> fields = referencedVectors.stream()
      .map(FieldVector::getField)
      .collect(Collectors.toList());
  // Row count 0: callers are expected to load record batches into the root afterwards.
  return new VectorSchemaRoot(new Schema(fields), referencedVectors, 0);
}
}
/**
 * Serializes this schema into the given FlatBuffer builder.
 *
 * <p>Only permitted when no selection vector is in play, since a selection
 * vector cannot be represented in the serialized form.
 *
 * @param builder the FlatBuffer builder to write into
 * @return the offset of the serialized schema within the builder
 * @throws IllegalArgumentException if the selection vector mode is not {@code NONE}
 */
public int serialize(FlatBufferBuilder builder) {
  Preconditions.checkArgument(
      selectionVectorMode == SelectionVectorMode.NONE,
      "Serialization is only allowed for SelectionVectorMode.NONE. This was in SelectionVectorMode.%s",
      selectionVectorMode.name());
  final org.apache.arrow.vector.types.pojo.Schema arrowSchema =
      new org.apache.arrow.vector.types.pojo.Schema(getFields());
  return arrowSchema.getSchema(builder);
}
public void start(Schema schema, DictionaryProvider provider) throws IOException { List<Field> fields = new ArrayList<>(schema.getFields().size()); Set<Long> dictionaryIdsUsed = new HashSet<>(); this.schema = schema; // Store original Schema to ensure batches written match // Convert fields with dictionaries to have dictionary type for (Field field : schema.getFields()) { fields.add(DictionaryUtility.toMessageFormat(field, provider, dictionaryIdsUsed)); } Schema updatedSchema = new Schema(fields, schema.getCustomMetadata()); generator.writeStartObject(); generator.writeObjectField("schema", updatedSchema); // Write all dictionaries that were used if (!dictionaryIdsUsed.isEmpty()) { writeDictionaryBatches(generator, dictionaryIdsUsed, provider); } // Start writing of record batches generator.writeArrayFieldStart("batches"); }
/**
 * Convert a DataVec {@link Schema} to an Arrow
 * {@link org.apache.arrow.vector.types.pojo.Schema}.
 *
 * @param schema the input schema
 * @return the equivalent schema for Arrow
 */
public static org.apache.arrow.vector.types.pojo.Schema toArrowSchema(Schema schema) {
  final int columnCount = schema.numColumns();
  final List<Field> arrowFields = new ArrayList<>(columnCount);
  for (int col = 0; col < columnCount; col++) {
    arrowFields.add(getFieldForColumn(schema.getName(col), schema.getType(col)));
  }
  return new org.apache.arrow.vector.types.pojo.Schema(arrowFields);
}
/**
 * Converts a FlatBuffer-backed Arrow schema into its POJO form.
 *
 * <p>Null metadata keys or values are normalized to the empty string so the
 * resulting map never contains nulls.
 *
 * @param schema the flatbuffer schema to convert
 * @return an immutable POJO schema with the converted fields and metadata
 */
public static Schema convertSchema(org.apache.arrow.flatbuf.Schema schema) {
  final List<Field> fields = new ArrayList<>();
  final int fieldCount = schema.fieldsLength();
  for (int i = 0; i < fieldCount; i++) {
    fields.add(convertField(schema.fields(i)));
  }
  final Map<String, String> metadata = new HashMap<>();
  final int metadataCount = schema.customMetadataLength();
  for (int i = 0; i < metadataCount; i++) {
    final KeyValue kv = schema.customMetadata(i);
    final String key = kv.key();
    final String value = kv.value();
    // FlatBuffers strings may be absent; map nulls to "" before storing.
    metadata.put(key == null ? "" : key, value == null ? "" : value);
  }
  return new Schema(Collections2.immutableListCopy(fields), Collections2.immutableMapCopy(metadata));
}
/**
 * Reads the schema and initializes the vectors.
 *
 * <p>Fields that use dictionary encoding are converted to their in-memory
 * (index) representation; each conversion registers its dictionary in the
 * {@code dictionaries} map. One dictionary batch is then read and loaded per
 * registered dictionary.
 */
private void initialize() throws IOException {
  Schema originalSchema = readSchema();
  List<Field> fields = new ArrayList<>();
  List<FieldVector> vectors = new ArrayList<>();
  Map<Long, Dictionary> dictionaries = new HashMap<>();
  // Convert fields with dictionaries to have the index type
  for (Field field : originalSchema.getFields()) {
    Field updated = DictionaryUtility.toMemoryFormat(field, allocator, dictionaries);
    fields.add(updated);
    vectors.add(updated.createVector(allocator));
  }
  // Preserve the original custom metadata on the converted schema.
  Schema schema = new Schema(fields, originalSchema.getCustomMetadata());
  this.root = new VectorSchemaRoot(schema, vectors, 0);
  this.loader = new VectorLoader(root);
  this.dictionaries = Collections.unmodifiableMap(dictionaries);
  // Read and load all dictionaries from schema
  // NOTE(review): assumes the stream contains exactly one batch per dictionary
  // id collected above, in readable order — confirm against the writer.
  for (int i = 0; i < dictionaries.size(); i++) {
    ArrowDictionaryBatch dictionaryBatch = readDictionary();
    loadDictionary(dictionaryBatch);
  }
}
public Schema start() throws JsonParseException, IOException { readToken(START_OBJECT); { Schema originalSchema = readNextField("schema", Schema.class); List<Field> fields = new ArrayList<>(); dictionaries = new HashMap<>(); // Convert fields with dictionaries to have the index type for (Field field : originalSchema.getFields()) { fields.add(DictionaryUtility.toMemoryFormat(field, allocator, dictionaries)); } this.schema = new Schema(fields, originalSchema.getCustomMetadata()); if (!dictionaries.isEmpty()) { nextFieldIs("dictionaries"); readDictionaryBatches(); } nextFieldIs("batches"); readToken(START_ARRAY); started = true; return this.schema; } }
/**
 * (Re)builds the Parquet write pipeline for the current {@code batchSchema}:
 * stores the Arrow schema as JSON in the file's extra metadata, derives the
 * Parquet {@link MessageType}, and wires up the page store, column store, and
 * record consumer.
 */
private void newSchema() throws IOException {
  // Reset it to half of current number and bound it within the limits
  recordCountForNextMemCheck = min(max(MINIMUM_RECORD_COUNT_FOR_CHECK, recordCountForNextMemCheck / 2), MAXIMUM_RECORD_COUNT_FOR_CHECK);
  // Embed the Arrow schema so readers can recover it without re-deriving from Parquet types.
  String json = new Schema(batchSchema).toJson();
  extraMetaData.put(DREMIO_ARROW_SCHEMA_2_1, json);
  List<Type> types = Lists.newArrayList();
  for (Field field : batchSchema) {
    // The partition-comparator field is internal and must not appear in the file schema.
    if (field.getName().equalsIgnoreCase(WriterPrel.PARTITION_COMPARATOR_FIELD)) {
      continue;
    }
    // getType may return null for fields with no Parquet representation; skip those.
    Type childType = getType(field);
    if (childType != null) {
      types.add(childType);
    }
  }
  Preconditions.checkState(types.size() > 0, "No types for parquet schema");
  schema = new MessageType("root", types);
  int dictionarySize = (int)context.getOptions().getOption(ExecConstants.PARQUET_DICT_PAGE_SIZE_VALIDATOR);
  final ParquetProperties parquetProperties = new ParquetProperties(dictionarySize, writerVersion, enableDictionary, new ParquetDirectByteBufferAllocator(columnEncoderAllocator), pageSize, true, enableDictionaryForBinary);
  pageStore = ColumnChunkPageWriteStoreExposer.newColumnChunkPageWriteStore(codecFactory.getCompressor(codec), schema, parquetProperties);
  store = new ColumnWriteStoreV1(pageStore, pageSize, parquetProperties);
  MessageColumnIO columnIO = new ColumnIOFactory(false).getColumnIO(this.schema);
  consumer = columnIO.getRecordWriter(store);
  setUp(schema, consumer);
}
// Rebuild the schema from the (converted) field list while preserving the
// root's original custom metadata. NOTE(review): enclosing method not visible
// here — presumably `fields` mirrors root.getSchema().getFields() after some
// per-field transformation; confirm in the surrounding code.
this.schema = new Schema(fields, root.getSchema().getCustomMetadata());
/**
 * Round-trips a schema through the namespace store: serializes it to
 * FlatBuffer bytes, stores it on a physical dataset, reads it back, and
 * verifies the reconstructed schema equals the original.
 */
@Test
public void testDataSetSchema() throws Exception {
  try (final KVStoreProvider kvstore =
      new LocalKVStoreProvider(DremioTest.CLASSPATH_SCAN_RESULT, null, true, false)) {
    kvstore.start();
    final NamespaceService ns = new NamespaceServiceImpl(kvstore);
    Field field1 = new Field("a", true, new Int(32, true), null);
    Field child1 = new Field("c", true, Utf8.INSTANCE, null);
    Field field2 = new Field("b", true, Struct.INSTANCE, ImmutableList.of(child1));
    Schema schema = new Schema(ImmutableList.of(field1, field2));
    FlatBufferBuilder builder = new FlatBufferBuilder();
    // Serialize exactly once and finish on that offset. The original called
    // schema.getSchema(builder) twice, writing a second, orphaned copy of the
    // schema into the builder before finishing.
    builder.finish(schema.getSchema(builder));
    addSource(ns, "s");
    addPhysicalDS(ns, "s.foo", builder.sizedByteArray());
    ByteBuffer bb = ByteBuffer.wrap(
        DatasetHelper.getSchemaBytes(
            ns.getDataset(new NamespaceKey(PathUtils.parseFullPath("s.foo")))).toByteArray());
    Schema returnedSchema = Schema.convertSchema(org.apache.arrow.flatbuf.Schema.getRootAsSchema(bb));
    assertEquals(schema, returnedSchema);
  }
}