/**
 * Builds a {@link MessageType} from a list of schema elements.
 *
 * <p>The first element of the list is treated as the root. Its child count is handed to
 * {@code readTypeSchema} together with the iterator positioned just after the root, and its
 * name is used to name the resulting message type.
 *
 * @param schema schema elements, root element first; the list must not be empty because the
 *     root is fetched with an unguarded {@code Iterator.next()}
 * @return the message type named after the root element
 */
private static MessageType readParquetSchema(List<SchemaElement> schema) {
    Iterator<SchemaElement> elements = schema.iterator();
    SchemaElement root = elements.next();
    int childCount = root.getNum_children();

    Types.MessageTypeBuilder messageBuilder = Types.buildMessage();
    readTypeSchema(messageBuilder, elements, childCount);
    return messageBuilder.named(root.name);
}
MessageType schema = parquet.schema.Types.buildMessage().optionalGroup() .as(parquet.schema.OriginalType.LIST).repeatedGroup() .optional(PrimitiveType.PrimitiveTypeName.INT32).named("element") schema = parquet.schema.Types.buildMessage().optionalGroup() .as(parquet.schema.OriginalType.LIST).repeatedGroup().requiredGroup() .as(OriginalType.LIST).repeatedGroup() schema = parquet.schema.Types.buildMessage().optionalGroup() .as(parquet.schema.OriginalType.LIST) .repeated(PrimitiveType.PrimitiveTypeName.INT32).named("element").named("int_list") schema = parquet.schema.Types.buildMessage().optionalGroup() .as(parquet.schema.OriginalType.LIST).repeatedGroup() .required(PrimitiveType.PrimitiveTypeName.BINARY).named("str") schema = parquet.schema.Types.buildMessage().optionalGroup() .as(parquet.schema.OriginalType.LIST).repeatedGroup() .required(PrimitiveType.PrimitiveTypeName.BINARY).named("str").named("array") schema = parquet.schema.Types.buildMessage().optionalGroup() .as(parquet.schema.OriginalType.LIST).repeatedGroup() .required(PrimitiveType.PrimitiveTypeName.BINARY).named("str") schema = parquet.schema.Types.buildMessage().optionalGroup() .as(parquet.schema.OriginalType.LIST).repeatedGroup() .required(PrimitiveType.PrimitiveTypeName.BINARY).named("str") schema = parquet.schema.Types.buildMessage().optionalGroup()
/**
 * Searches a Parquet message schema for the given column names and returns the projected
 * Parquet schema built from the matching types.
 *
 * @param schema Message type schema in which to search for the column names.
 * @param colNames List of column names.
 * @param colTypes List of column types.
 * @return A MessageType object holding the projected columns, named after {@code schema}.
 */
private static MessageType getSchemaByName(MessageType schema, List<String> colNames, List<TypeInfo> colTypes) {
    List<Type> projected = getProjectedGroupFields(schema, colNames, colTypes);

    return Types.buildMessage()
        .addFields(projected.toArray(new Type[0]))
        .named(schema.getName());
}
/**
 * Creates a message type builder.
 *
 * <p>Passes {@code MessageType.class} to the superclass constructor and then sets the
 * repetition to {@code REQUIRED}.
 */
private MessageTypeBuilder() {
    super(MessageType.class);
    this.repetition(Type.Repetition.REQUIRED);
}
/**
 * Converts a list of schema elements into a {@link MessageType}.
 *
 * <p>The root is the first element in the list. The remaining elements are consumed through
 * the shared iterator by {@code readTypeSchema}, which also receives the root's child count.
 * The returned message type carries the root's name.
 *
 * @param schema schema elements with the root first; an empty list makes the unguarded
 *     {@code Iterator.next()} call fail
 * @return the message type named after the root element
 */
private static MessageType readParquetSchema(List<SchemaElement> schema) {
    Iterator<SchemaElement> remaining = schema.iterator();
    SchemaElement rootElement = remaining.next();
    int rootChildCount = rootElement.getNum_children();

    Types.MessageTypeBuilder typeBuilder = Types.buildMessage();
    readTypeSchema(typeBuilder, remaining, rootChildCount);
    return typeBuilder.named(rootElement.name);
}
/**
 * Builds a {@link MessageType} from a list of schema elements.
 *
 * <p>The first element is taken as the root. {@code buildChildren} is given the builder, the
 * iterator positioned after the root, and the root's child count; the finished message type
 * is named after the root.
 *
 * @param schema schema elements, root element first; must not be empty because the root is
 *     read with an unguarded {@code Iterator.next()}
 * @return the message type named after the root element
 */
MessageType fromParquetSchema(List<SchemaElement> schema) {
    Iterator<SchemaElement> elements = schema.iterator();
    SchemaElement rootElement = elements.next();
    int childCount = rootElement.getNum_children();

    Types.MessageTypeBuilder messageBuilder = Types.buildMessage();
    buildChildren(messageBuilder, elements, childCount);
    return messageBuilder.named(rootElement.name);
}
/**
 * Parses a schema string into a {@link MessageType}.
 *
 * <p>Tokens are consumed in a fixed order: the leading keyword, the message name, and then
 * the first token of the body, which is handed to {@code addGroupTypeFields} along with the
 * tokenizer and the builder.
 *
 * @param schemaString textual schema to parse
 * @return the message type named with the second token of the input
 */
private static MessageType parse(String schemaString) {
    Tokenizer tokens = new Tokenizer(schemaString, " ;{}()\n\t");
    Types.MessageTypeBuilder messageBuilder = Types.buildMessage();

    // The first token is validated against the keyword "message" by check().
    String keyword = tokens.nextToken();
    check(keyword, "message", "start with 'message'", tokens);

    // Order matters: the name token is read before the token that opens the field list.
    String messageName = tokens.nextToken();
    String firstBodyToken = tokens.nextToken();
    addGroupTypeFields(firstBodyToken, tokens, messageBuilder);

    return messageBuilder.named(messageName);
}
/**
 * Creates a new builder for constructing a {@link MessageType}.
 *
 * @return a newly created {@link MessageTypeBuilder}
 */
public static MessageTypeBuilder buildMessage() {
    MessageTypeBuilder builder = new MessageTypeBuilder();
    return builder;
}