schemaTypes.add(Types.buildGroup(groupFieldType.getRepetition()) .addFields(typesArray) .named(fieldType.getName()) ); } else {
private static void addGroupType(String t, Tokenizer st, Repetition r, GroupBuilder<?> builder) { GroupBuilder<?> childBuilder = builder.group(r); String name = st.nextToken(); // Read annotation, if any. t = st.nextToken(); OriginalType originalType = null; if (t.equalsIgnoreCase("(")) { originalType = OriginalType.valueOf(st.nextToken()); childBuilder.as(originalType); check(st.nextToken(), ")", "original type ended by )", st); t = st.nextToken(); } if (t.equals("=")) { childBuilder.id(Integer.parseInt(st.nextToken())); t = st.nextToken(); } try { addGroupTypeFields(t, st, childBuilder); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("problem reading type: type = group, name = " + name + ", original type = " + originalType, e); } childBuilder.named(name); }
/**
 * Builds a projected Parquet message schema from a list of column names.
 *
 * <p>The projected fields are obtained from {@code getProjectedGroupFields} and wrapped in a
 * new message type that keeps the name of the input schema.
 *
 * @param schema   Message type schema in which the column names are looked up.
 * @param colNames List of column names.
 * @param colTypes List of column types.
 * @return A MessageType object containing the projected columns.
 */
private static MessageType getSchemaByName(MessageType schema, List<String> colNames, List<TypeInfo> colTypes) {
  Type[] projected = getProjectedGroupFields(schema, colNames, colTypes).toArray(new Type[0]);
  return Types.buildMessage().addFields(projected).named(schema.getName());
}
.as(parquet.schema.OriginalType.LIST).repeatedGroup() .optional(PrimitiveType.PrimitiveTypeName.INT32).named("element") .named("list").named("int_list").named("ArrayOfInts"); .as(parquet.schema.OriginalType.LIST).repeatedGroup().requiredGroup() .as(OriginalType.LIST).repeatedGroup() .required(PrimitiveType.PrimitiveTypeName.INT32).named("element").named("list") .named("element").named("list").named("int_list_list").named("ArrayOfArrayOfInts"); .repeated(PrimitiveType.PrimitiveTypeName.INT32).named("element").named("int_list") .named("ArrayOfInts"); .as(parquet.schema.OriginalType.LIST).repeatedGroup() .required(PrimitiveType.PrimitiveTypeName.BINARY).named("str") .required(PrimitiveType.PrimitiveTypeName.INT32).named("num").named("element") .named("tuple_list").named("ArrayOfTuples"); .required(PrimitiveType.PrimitiveTypeName.BINARY).named("str").named("array") .named("one_tuple_list").named("ArrayOfOneTuples"); .as(parquet.schema.OriginalType.LIST).repeatedGroup() .required(PrimitiveType.PrimitiveTypeName.BINARY).named("str") .named("one_tuple_list_tuple").named("one_tuple_list").named("ArrayOfOneTuples2"); .as(parquet.schema.OriginalType.LIST).repeatedGroup() .required(PrimitiveType.PrimitiveTypeName.BINARY).named("str") .named("one_tuple_list").named("one_tuple_list").named("ArrayOfOneTuples3"); .required(PrimitiveType.PrimitiveTypeName.BINARY).as(OriginalType.UTF8)
/**
 * Builds the Parquet group type for this converter's column.
 *
 * <p>The result is a group named after the JSON schema's column name that contains a single
 * repeated group named {@code MAP_KEY}; that inner group holds the key converter's schema
 * followed by the element converter's schema.
 *
 * @return an optional or required group, depending on the JSON schema's
 *         {@code optionalOrRequired()} value, or {@code null} for any other value
 */
@Override
protected Type buildSchema() {
  JsonElementConverter valueConverter = this.elementConverter;
  JsonElementConverter keyConverter = getKeyConverter();

  GroupType keyValueGroup = Types.repeatedGroup()
      .addFields(keyConverter.schema(), valueConverter.schema())
      .named(MAP_KEY)
      .asGroupType();

  String columnName = this.jsonSchema.getColumnName();

  GroupType columnGroup;
  switch (this.jsonSchema.optionalOrRequired()) {
    case OPTIONAL:
      columnGroup = Types.optionalGroup().addFields(keyValueGroup).named(columnName).asGroupType();
      break;
    case REQUIRED:
      columnGroup = Types.requiredGroup().addFields(keyValueGroup).named(columnName).asGroupType();
      break;
    default:
      // Any other repetition yields no schema.
      columnGroup = null;
      break;
  }
  return columnGroup;
}
/**
 * Assembles the Parquet schema for this column as a two-level group.
 *
 * <p>Inner level: a repeated group named {@code MAP_KEY} whose fields are the key converter's
 * schema and then the element converter's schema. Outer level: a group named after the JSON
 * schema's column name, whose repetition follows {@code jsonSchema.optionalOrRequired()}.
 *
 * @return the outer group for {@code OPTIONAL} or {@code REQUIRED}; {@code null} otherwise
 */
@Override
protected Type buildSchema() {
  JsonElementConverter valueSide = this.elementConverter;
  JsonElementConverter keySide = getKeyConverter();

  GroupType entries =
      Types.repeatedGroup()
          .addFields(keySide.schema(), valueSide.schema())
          .named(MAP_KEY)
          .asGroupType();

  String outerName = this.jsonSchema.getColumnName();

  switch (this.jsonSchema.optionalOrRequired()) {
    case OPTIONAL: {
      return Types.optionalGroup()
          .addFields(entries)
          .named(outerName)
          .asGroupType();
    }
    case REQUIRED: {
      return Types.requiredGroup()
          .addFields(entries)
          .named(outerName)
          .asGroupType();
    }
    default: {
      // No schema is produced for any other repetition value.
      return null;
    }
  }
}