/**
 * Passes a nested struct column {@code a.b.c.d} through {@code tester.testRoundTrip} using an
 * explicit Parquet schema in which groups {@code a}, {@code b} and {@code c} are declared
 * {@code optional} and the leaf {@code d} is a {@code required} UTF8 binary.
 */
@Test
public void testSchemaWithOptionalOptionalRequiredFields()
        throws Exception
{
    String schemaText = "message hive_schema {"
            + " optional group a {"
            + " optional group b {"
            + " optional group c {"
            + " required binary d (UTF8);"
            + " }"
            + " }"
            + " }"
            + "} ";
    MessageType parquetSchema = parseMessageType(schemaText);

    // Row types, built from the innermost struct outwards.
    Type innerRowType = RowType.from(singletonList(field("d", VARCHAR)));
    Type middleRowType = RowType.from(singletonList(field("c", innerRowType)));
    Type outerRowType = RowType.from(singletonList(field("b", middleRowType)));

    // Object inspectors mirror the same a -> b -> c -> d nesting.
    ObjectInspector innerInspector = getStandardStructObjectInspector(singletonList("d"), singletonList(javaStringObjectInspector));
    ObjectInspector middleInspector = getStandardStructObjectInspector(singletonList("c"), singletonList(innerInspector));
    ObjectInspector outerInspector = getStandardStructObjectInspector(singletonList("b"), singletonList(middleInspector));

    // Values: c and b structs come from createNullableTestStructs, the top-level a from createTestStructs.
    Iterable<String> leafValues = asList("d1", "d2", "d3", "d4", "d5", "d6", "d7");
    Iterable<List> innerStructs = createNullableTestStructs(leafValues);
    Iterable<List> middleStructs = createNullableTestStructs(innerStructs);
    List<List> outerStructs = createTestStructs(middleStructs);

    // The same values serve as both the written and the expected read data.
    tester.testRoundTrip(outerInspector, outerStructs, outerStructs, "a", outerRowType, Optional.of(parquetSchema));
}
private static void addType(String t, Tokenizer st, Types.GroupBuilder builder) { Repetition repetition = asRepetition(t, st); // Read type. String type = st.nextToken(); if ("group".equalsIgnoreCase(type)) { addGroupType(t, st, repetition, builder); } else { addPrimitiveType(t, st, asPrimitive(type, st), repetition, builder); } }
/**
 * Parses a textual schema into a {@link MessageType}.
 *
 * <p>The first token is checked against {@code "message"}, the second is taken as the message
 * name, and the third is handed to {@code addGroupTypeFields} together with the tokenizer to
 * read the fields.
 *
 * @param schemaString the schema text to tokenize
 * @return the message type built under the parsed name
 */
private static MessageType parse(String schemaString) {
  Tokenizer tokens = new Tokenizer(schemaString, " ;{}()\n\t");
  Types.MessageTypeBuilder messageBuilder = Types.buildMessage();

  String leadingKeyword = tokens.nextToken();
  check(leadingKeyword, "message", "start with 'message'", tokens);

  String messageName = tokens.nextToken();
  String firstBodyToken = tokens.nextToken();
  addGroupTypeFields(firstBodyToken, tokens, messageBuilder);

  return messageBuilder.named(messageName);
}
private static void addGroupType(String t, Tokenizer st, Repetition r, GroupBuilder<?> builder) { GroupBuilder<?> childBuilder = builder.group(r); String name = st.nextToken(); // Read annotation, if any. t = st.nextToken(); OriginalType originalType = null; if (t.equalsIgnoreCase("(")) { originalType = OriginalType.valueOf(st.nextToken()); childBuilder.as(originalType); check(st.nextToken(), ")", "original type ended by )", st); t = st.nextToken(); } if (t.equals("=")) { childBuilder.id(Integer.parseInt(st.nextToken())); t = st.nextToken(); } try { addGroupTypeFields(t, st, childBuilder); } catch (IllegalArgumentException e) { throw new IllegalArgumentException("problem reading type: type = group, name = " + name + ", original type = " + originalType, e); } childBuilder.named(name); }
/**
 * Passes a nested struct column {@code a.b.c.d} through {@code tester.testRoundTrip} using an
 * explicit Parquet schema in which {@code a} is {@code optional}, {@code b} and {@code c} are
 * {@code required} groups, and the leaf {@code d} is an {@code optional int32}.
 */
@Test
public void testSchemaWithRequiredRequiredOptionalFields()
        throws Exception
{
    String schemaText = "message hive_schema {"
            + " optional group a {"
            + " required group b {"
            + " required group c {"
            + " optional int32 d;"
            + " }"
            + " }"
            + " }"
            + "} ";
    MessageType parquetSchema = parseMessageType(schemaText);

    // Row types, built from the innermost struct outwards.
    Type innerRowType = RowType.from(singletonList(field("d", INTEGER)));
    Type middleRowType = RowType.from(singletonList(field("c", innerRowType)));
    Type outerRowType = RowType.from(singletonList(field("b", middleRowType)));

    // Object inspectors mirror the same a -> b -> c -> d nesting.
    ObjectInspector innerInspector = getStandardStructObjectInspector(singletonList("d"), singletonList(javaIntObjectInspector));
    ObjectInspector middleInspector = getStandardStructObjectInspector(singletonList("c"), singletonList(innerInspector));
    ObjectInspector outerInspector = getStandardStructObjectInspector(singletonList("b"), singletonList(middleInspector));

    // Leaf values include nulls; every struct level is built with createTestStructs.
    Iterable<Integer> leafValues = asList(111, null, 333, 444, null, 666, 777);
    List<List> innerStructs = createTestStructs(leafValues);
    List<List> middleStructs = createTestStructs(innerStructs);
    List<List> outerStructs = createTestStructs(middleStructs);

    // The same values serve as both the written and the expected read data.
    tester.testRoundTrip(outerInspector, outerStructs, outerStructs, "a", outerRowType, Optional.of(parquetSchema));
}
/**
 * Passes a nested struct column {@code a.b.c.d} through {@code tester.testRoundTrip} using an
 * explicit Parquet schema in which {@code a} is {@code optional}, {@code b} is {@code required},
 * {@code c} is {@code optional}, and the leaf {@code d} is a {@code required} UTF8 binary.
 */
@Test
public void testSchemaWithRequiredOptionalRequiredFields()
        throws Exception
{
    String schemaText = "message hive_schema {"
            + " optional group a {"
            + " required group b {"
            + " optional group c {"
            + " required binary d (UTF8);"
            + " }"
            + " }"
            + " }"
            + "} ";
    MessageType parquetSchema = parseMessageType(schemaText);

    // Row types, built from the innermost struct outwards.
    Type innerRowType = RowType.from(singletonList(field("d", VARCHAR)));
    Type middleRowType = RowType.from(singletonList(field("c", innerRowType)));
    Type outerRowType = RowType.from(singletonList(field("b", middleRowType)));

    // Object inspectors mirror the same a -> b -> c -> d nesting.
    ObjectInspector innerInspector = getStandardStructObjectInspector(singletonList("d"), singletonList(javaStringObjectInspector));
    ObjectInspector middleInspector = getStandardStructObjectInspector(singletonList("c"), singletonList(innerInspector));
    ObjectInspector outerInspector = getStandardStructObjectInspector(singletonList("b"), singletonList(middleInspector));

    // Only the c structs come from createNullableTestStructs; b and a use createTestStructs.
    Iterable<String> leafValues = asList("d1", "d2", "d3", "d4", "d5", "d6", "d7");
    Iterable<List> innerStructs = createNullableTestStructs(leafValues);
    List<List> middleStructs = createTestStructs(innerStructs);
    List<List> outerStructs = createTestStructs(middleStructs);

    // The same values serve as both the written and the expected read data.
    tester.testRoundTrip(outerInspector, outerStructs, outerStructs, "a", outerRowType, Optional.of(parquetSchema));
}
/**
 * Passes a nested struct column {@code a.b.c.d} through {@code tester.testRoundTrip} using an
 * explicit Parquet schema in which {@code a} and {@code b} are {@code optional} groups,
 * {@code c} is a {@code required} group, and the leaf {@code d} is an {@code optional int32}.
 */
@Test
public void testSchemaWithOptionalRequiredOptionalFields()
        throws Exception
{
    String schemaText = "message hive_schema {"
            + " optional group a {"
            + " optional group b {"
            + " required group c {"
            + " optional int32 d;"
            + " }"
            + " }"
            + " }"
            + "} ";
    MessageType parquetSchema = parseMessageType(schemaText);

    // Row types, built from the innermost struct outwards.
    Type innerRowType = RowType.from(singletonList(field("d", INTEGER)));
    Type middleRowType = RowType.from(singletonList(field("c", innerRowType)));
    Type outerRowType = RowType.from(singletonList(field("b", middleRowType)));

    // Object inspectors mirror the same a -> b -> c -> d nesting.
    ObjectInspector innerInspector = getStandardStructObjectInspector(singletonList("d"), singletonList(javaIntObjectInspector));
    ObjectInspector middleInspector = getStandardStructObjectInspector(singletonList("c"), singletonList(innerInspector));
    ObjectInspector outerInspector = getStandardStructObjectInspector(singletonList("b"), singletonList(middleInspector));

    // Leaf values include nulls; only the b structs come from createNullableTestStructs.
    Iterable<Integer> leafValues = asList(111, null, 333, 444, null, 666, 777);
    List<List> innerStructs = createTestStructs(leafValues);
    Iterable<List> middleStructs = createNullableTestStructs(innerStructs);
    List<List> outerStructs = createTestStructs(middleStructs);

    // The same values serve as both the written and the expected read data.
    tester.testRoundTrip(outerInspector, outerStructs, outerStructs, "a", outerRowType, Optional.of(parquetSchema));
}
/**
 * Passes a nested struct column {@code a.b.c.d} through {@code tester.testRoundTrip} using an
 * explicit Parquet schema in which {@code a} is {@code optional}, {@code b} is {@code required},
 * {@code c} is {@code optional}, and the leaf {@code d} is an {@code optional int32}.
 */
@Test
public void testSchemaWithRequiredOptionalOptionalFields()
        throws Exception
{
    String schemaText = "message hive_schema {"
            + " optional group a {"
            + " required group b {"
            + " optional group c {"
            + " optional int32 d;"
            + " }"
            + " }"
            + " }"
            + "} ";
    MessageType parquetSchema = parseMessageType(schemaText);

    // Row types, built from the innermost struct outwards.
    Type innerRowType = RowType.from(singletonList(field("d", INTEGER)));
    Type middleRowType = RowType.from(singletonList(field("c", innerRowType)));
    Type outerRowType = RowType.from(singletonList(field("b", middleRowType)));

    // Object inspectors mirror the same a -> b -> c -> d nesting.
    ObjectInspector innerInspector = getStandardStructObjectInspector(singletonList("d"), singletonList(javaIntObjectInspector));
    ObjectInspector middleInspector = getStandardStructObjectInspector(singletonList("c"), singletonList(innerInspector));
    ObjectInspector outerInspector = getStandardStructObjectInspector(singletonList("b"), singletonList(middleInspector));

    // Leaf values include nulls; only the c structs come from createNullableTestStructs.
    Iterable<Integer> leafValues = asList(111, null, 333, 444, null, 666, 777);
    Iterable<List> innerStructs = createNullableTestStructs(leafValues);
    List<List> middleStructs = createTestStructs(innerStructs);
    List<List> outerStructs = createTestStructs(middleStructs);

    // The same values serve as both the written and the expected read data.
    tester.testRoundTrip(outerInspector, outerStructs, outerStructs, "a", outerRowType, Optional.of(parquetSchema));
}
/**
 * Passes a struct column {@code a} through {@code tester.assertRoundTrip} using an explicit
 * Parquet schema in which every group and leaf is {@code required}: {@code a} holds the group
 * {@code b} (UTF8 binary {@code c}, int32 {@code d}) and a UTF8 binary {@code e}.
 */
@Test
public void testSchemaWithRequiredStruct()
        throws Exception
{
    String schemaText = "message hive_schema {"
            + " required group a {"
            + " required group b {"
            + " required binary c (UTF8);"
            + " required int32 d;"
            + " }"
            + " required binary e (UTF8);"
            + " }"
            + "} ";
    MessageType parquetSchema = parseMessageType(schemaText);

    Type innerRowType = RowType.from(asList(field("c", VARCHAR), field("d", INTEGER)));
    Type outerRowType = RowType.from(asList(field("b", innerRowType), field("e", VARCHAR)));

    // The same bound is used for both cycled string columns and for the integer column.
    int valueCount = 30000;
    Iterable<String> cColumn = limit(cycle(asList("c0", "c1", "c2", "c3", "c4", "c5", "c6", "c7")), valueCount);
    Iterable<Integer> dColumn = intsBetween(0, valueCount);
    Iterable<String> eColumn = limit(cycle(asList("e0", "e1", "e2", "e3", "e4", "e5", "e6", "e7")), valueCount);

    List<List> innerStructs = createTestStructs(cColumn, dColumn);
    List<List> outerStructs = createTestStructs(innerStructs, eColumn);

    ObjectInspector innerInspector = getStandardStructObjectInspector(
            asList("c", "d"),
            asList(javaStringObjectInspector, javaIntObjectInspector));
    ObjectInspector outerInspector = getStandardStructObjectInspector(
            asList("b", "e"),
            asList(innerInspector, javaStringObjectInspector));

    // Written and expected values are the same data, wrapped in two separate single-element arrays.
    tester.assertRoundTrip(
            singletonList(outerInspector),
            new Iterable<?>[] {outerStructs},
            new Iterable<?>[] {outerStructs},
            singletonList("a"),
            singletonList(outerRowType),
            Optional.of(parquetSchema));
}
throws Exception MessageType parquetSchema = parseMessageType("message hive_schema {" + " optional group a {" + " required group b {" +
/**
 * Reads the schema text stored under {@code PARQUET_HIVE_SCHEMA} in the configuration and
 * parses it with {@link MessageTypeParser#parseMessageType}.
 *
 * @param configuration the configuration holding the schema text
 * @return the parsed message type
 */
public static MessageType getSchema(final Configuration configuration) {
  final String schemaText = configuration.get(PARQUET_HIVE_SCHEMA);
  return MessageTypeParser.parseMessageType(schemaText);
}
/**
 * Parses the schema text stored under {@code PARQUET_CASCADING_SCHEMA}, keeps it in
 * {@code rootSchema}, and returns a write context built from that schema and an empty
 * metadata map.
 *
 * @param configuration the configuration holding the schema text
 * @return a write context for the parsed schema
 */
@Override
public WriteContext init(Configuration configuration) {
  rootSchema = MessageTypeParser.parseMessageType(configuration.get(PARQUET_CASCADING_SCHEMA));

  // No extra key/value metadata is attached.
  HashMap<String, String> extraMetadata = new HashMap<String, String>();
  return new WriteContext(rootSchema, extraMetadata);
}
public DataWritableRecordConverter(final GroupType requestedSchema, final Map<String, String> metadata) { this.root = new HiveStructConverter(requestedSchema, MessageTypeParser.parseMessageType(metadata.get(DataWritableReadSupport.HIVE_TABLE_AS_PARQUET_SCHEMA)), metadata); }
/**
 * Parses the schema text stored under the {@code "schema"} configuration key, keeps it in
 * {@code rootSchema}, and returns a write context built from that schema and an empty
 * metadata map.
 *
 * @param configuration the configuration holding the schema text
 * @return a write context for the parsed schema
 */
@Override
public WriteSupport.WriteContext init(final Configuration configuration) {
  this.rootSchema = MessageTypeParser.parseMessageType(configuration.get("schema"));

  // No extra key/value metadata is attached.
  final HashMap<String, String> extraMetadata = new HashMap<String, String>();
  return new WriteContext(this.rootSchema, extraMetadata);
}
/**
 * Resolves the schema to read with from a file schema and an optional textual projection.
 *
 * <p>When no projection string is given the file schema is returned unchanged. Otherwise the
 * string is parsed with {@link MessageTypeParser#parseMessageType} and the result is passed,
 * together with the file schema, to the {@link MessageType}-based overload of this method.
 *
 * @param fileMessageType the typed schema of the source
 * @param partialReadSchemaString the requested projection schema, or {@code null} for none
 * @return the typed schema that should be used to read
 */
public static MessageType getSchemaForRead(MessageType fileMessageType, String partialReadSchemaString) {
  if (partialReadSchemaString != null) {
    MessageType projection = MessageTypeParser.parseMessageType(partialReadSchemaString);
    return getSchemaForRead(fileMessageType, projection);
  }
  return fileMessageType;
}
/**
 * Reads the schema text stored under {@code PARQUET_EXAMPLE_SCHEMA}, passes it through
 * {@code checkNotNull} (with the key as the second argument), and parses the result.
 *
 * @param configuration the configuration holding the schema text
 * @return the parsed message type
 */
public static MessageType getSchema(Configuration configuration) {
  String schemaText = checkNotNull(configuration.get(PARQUET_EXAMPLE_SCHEMA), PARQUET_EXAMPLE_SCHEMA);
  return parseMessageType(schemaText);
}
/**
 * Sums {@code getTotalSize()} over every column chunk, across all given blocks, whose path is
 * contained in the requested schema.
 *
 * @param blocks the blocks whose column chunks are examined
 * @param requestedSchema schema text, parsed with {@link MessageTypeParser#parseMessageType}
 * @return the accumulated total size of the matching column chunks
 */
private static long end(List<BlockMetaData> blocks, String requestedSchema) {
  MessageType projection = MessageTypeParser.parseMessageType(requestedSchema);

  long matchedSize = 0;
  for (BlockMetaData currentBlock : blocks) {
    for (ColumnChunkMetaData chunk : currentBlock.getColumns()) {
      // Chunks whose path is not part of the requested schema do not count.
      if (!projection.containsPath(chunk.getPath().toArray())) {
        continue;
      }
      matchedSize += chunk.getTotalSize();
    }
  }
  return matchedSize;
}
/**
 * Passes a nested struct column {@code a.b.c.d} through {@code tester.testRoundTrip} using an
 * explicit Parquet schema in which {@code a} and {@code b} are {@code optional} groups,
 * {@code c} is a {@code required} group, and the leaf {@code d} is an {@code optional int32}.
 */
@Test
public void testSchemaWithOptionalRequiredOptionalFields()
        throws Exception
{
    String schemaText = "message hive_schema {"
            + " optional group a {"
            + " optional group b {"
            + " required group c {"
            + " optional int32 d;"
            + " }"
            + " }"
            + " }"
            + "} ";
    MessageType parquetSchema = parseMessageType(schemaText);

    // Row types, built from the innermost struct outwards.
    Type innerRowType = RowType.from(singletonList(field("d", INTEGER)));
    Type middleRowType = RowType.from(singletonList(field("c", innerRowType)));
    Type outerRowType = RowType.from(singletonList(field("b", middleRowType)));

    // Object inspectors mirror the same a -> b -> c -> d nesting.
    ObjectInspector innerInspector = getStandardStructObjectInspector(singletonList("d"), singletonList(javaIntObjectInspector));
    ObjectInspector middleInspector = getStandardStructObjectInspector(singletonList("c"), singletonList(innerInspector));
    ObjectInspector outerInspector = getStandardStructObjectInspector(singletonList("b"), singletonList(middleInspector));

    // Leaf values include nulls; only the b structs come from createNullableTestStructs.
    Iterable<Integer> leafValues = asList(111, null, 333, 444, null, 666, 777);
    List<List> innerStructs = createTestStructs(leafValues);
    Iterable<List> middleStructs = createNullableTestStructs(innerStructs);
    List<List> outerStructs = createTestStructs(middleStructs);

    // The same values serve as both the written and the expected read data.
    tester.testRoundTrip(outerInspector, outerStructs, outerStructs, "a", outerRowType, Optional.of(parquetSchema));
}
/**
 * Passes a nested struct column {@code a.b.c.d} through {@code tester.testRoundTrip} using an
 * explicit Parquet schema in which {@code a} is {@code optional}, {@code b} and {@code c} are
 * {@code required} groups, and the leaf {@code d} is an {@code optional int32}.
 */
@Test
public void testSchemaWithRequiredRequiredOptionalFields()
        throws Exception
{
    String schemaText = "message hive_schema {"
            + " optional group a {"
            + " required group b {"
            + " required group c {"
            + " optional int32 d;"
            + " }"
            + " }"
            + " }"
            + "} ";
    MessageType parquetSchema = parseMessageType(schemaText);

    // Row types, built from the innermost struct outwards.
    Type innerRowType = RowType.from(singletonList(field("d", INTEGER)));
    Type middleRowType = RowType.from(singletonList(field("c", innerRowType)));
    Type outerRowType = RowType.from(singletonList(field("b", middleRowType)));

    // Object inspectors mirror the same a -> b -> c -> d nesting.
    ObjectInspector innerInspector = getStandardStructObjectInspector(singletonList("d"), singletonList(javaIntObjectInspector));
    ObjectInspector middleInspector = getStandardStructObjectInspector(singletonList("c"), singletonList(innerInspector));
    ObjectInspector outerInspector = getStandardStructObjectInspector(singletonList("b"), singletonList(middleInspector));

    // Leaf values include nulls; every struct level is built with createTestStructs.
    Iterable<Integer> leafValues = asList(111, null, 333, 444, null, 666, 777);
    List<List> innerStructs = createTestStructs(leafValues);
    List<List> middleStructs = createTestStructs(innerStructs);
    List<List> outerStructs = createTestStructs(middleStructs);

    // The same values serve as both the written and the expected read data.
    tester.testRoundTrip(outerInspector, outerStructs, outerStructs, "a", outerRowType, Optional.of(parquetSchema));
}