fieldsMap.put(prefix, RowType.from(fieldsBuilder.build()));
private TypeCompatibility typeCompatibilityForRow(RowType firstType, RowType secondType) { List<Field> firstFields = firstType.getFields(); List<Field> secondFields = secondType.getFields(); if (firstFields.size() != secondFields.size()) { return TypeCompatibility.incompatible(); } ImmutableList.Builder<RowType.Field> fields = ImmutableList.builder(); boolean coercible = true; for (int i = 0; i < firstFields.size(); i++) { Type firstFieldType = firstFields.get(i).getType(); Type secondFieldType = secondFields.get(i).getType(); TypeCompatibility typeCompatibility = compatibility(firstFieldType, secondFieldType); if (!typeCompatibility.isCompatible()) { return TypeCompatibility.incompatible(); } Type commonParameterType = typeCompatibility.getCommonSuperType(); Optional<String> firstParameterName = firstFields.get(i).getName(); Optional<String> secondParameterName = secondFields.get(i).getName(); Optional<String> commonName = firstParameterName.equals(secondParameterName) ? firstParameterName : Optional.empty(); // ignore parameter name for coercible coercible &= typeCompatibility.isCoercible(); fields.add(new RowType.Field(commonName, commonParameterType)); } return TypeCompatibility.compatible(RowType.from(fields.build()), coercible); }
@Test
public void testDoubleRowMap()
{
    // multimap_agg with DOUBLE keys and row(f1 bigint, f2 double) values.
    RowType rowType = RowType.from(ImmutableList.of(
            RowType.field("f1", BIGINT),
            RowType.field("f2", DOUBLE)));
    testMultimapAgg(
            DOUBLE,
            ImmutableList.of(1.0, 2.0, 3.0),
            rowType,
            ImmutableList.of(
                    ImmutableList.of(1L, 1.0),
                    ImmutableList.of(2L, 2.0),
                    ImmutableList.of(3L, 3.0)));
}
/**
 * Builds a three-field row type whose fields "a", "b" and "c" all share the given element type.
 */
private static RowType createRowType(Type type)
{
    ImmutableList.Builder<RowType.Field> fields = ImmutableList.builder();
    for (String name : new String[] {"a", "b", "c"}) {
        fields.add(RowType.field(name, type));
    }
    return RowType.from(fields.build());
}
@Test
public void testSchemaWithOptionalRequiredOptionalFields()
        throws Exception
{
    // Nesting: optional a > optional b > required c > optional int32 d.
    MessageType parquetSchema = parseMessageType("message hive_schema {" +
            " optional group a {" +
            " optional group b {" +
            " required group c {" +
            " optional int32 d;" +
            " }" +
            " }" +
            " }" +
            "} ");
    // Matching Presto row types, built innermost-first.
    Type typeC = RowType.from(singletonList(field("d", INTEGER)));
    Type typeB = RowType.from(singletonList(field("c", typeC)));
    Type typeA = RowType.from(singletonList(field("b", typeB)));
    Iterable<Integer> leafValues = asList(111, null, 333, 444, null, 666, 777);
    // c is required, so its structs are non-nullable; b is optional, so some b structs are null.
    List<List> structsC = createTestStructs(leafValues);
    Iterable<List> structsB = createNullableTestStructs(structsC);
    List<List> structsA = createTestStructs(structsB);
    ObjectInspector inspectorC = getStandardStructObjectInspector(singletonList("d"), singletonList(javaIntObjectInspector));
    ObjectInspector inspectorB = getStandardStructObjectInspector(singletonList("c"), singletonList(inspectorC));
    ObjectInspector inspectorA = getStandardStructObjectInspector(singletonList("b"), singletonList(inspectorB));
    tester.testRoundTrip(inspectorA, structsA, structsA, "a", typeA, Optional.of(parquetSchema));
}
@Test
public void testSchemaWithRequiredOptionalOptionalFields()
        throws Exception
{
    // Nesting: optional a > required b > optional c > optional int32 d.
    MessageType parquetSchema = parseMessageType("message hive_schema {" +
            " optional group a {" +
            " required group b {" +
            " optional group c {" +
            " optional int32 d;" +
            " }" +
            " }" +
            " }" +
            "} ");
    // Matching Presto row types, built innermost-first.
    Type typeC = RowType.from(singletonList(field("d", INTEGER)));
    Type typeB = RowType.from(singletonList(field("c", typeC)));
    Type typeA = RowType.from(singletonList(field("b", typeB)));
    Iterable<Integer> leafValues = asList(111, null, 333, 444, null, 666, 777);
    // c is optional, so some c structs are null; b is required, so b structs are non-nullable.
    Iterable<List> structsC = createNullableTestStructs(leafValues);
    List<List> structsB = createTestStructs(structsC);
    List<List> structsA = createTestStructs(structsB);
    ObjectInspector inspectorC = getStandardStructObjectInspector(singletonList("d"), singletonList(javaIntObjectInspector));
    ObjectInspector inspectorB = getStandardStructObjectInspector(singletonList("c"), singletonList(inspectorC));
    ObjectInspector inspectorA = getStandardStructObjectInspector(singletonList("b"), singletonList(inspectorB));
    tester.testRoundTrip(inspectorA, structsA, structsA, "a", typeA, Optional.of(parquetSchema));
}
@Test
public void testSchemaWithOptionalOptionalRequiredFields()
        throws Exception
{
    // Nesting: optional a > optional b > optional c > required binary d (UTF8).
    MessageType parquetSchema = parseMessageType("message hive_schema {" +
            " optional group a {" +
            " optional group b {" +
            " optional group c {" +
            " required binary d (UTF8);" +
            " }" +
            " }" +
            " }" +
            "} ");
    // Matching Presto row types, built innermost-first.
    Type typeC = RowType.from(singletonList(field("d", VARCHAR)));
    Type typeB = RowType.from(singletonList(field("c", typeC)));
    Type typeA = RowType.from(singletonList(field("b", typeB)));
    Iterable<String> leafValues = asList("d1", "d2", "d3", "d4", "d5", "d6", "d7");
    // Both b and c are optional groups, so both levels use nullable structs.
    Iterable<List> structsC = createNullableTestStructs(leafValues);
    Iterable<List> structsB = createNullableTestStructs(structsC);
    List<List> structsA = createTestStructs(structsB);
    ObjectInspector inspectorC = getStandardStructObjectInspector(singletonList("d"), singletonList(javaStringObjectInspector));
    ObjectInspector inspectorB = getStandardStructObjectInspector(singletonList("c"), singletonList(inspectorC));
    ObjectInspector inspectorA = getStandardStructObjectInspector(singletonList("b"), singletonList(inspectorB));
    tester.testRoundTrip(inspectorA, structsA, structsA, "a", typeA, Optional.of(parquetSchema));
}
@Test
public void testSchemaWithRequiredRequiredOptionalFields()
        throws Exception
{
    // Nesting: optional a > required b > required c > optional int32 d.
    MessageType parquetSchema = parseMessageType("message hive_schema {" +
            " optional group a {" +
            " required group b {" +
            " required group c {" +
            " optional int32 d;" +
            " }" +
            " }" +
            " }" +
            "} ");
    // Matching Presto row types, built innermost-first.
    Type typeC = RowType.from(singletonList(field("d", INTEGER)));
    Type typeB = RowType.from(singletonList(field("c", typeC)));
    Type typeA = RowType.from(singletonList(field("b", typeB)));
    Iterable<Integer> leafValues = asList(111, null, 333, 444, null, 666, 777);
    // b and c are both required groups, so every struct level is non-nullable.
    List<List> structsC = createTestStructs(leafValues);
    List<List> structsB = createTestStructs(structsC);
    List<List> structsA = createTestStructs(structsB);
    ObjectInspector inspectorC = getStandardStructObjectInspector(singletonList("d"), singletonList(javaIntObjectInspector));
    ObjectInspector inspectorB = getStandardStructObjectInspector(singletonList("c"), singletonList(inspectorC));
    ObjectInspector inspectorA = getStandardStructObjectInspector(singletonList("b"), singletonList(inspectorB));
    tester.testRoundTrip(inspectorA, structsA, structsA, "a", typeA, Optional.of(parquetSchema));
}
@Test
public void testSchemaWithRequiredOptionalRequiredFields()
        throws Exception
{
    // Nesting: optional a > required b > optional c > required binary d (UTF8).
    MessageType parquetSchema = parseMessageType("message hive_schema {" +
            " optional group a {" +
            " required group b {" +
            " optional group c {" +
            " required binary d (UTF8);" +
            " }" +
            " }" +
            " }" +
            "} ");
    // Matching Presto row types, built innermost-first.
    Type typeC = RowType.from(singletonList(field("d", VARCHAR)));
    Type typeB = RowType.from(singletonList(field("c", typeC)));
    Type typeA = RowType.from(singletonList(field("b", typeB)));
    Iterable<String> leafValues = asList("d1", "d2", "d3", "d4", "d5", "d6", "d7");
    // c is optional, so some c structs are null; b is required, so b structs are non-nullable.
    Iterable<List> structsC = createNullableTestStructs(leafValues);
    List<List> structsB = createTestStructs(structsC);
    List<List> structsA = createTestStructs(structsB);
    ObjectInspector inspectorC = getStandardStructObjectInspector(singletonList("d"), singletonList(javaStringObjectInspector));
    ObjectInspector inspectorB = getStandardStructObjectInspector(singletonList("c"), singletonList(inspectorC));
    ObjectInspector inspectorA = getStandardStructObjectInspector(singletonList("b"), singletonList(inspectorB));
    tester.testRoundTrip(inspectorA, structsA, structsA, "a", typeA, Optional.of(parquetSchema));
}
"} "); Type cType = RowType.from(singletonList(field("d", VARCHAR))); Type bType = RowType.from(singletonList(field("c", cType))); Type aType = RowType.from(singletonList(field("b", bType))); Iterable<String> dValues = asList("d1", "d2", "d3", "d4", "d5", "d6", "d7"); Iterable<List> cValues = createNullableTestStructs(dValues); List<List> aValues = createTestStructs(bValues); Type gType = RowType.from(singletonList(field("h", VARCHAR))); Type fType = RowType.from(singletonList(field("g", gType))); Type eType = RowType.from(singletonList(field("f", fType))); Iterable<String> hValues = asList("h1", "h2", "h3", "h4", "h5", "h6", "h7"); Iterable<List> gValues = createNullableTestStructs(hValues);
@Test
public void testSchemaWithRequiredStruct()
        throws Exception
{
    // Fully-required nested struct: a { b { c, d }, e } with no optional levels.
    MessageType parquetSchema = parseMessageType("message hive_schema {" +
            " required group a {" +
            " required group b {" +
            " required binary c (UTF8);" +
            " required int32 d;" +
            " }" +
            " required binary e (UTF8);" +
            " }" +
            "} ");
    Type typeB = RowType.from(asList(field("c", VARCHAR), field("d", INTEGER)));
    Type typeA = RowType.from(asList(field("b", typeB), field("e", VARCHAR)));
    // 30000 rows: c and e cycle through 8 fixed strings, d is a running integer.
    Iterable<String> valuesC = limit(cycle(asList("c0", "c1", "c2", "c3", "c4", "c5", "c6", "c7")), 30000);
    Iterable<Integer> valuesD = intsBetween(0, 30000);
    Iterable<String> valuesE = limit(cycle(asList("e0", "e1", "e2", "e3", "e4", "e5", "e6", "e7")), 30000);
    List<List> structsB = createTestStructs(valuesC, valuesD);
    List<List> structsA = createTestStructs(structsB, valuesE);
    ObjectInspector inspectorB = getStandardStructObjectInspector(asList("c", "d"), asList(javaStringObjectInspector, javaIntObjectInspector));
    ObjectInspector inspectorA = getStandardStructObjectInspector(asList("b", "e"), asList(inspectorB, javaStringObjectInspector));
    tester.assertRoundTrip(
            singletonList(inspectorA),
            new Iterable<?>[] {structsA},
            new Iterable<?>[] {structsA},
            singletonList("a"),
            singletonList(typeA),
            Optional.of(parquetSchema));
}
@Test
public void testRowHistograms()
{
    // histogram over row(f1 bigint, f2 double) keys.
    RowType rowType = RowType.from(ImmutableList.of(
            RowType.field("f1", BIGINT),
            RowType.field("f2", DOUBLE)));
    MapType mapType = mapType(rowType, BIGINT);
    InternalAggregationFunction function = getAggregation(mapType.getTypeSignature(), rowType.getTypeSignature());

    // Three distinct rows, each expected to be counted exactly once.
    BlockBuilder rowBlock = rowType.createBlockBuilder(null, 3);
    rowType.writeObject(rowBlock, toRow(ImmutableList.of(BIGINT, DOUBLE), 1L, 1.0));
    rowType.writeObject(rowBlock, toRow(ImmutableList.of(BIGINT, DOUBLE), 2L, 2.0));
    rowType.writeObject(rowBlock, toRow(ImmutableList.of(BIGINT, DOUBLE), 3L, 3.0));
    assertAggregation(
            function,
            ImmutableMap.of(
                    ImmutableList.of(1L, 1.0), 1L,
                    ImmutableList.of(2L, 2.0), 1L,
                    ImmutableList.of(3L, 3.0), 1L),
            rowBlock.build());
}
@Test
public void testParquetTupleDomainStruct()
{
    HiveColumnHandle columnHandle = new HiveColumnHandle(
            "my_struct",
            HiveType.valueOf("struct<a:int,b:int>"),
            parseTypeSignature(StandardTypes.ROW),
            0,
            REGULAR,
            Optional.empty());
    RowType rowType = RowType.from(ImmutableList.of(new RowType.Field(Optional.of("my_struct"), INTEGER)));
    TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, Domain.notNull(rowType)));

    MessageType fileSchema = new MessageType("hive_schema",
            new GroupType(OPTIONAL, "my_struct",
                    new PrimitiveType(OPTIONAL, INT32, "a"),
                    new PrimitiveType(OPTIONAL, INT32, "b")));
    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> tupleDomain = getParquetTupleDomain(descriptorsByPath, domain);
    // Struct predicates are not translated into Parquet column domains, so the result is empty.
    assertTrue(tupleDomain.getDomains().get().isEmpty());
}
"{\"a\" \n :1, \"b\": \t [2, 3]}", "ROW(a INTEGER, b ARRAY<INTEGER>)", RowType.from(ImmutableList.of( RowType.field("a", INTEGER), RowType.field("b", new ArrayType(INTEGER)))),
@Test
public void testRowDisplayMixedUnnamedColumns()
{
    MapType mapOfBooleanToDouble = new MapType(
            BOOLEAN,
            DOUBLE,
            methodHandle(TestRowType.class, "throwUnsupportedOperation"),
            methodHandle(TestRowType.class, "throwUnsupportedOperation"),
            methodHandle(TestRowType.class, "throwUnsupportedOperation"),
            methodHandle(TestRowType.class, "throwUnsupportedOperation"));
    // Mix of anonymous and named fields: anonymous fields render as the bare type name.
    RowType row = RowType.from(asList(
            RowType.field(BOOLEAN),
            RowType.field("double_col", DOUBLE),
            RowType.field(new ArrayType(VARCHAR)),
            RowType.field("map_col", mapOfBooleanToDouble)));
    assertEquals(
            row.getDisplayName(),
            "row(boolean, double_col double, array(varchar), map_col map(boolean, double))");
}
Iterable<Object> simpleStructs = transform(insertNullEvery(5, writeValues), RcFileTester::toHiveStruct); testRoundTripType( RowType.from(ImmutableList.of(RowType.field("field", createRowType(type)))), transform(simpleStructs, Collections::singletonList), skipFormatsSet);
@Test
public void testDoubleRowMap()
{
    // Aggregation with DOUBLE keys and row(f1 integer, f2 double) values.
    RowType rowType = RowType.from(ImmutableList.of(
            RowType.field("f1", INTEGER),
            RowType.field("f2", DOUBLE)));
    MapType mapType = mapType(DOUBLE, rowType);
    InternalAggregationFunction function = metadata.getFunctionRegistry().getAggregateFunctionImplementation(
            new Signature(NAME, AGGREGATE, mapType.getTypeSignature(), parseTypeSignature(StandardTypes.DOUBLE), rowType.getTypeSignature()));

    BlockBuilder rowBlock = rowType.createBlockBuilder(null, 3);
    rowType.writeObject(rowBlock, toRow(ImmutableList.of(INTEGER, DOUBLE), 1L, 1.0));
    rowType.writeObject(rowBlock, toRow(ImmutableList.of(INTEGER, DOUBLE), 2L, 2.0));
    rowType.writeObject(rowBlock, toRow(ImmutableList.of(INTEGER, DOUBLE), 3L, 3.0));
    assertAggregation(
            function,
            ImmutableMap.of(
                    1.0, ImmutableList.of(1, 1.0),
                    2.0, ImmutableList.of(2, 2.0),
                    3.0, ImmutableList.of(3, 3.0)),
            createDoublesBlock(1.0, 2.0, 3.0),
            rowBlock.build());
}
@Test
public void testRowDisplayName()
{
    MapType mapOfBooleanToDouble = new MapType(
            BOOLEAN,
            DOUBLE,
            methodHandle(TestRowType.class, "throwUnsupportedOperation"),
            methodHandle(TestRowType.class, "throwUnsupportedOperation"),
            methodHandle(TestRowType.class, "throwUnsupportedOperation"),
            methodHandle(TestRowType.class, "throwUnsupportedOperation"));
    // All fields named: display name lists "name type" for every field.
    RowType row = RowType.from(asList(
            RowType.field("bool_col", BOOLEAN),
            RowType.field("double_col", DOUBLE),
            RowType.field("array_col", new ArrayType(VARCHAR)),
            RowType.field("map_col", mapOfBooleanToDouble)));
    assertEquals(
            row.getDisplayName(),
            "row(bool_col boolean, double_col double, array_col array(varchar), map_col map(boolean, double))");
}
@Test
public void testParquetTupleDomainStructArray()
{
    HiveColumnHandle columnHandle = new HiveColumnHandle(
            "my_array_struct",
            HiveType.valueOf("array<struct<a:int>>"),
            parseTypeSignature(StandardTypes.ARRAY),
            0,
            REGULAR,
            Optional.empty());
    RowType rowType = RowType.from(ImmutableList.of(new RowType.Field(Optional.of("a"), INTEGER)));
    TupleDomain<HiveColumnHandle> domain = withColumnDomains(ImmutableMap.of(columnHandle, Domain.notNull(new ArrayType(rowType))));

    // Hive-style 3-level list encoding: array > repeated "bag" > optional "array_element".
    MessageType fileSchema = new MessageType("hive_schema",
            new GroupType(OPTIONAL, "my_array_struct",
                    new GroupType(REPEATED, "bag",
                            new GroupType(OPTIONAL, "array_element",
                                    new PrimitiveType(OPTIONAL, INT32, "a")))));
    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> tupleDomain = getParquetTupleDomain(descriptorsByPath, domain);
    // Predicates on arrays of structs are not pushed down, so no Parquet domain is produced.
    assertTrue(tupleDomain.getDomains().get().isEmpty());
}
RowType.from(ImmutableList.of( RowType.field("a", BIGINT), RowType.field("b", DOUBLE),