/**
 * Exercises {@code testMultimapAgg} with DOUBLE keys and row-typed values,
 * where each row is made of a BIGINT field {@code f1} and a DOUBLE field {@code f2}.
 */
@Test
public void testDoubleRowMap()
{
    // Row type used for the aggregated values.
    RowType.Field bigintField = RowType.field("f1", BIGINT);
    RowType.Field doubleField = RowType.field("f2", DOUBLE);
    RowType valueRowType = RowType.from(ImmutableList.of(bigintField, doubleField));

    // One key per value row.
    ImmutableList<Double> keys = ImmutableList.of(1.0, 2.0, 3.0);

    testMultimapAgg(
            DOUBLE,
            keys,
            valueRowType,
            ImmutableList.of(
                    ImmutableList.of(1L, 1.0),
                    ImmutableList.of(2L, 2.0),
                    ImmutableList.of(3L, 3.0)));
}
/**
 * Checks the display name of a row type that mixes anonymous fields
 * (boolean, array) with named fields (double_col, map_col).
 */
@Test
public void testRowDisplayMixedUnnamedColumns()
{
    // All four method handles passed to MapType point at TestRowType.throwUnsupportedOperation.
    MapType booleanToDouble = new MapType(
            BOOLEAN,
            DOUBLE,
            methodHandle(TestRowType.class, "throwUnsupportedOperation"),
            methodHandle(TestRowType.class, "throwUnsupportedOperation"),
            methodHandle(TestRowType.class, "throwUnsupportedOperation"),
            methodHandle(TestRowType.class, "throwUnsupportedOperation"));

    RowType.Field anonymousBoolean = RowType.field(BOOLEAN);
    RowType.Field namedDouble = RowType.field("double_col", DOUBLE);
    RowType.Field anonymousArray = RowType.field(new ArrayType(VARCHAR));
    RowType.Field namedMap = RowType.field("map_col", booleanToDouble);

    List<RowType.Field> fields = asList(anonymousBoolean, namedDouble, anonymousArray, namedMap);
    RowType row = RowType.from(fields);

    // Anonymous fields are expected to show only their type; named fields show "name type".
    String expectedDisplayName = "row(boolean, double_col double, array(varchar), map_col map(boolean, double))";
    assertEquals(row.getDisplayName(), expectedDisplayName);
}
/**
 * Checks the display name of a row type whose four fields
 * (boolean, double, array, map) are all named.
 */
@Test
public void testRowDisplayName()
{
    // All four method handles passed to MapType point at TestRowType.throwUnsupportedOperation.
    MapType booleanToDouble = new MapType(
            BOOLEAN,
            DOUBLE,
            methodHandle(TestRowType.class, "throwUnsupportedOperation"),
            methodHandle(TestRowType.class, "throwUnsupportedOperation"),
            methodHandle(TestRowType.class, "throwUnsupportedOperation"),
            methodHandle(TestRowType.class, "throwUnsupportedOperation"));

    RowType.Field boolColumn = RowType.field("bool_col", BOOLEAN);
    RowType.Field doubleColumn = RowType.field("double_col", DOUBLE);
    RowType.Field arrayColumn = RowType.field("array_col", new ArrayType(VARCHAR));
    RowType.Field mapColumn = RowType.field("map_col", booleanToDouble);

    List<RowType.Field> fields = asList(boolColumn, doubleColumn, arrayColumn, mapColumn);
    RowType row = RowType.from(fields);

    // Each field is expected to render as "name type", comma separated.
    String expectedDisplayName = "row(bool_col boolean, double_col double, array_col array(varchar), map_col map(boolean, double))";
    assertEquals(row.getDisplayName(), expectedDisplayName);
}
/**
 * Round-trips struct column {@code a} against an explicit Parquet schema in which
 * {@code a}, its nested group {@code b} and every leaf field are declared required.
 */
@Test
public void testSchemaWithRequiredStruct()
        throws Exception
{
    final int rowCount = 30000;

    String schemaText = "message hive_schema {" +
            " required group a {" +
            " required group b {" +
            " required binary c (UTF8);" +
            " required int32 d;" +
            " }" +
            " required binary e (UTF8);" +
            " }" +
            "} ";
    MessageType parquetSchema = parseMessageType(schemaText);

    // Leaf values: c and e cycle through eight strings, d counts up from 0.
    Iterable<String> cValues = limit(cycle(asList("c0", "c1", "c2", "c3", "c4", "c5", "c6", "c7")), rowCount);
    Iterable<Integer> dValues = intsBetween(0, rowCount);
    Iterable<String> eValues = limit(cycle(asList("e0", "e1", "e2", "e3", "e4", "e5", "e6", "e7")), rowCount);

    // Inner struct b = (c, d).
    Type innerStructType = RowType.from(asList(field("c", VARCHAR), field("d", INTEGER)));
    List<List> innerStructs = createTestStructs(cValues, dValues);
    ObjectInspector innerInspector = getStandardStructObjectInspector(
            asList("c", "d"),
            asList(javaStringObjectInspector, javaIntObjectInspector));

    // Outer struct a = (b, e).
    Type outerStructType = RowType.from(asList(field("b", innerStructType), field("e", VARCHAR)));
    List<List> outerStructs = createTestStructs(innerStructs, eValues);
    ObjectInspector outerInspector = getStandardStructObjectInspector(
            asList("b", "e"),
            asList(innerInspector, javaStringObjectInspector));

    // The same values are used as both the written and the expected read data.
    tester.assertRoundTrip(
            singletonList(outerInspector),
            new Iterable<?>[] {outerStructs},
            new Iterable<?>[] {outerStructs},
            singletonList("a"),
            singletonList(outerStructType),
            Optional.of(parquetSchema));
}
/**
 * Round-trips the nested struct {@code a.b.c.d} against a Parquet schema where
 * groups {@code a}, {@code b} and {@code c} are optional and the string leaf {@code d} is required.
 */
@Test
public void testSchemaWithOptionalOptionalRequiredFields()
        throws Exception
{
    String schemaText = "message hive_schema {" +
            " optional group a {" +
            " optional group b {" +
            " optional group c {" +
            " required binary d (UTF8);" +
            " }" +
            " }" +
            " }" +
            "} ";
    MessageType parquetSchema = parseMessageType(schemaText);

    // Leaf d: no nulls, matching "required" in the schema.
    Iterable<String> leafValues = asList("d1", "d2", "d3", "d4", "d5", "d6", "d7");

    // Level c (optional): structs built with the nullable helper.
    Type cStructType = RowType.from(singletonList(field("d", VARCHAR)));
    Iterable<List> cStructs = createNullableTestStructs(leafValues);
    ObjectInspector cStructInspector = getStandardStructObjectInspector(singletonList("d"), singletonList(javaStringObjectInspector));

    // Level b (optional): structs built with the nullable helper.
    Type bStructType = RowType.from(singletonList(field("c", cStructType)));
    Iterable<List> bStructs = createNullableTestStructs(cStructs);
    ObjectInspector bStructInspector = getStandardStructObjectInspector(singletonList("c"), singletonList(cStructInspector));

    // Level a: the top-level column.
    Type aStructType = RowType.from(singletonList(field("b", bStructType)));
    List<List> aStructs = createTestStructs(bStructs);
    ObjectInspector aStructInspector = getStandardStructObjectInspector(singletonList("b"), singletonList(bStructInspector));

    // The same values are used as both the written and the expected read data.
    tester.testRoundTrip(aStructInspector, aStructs, aStructs, "a", aStructType, Optional.of(parquetSchema));
}
/**
 * Round-trips the nested struct {@code a.b.c.d} against a Parquet schema where
 * groups {@code b} and {@code c} are required and the int leaf {@code d} is optional.
 */
@Test
public void testSchemaWithRequiredRequiredOptionalFields()
        throws Exception
{
    String schemaText = "message hive_schema {" +
            " optional group a {" +
            " required group b {" +
            " required group c {" +
            " optional int32 d;" +
            " }" +
            " }" +
            " }" +
            "} ";
    MessageType parquetSchema = parseMessageType(schemaText);

    // Leaf d: contains nulls, matching "optional" in the schema.
    Iterable<Integer> leafValues = asList(111, null, 333, 444, null, 666, 777);

    // Level c (required).
    Type cStructType = RowType.from(singletonList(field("d", INTEGER)));
    List<List> cStructs = createTestStructs(leafValues);
    ObjectInspector cStructInspector = getStandardStructObjectInspector(singletonList("d"), singletonList(javaIntObjectInspector));

    // Level b (required).
    Type bStructType = RowType.from(singletonList(field("c", cStructType)));
    List<List> bStructs = createTestStructs(cStructs);
    ObjectInspector bStructInspector = getStandardStructObjectInspector(singletonList("c"), singletonList(cStructInspector));

    // Level a: the top-level column.
    Type aStructType = RowType.from(singletonList(field("b", bStructType)));
    List<List> aStructs = createTestStructs(bStructs);
    ObjectInspector aStructInspector = getStandardStructObjectInspector(singletonList("b"), singletonList(bStructInspector));

    // The same values are used as both the written and the expected read data.
    tester.testRoundTrip(aStructInspector, aStructs, aStructs, "a", aStructType, Optional.of(parquetSchema));
}
/**
 * Round-trips the nested struct {@code a.b.c.d} against a Parquet schema where
 * group {@code b} is required, group {@code c} is optional and the string leaf {@code d} is required.
 */
@Test
public void testSchemaWithRequiredOptionalRequiredFields()
        throws Exception
{
    String schemaText = "message hive_schema {" +
            " optional group a {" +
            " required group b {" +
            " optional group c {" +
            " required binary d (UTF8);" +
            " }" +
            " }" +
            " }" +
            "} ";
    MessageType parquetSchema = parseMessageType(schemaText);

    // Leaf d: no nulls, matching "required" in the schema.
    Iterable<String> leafValues = asList("d1", "d2", "d3", "d4", "d5", "d6", "d7");

    // Level c (optional): structs built with the nullable helper.
    Type cStructType = RowType.from(singletonList(field("d", VARCHAR)));
    Iterable<List> cStructs = createNullableTestStructs(leafValues);
    ObjectInspector cStructInspector = getStandardStructObjectInspector(singletonList("d"), singletonList(javaStringObjectInspector));

    // Level b (required).
    Type bStructType = RowType.from(singletonList(field("c", cStructType)));
    List<List> bStructs = createTestStructs(cStructs);
    ObjectInspector bStructInspector = getStandardStructObjectInspector(singletonList("c"), singletonList(cStructInspector));

    // Level a: the top-level column.
    Type aStructType = RowType.from(singletonList(field("b", bStructType)));
    List<List> aStructs = createTestStructs(bStructs);
    ObjectInspector aStructInspector = getStandardStructObjectInspector(singletonList("b"), singletonList(bStructInspector));

    // The same values are used as both the written and the expected read data.
    tester.testRoundTrip(aStructInspector, aStructs, aStructs, "a", aStructType, Optional.of(parquetSchema));
}
/**
 * Round-trips the nested struct {@code a.b.c.d} against a Parquet schema where
 * group {@code b} is optional, group {@code c} is required and the int leaf {@code d} is optional.
 */
@Test
public void testSchemaWithOptionalRequiredOptionalFields()
        throws Exception
{
    String schemaText = "message hive_schema {" +
            " optional group a {" +
            " optional group b {" +
            " required group c {" +
            " optional int32 d;" +
            " }" +
            " }" +
            " }" +
            "} ";
    MessageType parquetSchema = parseMessageType(schemaText);

    // Leaf d: contains nulls, matching "optional" in the schema.
    Iterable<Integer> leafValues = asList(111, null, 333, 444, null, 666, 777);

    // Level c (required).
    Type cStructType = RowType.from(singletonList(field("d", INTEGER)));
    List<List> cStructs = createTestStructs(leafValues);
    ObjectInspector cStructInspector = getStandardStructObjectInspector(singletonList("d"), singletonList(javaIntObjectInspector));

    // Level b (optional): structs built with the nullable helper.
    Type bStructType = RowType.from(singletonList(field("c", cStructType)));
    Iterable<List> bStructs = createNullableTestStructs(cStructs);
    ObjectInspector bStructInspector = getStandardStructObjectInspector(singletonList("c"), singletonList(cStructInspector));

    // Level a: the top-level column.
    Type aStructType = RowType.from(singletonList(field("b", bStructType)));
    List<List> aStructs = createTestStructs(bStructs);
    ObjectInspector aStructInspector = getStandardStructObjectInspector(singletonList("b"), singletonList(bStructInspector));

    // The same values are used as both the written and the expected read data.
    tester.testRoundTrip(aStructInspector, aStructs, aStructs, "a", aStructType, Optional.of(parquetSchema));
}
/**
 * Round-trips the nested struct {@code a.b.c.d} against a Parquet schema where
 * group {@code b} is required while group {@code c} and the int leaf {@code d} are optional.
 */
@Test
public void testSchemaWithRequiredOptionalOptionalFields()
        throws Exception
{
    String schemaText = "message hive_schema {" +
            " optional group a {" +
            " required group b {" +
            " optional group c {" +
            " optional int32 d;" +
            " }" +
            " }" +
            " }" +
            "} ";
    MessageType parquetSchema = parseMessageType(schemaText);

    // Leaf d: contains nulls, matching "optional" in the schema.
    Iterable<Integer> leafValues = asList(111, null, 333, 444, null, 666, 777);

    // Level c (optional): structs built with the nullable helper.
    Type cStructType = RowType.from(singletonList(field("d", INTEGER)));
    Iterable<List> cStructs = createNullableTestStructs(leafValues);
    ObjectInspector cStructInspector = getStandardStructObjectInspector(singletonList("d"), singletonList(javaIntObjectInspector));

    // Level b (required).
    Type bStructType = RowType.from(singletonList(field("c", cStructType)));
    List<List> bStructs = createTestStructs(cStructs);
    ObjectInspector bStructInspector = getStandardStructObjectInspector(singletonList("c"), singletonList(cStructInspector));

    // Level a: the top-level column.
    Type aStructType = RowType.from(singletonList(field("b", bStructType)));
    List<List> aStructs = createTestStructs(bStructs);
    ObjectInspector aStructInspector = getStandardStructObjectInspector(singletonList("b"), singletonList(bStructInspector));

    // The same values are used as both the written and the expected read data.
    tester.testRoundTrip(aStructInspector, aStructs, aStructs, "a", aStructType, Optional.of(parquetSchema));
}
"cast(row(json '2', json '1.5', json 'true', json '\"abc\"', json '[1, 2]') as row(a BIGINT, b DOUBLE, c BOOLEAN, d VARCHAR, e ARRAY(BIGINT)))", RowType.from(ImmutableList.of( RowType.field("a", BIGINT), RowType.field("b", DOUBLE), RowType.field("c", BOOLEAN), RowType.field("d", VARCHAR), RowType.field("e", new ArrayType(BIGINT)))), asList(2L, 1.5, true, "abc", ImmutableList.of(1L, 2L)));
"} "); Type cType = RowType.from(singletonList(field("d", VARCHAR))); Type bType = RowType.from(singletonList(field("c", cType))); Type aType = RowType.from(singletonList(field("b", bType))); Iterable<String> dValues = asList("d1", "d2", "d3", "d4", "d5", "d6", "d7"); Iterable<List> cValues = createNullableTestStructs(dValues); List<List> aValues = createTestStructs(bValues); Type gType = RowType.from(singletonList(field("h", VARCHAR))); Type fType = RowType.from(singletonList(field("g", gType))); Type eType = RowType.from(singletonList(field("f", fType))); Iterable<String> hValues = asList("h1", "h2", "h3", "h4", "h5", "h6", "h7"); Iterable<List> gValues = createNullableTestStructs(hValues);
/**
 * Runs the aggregation obtained from {@code getAggregation} over three distinct
 * (BIGINT, DOUBLE) rows and expects each row to be mapped to a count of 1.
 */
@Test
public void testRowHistograms()
{
    RowType.Field bigintField = RowType.field("f1", BIGINT);
    RowType.Field doubleField = RowType.field("f2", DOUBLE);
    RowType innerRowType = RowType.from(ImmutableList.of(bigintField, doubleField));

    // Result type: map from the row type to BIGINT.
    MapType mapType = mapType(innerRowType, BIGINT);
    InternalAggregationFunction aggregationFunction = getAggregation(
            mapType.getTypeSignature(),
            innerRowType.getTypeSignature());

    // Input block holding the rows (1, 1.0), (2, 2.0) and (3, 3.0).
    BlockBuilder builder = innerRowType.createBlockBuilder(null, 3);
    for (long i = 1; i <= 3; i++) {
        innerRowType.writeObject(builder, toRow(ImmutableList.of(BIGINT, DOUBLE), i, (double) i));
    }

    assertAggregation(
            aggregationFunction,
            ImmutableMap.of(
                    ImmutableList.of(1L, 1.0), 1L,
                    ImmutableList.of(2L, 2.0), 1L,
                    ImmutableList.of(3L, 3.0), 1L),
            builder.build());
}
"CAST(JSON '{\"k2\": null, \"k1\": null}' AS ROW(k1 VARCHAR, k2 BIGINT))", RowType.from(ImmutableList.of( RowType.field("k1", VARCHAR), RowType.field("k2", BIGINT))), Lists.newArrayList(null, null)); "CAST(JSON '{\"k1\": [1, 2], \"used\": 3, \"k2\": [4, 5]}' AS ROW(used BIGINT))", RowType.from(ImmutableList.of( RowType.field("used", BIGINT))), ImmutableList.of(3L)); assertFunction( "CAST(JSON '[{\"k1\": [1, 2], \"used\": 3, \"k2\": [4, 5]}]' AS ARRAY<ROW(used BIGINT)>)", new ArrayType(RowType.from(ImmutableList.of( RowType.field("used", BIGINT)))), ImmutableList.of(ImmutableList.of(3L))); "CAST(JSON '{\"a\":1,\"c\":3}' AS ROW(a BIGINT, b BIGINT, c BIGINT, d BIGINT))", RowType.from(ImmutableList.of( RowType.field("a", BIGINT), RowType.field("b", BIGINT), RowType.field("c", BIGINT), RowType.field("d", BIGINT))), asList(1L, null, 3L, null)); assertFunction( new ArrayType( RowType.from(ImmutableList.of( RowType.field("a", BIGINT), RowType.field("b", BIGINT),
/**
 * Runs the aggregate function registered under {@code NAME} with DOUBLE keys and
 * (INTEGER, DOUBLE) row values, expecting a map from each key to its row.
 */
@Test
public void testDoubleRowMap()
{
    RowType.Field integerField = RowType.field("f1", INTEGER);
    RowType.Field doubleField = RowType.field("f2", DOUBLE);
    RowType innerRowType = RowType.from(ImmutableList.of(integerField, doubleField));

    // Result type: map from DOUBLE to the row type.
    MapType mapType = mapType(DOUBLE, innerRowType);

    // Look up the implementation for (double, row) -> map(double, row).
    Signature aggregationSignature = new Signature(
            NAME,
            AGGREGATE,
            mapType.getTypeSignature(),
            parseTypeSignature(StandardTypes.DOUBLE),
            innerRowType.getTypeSignature());
    InternalAggregationFunction aggFunc = metadata.getFunctionRegistry().getAggregateFunctionImplementation(aggregationSignature);

    // Value block holding the rows (1, 1.0), (2, 2.0) and (3, 3.0); the first element is passed as a long.
    BlockBuilder builder = innerRowType.createBlockBuilder(null, 3);
    for (long i = 1; i <= 3; i++) {
        innerRowType.writeObject(builder, toRow(ImmutableList.of(INTEGER, DOUBLE), i, (double) i));
    }

    assertAggregation(
            aggFunc,
            ImmutableMap.of(
                    1.0, ImmutableList.of(1, 1.0),
                    2.0, ImmutableList.of(2, 2.0),
                    3.0, ImmutableList.of(3, 3.0)),
            createDoublesBlock(1.0, 2.0, 3.0),
            builder.build());
}
javaIntObjectInspector)); Type struct1Type = RowType.from(asList( field("mapIntStringField", mapType(INTEGER, VARCHAR)), field("stringArrayField", new ArrayType(VARCHAR)), field("intField", INTEGER))); struct1ObjectInspector)); Type struct2Type = RowType.from(asList( field("mapIntStringField", mapType(INTEGER, VARCHAR)), field("stringArrayField", new ArrayType(VARCHAR)), field("structField", struct1Type))); javaBooleanObjectInspector)); Type struct3Type = RowType.from(asList( field("mapIntDoubleField", mapType(INTEGER, DOUBLE)), field("booleanArrayField", new ArrayType(BOOLEAN)), field("booleanField", BOOLEAN))); struct3ObjectInspector)); Type struct4Type = RowType.from(asList( field("mapIntDoubleField", mapType(INTEGER, DOUBLE)), field("booleanArrayField", new ArrayType(BOOLEAN)), field("structField", struct3Type)));
Iterable<Object> simpleStructs = transform(insertNullEvery(5, writeValues), RcFileTester::toHiveStruct); testRoundTripType( RowType.from(ImmutableList.of(RowType.field("field", createRowType(type)))), transform(simpleStructs, Collections::singletonList), skipFormatsSet);