/**
 * Wraps a single element type in a new group annotated with the given original type.
 *
 * @param name         name of the wrapping group
 * @param originalType original-type annotation passed to the wrapping group
 * @param elementType  the only field placed inside the wrapping group
 * @param repetition   repetition passed to the wrapping group
 * @return the newly created wrapping group
 */
private static GroupType listWrapper(final String name, final OriginalType originalType,
    final Type elementType, final Repetition repetition) {
  final GroupType wrapper = new GroupType(repetition, name, originalType, elementType);
  return wrapper;
}
} // closes the enclosing scope, whose header is outside this excerpt
/**
 * Creates the outer group for a list-like structure: a group with the requested
 * repetition, name and original-type annotation that contains exactly one field.
 *
 * @param name         name of the outer group
 * @param originalType original-type annotation for the outer group
 * @param elementType  the single field nested in the outer group
 * @param repetition   repetition of the outer group
 * @return the outer group
 */
private static GroupType listWrapper(
    final String name,
    final OriginalType originalType,
    final Type elementType,
    final Repetition repetition) {
  final GroupType outerGroup = new GroupType(repetition, name, originalType, elementType);
  return outerGroup;
}
} // closes the enclosing scope, whose header is outside this excerpt
/**
 * Wraps the given group in a new group that is always {@code OPTIONAL}.
 *
 * @param name         name of the wrapping group
 * @param originalType original-type annotation passed to the wrapping group
 * @param groupType    the only field placed inside the wrapping group
 * @return the newly created optional wrapping group
 */
private static GroupType listWrapper(final String name, final OriginalType originalType,
    final GroupType groupType) {
  final GroupType optionalWrapper =
      new GroupType(Repetition.OPTIONAL, name, originalType, groupType);
  return optionalWrapper;
}
} // closes the enclosing scope, whose header is outside this excerpt
/**
 * Wraps a repeated type in a new group carrying the given original-type annotation.
 *
 * @param repetition   repetition of the wrapping group
 * @param alias        name of the wrapping group
 * @param originalType original-type annotation passed to the wrapping group
 * @param nested       the field placed inside the wrapping group; must be {@code REPEATED}
 * @return the newly created wrapping group
 * @throws IllegalArgumentException if {@code nested} is not {@code REPEATED}
 */
private static GroupType listWrapper(Repetition repetition, String alias, OriginalType originalType, Type nested) {
  if (nested.isRepetition(Repetition.REPEATED)) {
    return new GroupType(repetition, alias, originalType, nested);
  }
  throw new IllegalArgumentException("Nested type should be repeated: " + nested);
}
/**
 * Builds the outer group around a repeated nested type.
 *
 * @param repetition   repetition of the outer group
 * @param alias        name of the outer group
 * @param originalType original-type annotation for the outer group
 * @param nested       the single nested field; rejected unless it is {@code REPEATED}
 * @return the outer group containing {@code nested}
 * @throws IllegalArgumentException if {@code nested} is not {@code REPEATED}
 */
private static GroupType listWrapper(
    Repetition repetition,
    String alias,
    OriginalType originalType,
    Type nested) {
  final boolean nestedIsRepeated = nested.isRepetition(Repetition.REPEATED);
  if (!nestedIsRepeated) {
    throw new IllegalArgumentException("Nested type should be repeated: " + nested);
  }
  final GroupType outerGroup = new GroupType(repetition, alias, originalType, nested);
  return outerGroup;
}
/**
 * Builds a group whose only field is the schema produced by the element converter.
 * The group's repetition and name are taken from this converter's JSON schema.
 *
 * @return the group type wrapping the element schema
 */
@Override
protected Type buildSchema() {
  List<Type> children = new ArrayList<>();
  // The list is empty at this point, so appending places the element at index 0.
  children.add(this.elementConverter.schema());
  return new GroupType(
      this.jsonSchema.optionalOrRequired(),
      this.jsonSchema.getColumnName(),
      children);
}
/**
 * Checks that {@code getParquetTupleDomain} yields no column domains for a
 * not-null predicate on an array-of-struct column.
 */
@Test
public void testParquetTupleDomainStructArray() {
    // Hive-side view of the column: array<struct<a:int>>.
    HiveColumnHandle arrayColumn = new HiveColumnHandle(
            "my_array_struct",
            HiveType.valueOf("array<struct<a:int>>"),
            parseTypeSignature(StandardTypes.ARRAY),
            0,
            REGULAR,
            Optional.empty());

    // Predicate: the array-of-row column is not null.
    RowType.Field structMember = new RowType.Field(Optional.of("a"), INTEGER);
    RowType structType = RowType.from(ImmutableList.of(structMember));
    ArrayType arrayOfStructs = new ArrayType(structType);
    TupleDomain<HiveColumnHandle> predicate =
            withColumnDomains(ImmutableMap.of(arrayColumn, Domain.notNull(arrayOfStructs)));

    // File schema: my_array_struct { repeated bag { array_element { a } } }.
    PrimitiveType leaf = new PrimitiveType(OPTIONAL, INT32, "a");
    GroupType elementGroup = new GroupType(OPTIONAL, "array_element", leaf);
    GroupType bagGroup = new GroupType(REPEATED, "bag", elementGroup);
    GroupType columnGroup = new GroupType(OPTIONAL, "my_array_struct", bagGroup);
    MessageType fileSchema = new MessageType("hive_schema", columnGroup);

    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> parquetDomain = getParquetTupleDomain(descriptorsByPath, predicate);

    assertTrue(parquetDomain.getDomains().get().isEmpty());
}
/**
 * Checks that {@code getParquetTupleDomain} yields no column domains for a
 * not-null predicate on an array-of-int column.
 */
@Test
public void testParquetTupleDomainPrimitiveArray() {
    // Hive-side view of the column: array<int>.
    HiveColumnHandle arrayColumn = new HiveColumnHandle(
            "my_array",
            HiveType.valueOf("array<int>"),
            parseTypeSignature(StandardTypes.ARRAY),
            0,
            REGULAR,
            Optional.empty());

    // Predicate: the integer-array column is not null.
    ArrayType arrayOfInts = new ArrayType(INTEGER);
    TupleDomain<HiveColumnHandle> predicate =
            withColumnDomains(ImmutableMap.of(arrayColumn, Domain.notNull(arrayOfInts)));

    // File schema: my_array { repeated bag { array_element } }.
    PrimitiveType elementLeaf = new PrimitiveType(OPTIONAL, INT32, "array_element");
    GroupType bagGroup = new GroupType(REPEATED, "bag", elementLeaf);
    GroupType columnGroup = new GroupType(OPTIONAL, "my_array", bagGroup);
    MessageType fileSchema = new MessageType("hive_schema", columnGroup);

    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> parquetDomain = getParquetTupleDomain(descriptorsByPath, predicate);

    assertTrue(parquetDomain.getDomains().get().isEmpty());
}
/**
 * Converts a list type into a {@code LIST}-annotated group.
 *
 * <p>The element type is converted under the name {@code "array_element"}, placed in a
 * {@code REPEATED} group named after {@code ParquetHiveSerDe.ARRAY}, and that group is
 * handed to {@code listWrapper} together with {@code name}.
 *
 * @param name     name of the resulting outer group
 * @param typeInfo list type whose element type is converted
 * @return the group produced by {@code listWrapper}
 */
private static GroupType convertArrayType(final String name, final ListTypeInfo typeInfo) {
  final TypeInfo elementInfo = typeInfo.getListElementTypeInfo();
  final GroupType repeatedGroup = new GroupType(
      Repetition.REPEATED,
      ParquetHiveSerDe.ARRAY.toString(),
      convertType("array_element", elementInfo));
  return listWrapper(name, OriginalType.LIST, repeatedGroup);
}
public static GroupType mapType(Repetition repetition, String alias, String mapAlias, Type keyType, Type valueType) { //support projection only on key of a map if (valueType == null) { return listWrapper( repetition, alias, MAP_KEY_VALUE, new GroupType( Repetition.REPEATED, mapAlias, keyType)); } else { if (!valueType.getName().equals("value")) { throw new RuntimeException(valueType.getName() + " should be value"); } return listWrapper( repetition, alias, MAP_KEY_VALUE, new GroupType( Repetition.REPEATED, mapAlias, keyType, valueType)); } }
/**
 * Builds the type for this record from the entries of its JSON schema.
 *
 * <p>For each entry a converter is obtained from {@code JsonElementConversionFactory},
 * registered in {@code converters} under the entry's column name, and its schema is
 * collected as a field. The fields are returned as a {@code MessageType} for
 * {@code ROOT} records and as a {@code GroupType} for {@code CHILD} records.
 *
 * @return the message or group type containing one field per schema entry
 * @throws RuntimeException if the record type is neither {@code ROOT} nor {@code CHILD}
 */
private Type buildSchema() {
  JsonArray schemaEntries = this.jsonSchema.getDataTypeValues();
  List<Type> fieldTypes = new ArrayList<>();

  for (JsonElement entry : schemaEntries) {
    JsonSchema fieldSchema = new JsonSchema((JsonObject) entry);
    String fieldName = fieldSchema.getColumnName();
    JsonElementConverter fieldConverter = JsonElementConversionFactory.getConverter(fieldSchema, false);
    // The schema is requested before the converter is registered, as in the original flow.
    Type fieldType = fieldConverter.schema();
    this.converters.put(fieldName, fieldConverter);
    fieldTypes.add(fieldType);
  }

  String recordName = this.jsonSchema.getColumnName();
  switch (recordType) {
    case ROOT:
      return new MessageType(recordName, fieldTypes);
    case CHILD:
      return new GroupType(this.jsonSchema.optionalOrRequired(), recordName, fieldTypes);
    default:
      throw new RuntimeException("Unsupported Record type");
  }
}
public static GroupType mapType(Repetition repetition, String alias, String mapAlias, Type keyType, Type valueType) { //support projection only on key of a map if (valueType == null) { return listWrapper( repetition, alias, MAP_KEY_VALUE, new GroupType( Repetition.REPEATED, mapAlias, keyType)); } else { if (!valueType.getName().equals("value")) { throw new RuntimeException(valueType.getName() + " should be value"); } return listWrapper( repetition, alias, MAP_KEY_VALUE, new GroupType( Repetition.REPEATED, mapAlias, keyType, valueType)); } }
/**
 * Converts a struct type into an {@code OPTIONAL} group whose fields are produced by
 * {@code convertTypes} from the struct's field names and field types.
 *
 * @param name     name of the resulting group
 * @param typeInfo struct type supplying the field names and field types
 * @return the optional group for the struct
 */
private static GroupType convertStructType(final String name, final StructTypeInfo typeInfo) {
  final List<String> fieldNames = typeInfo.getAllStructFieldNames();
  final List<TypeInfo> fieldTypeInfos = typeInfo.getAllStructFieldTypeInfos();
  final GroupType structGroup =
      new GroupType(Repetition.OPTIONAL, name, convertTypes(fieldNames, fieldTypeInfos));
  return structGroup;
}
/**
 * Converts a struct type into a group with the requested repetition. The group's
 * fields are produced by {@code convertTypes} from the struct's field names and types.
 *
 * @param name       name of the resulting group
 * @param typeInfo   struct type supplying the field names and field types
 * @param repetition repetition of the resulting group
 * @return the group for the struct
 */
private static GroupType convertStructType(final String name, final StructTypeInfo typeInfo,
    final Repetition repetition) {
  final List<String> fieldNames = typeInfo.getAllStructFieldNames();
  final List<TypeInfo> fieldTypeInfos = typeInfo.getAllStructFieldTypeInfos();
  final GroupType structGroup =
      new GroupType(repetition, name, convertTypes(fieldNames, fieldTypeInfos));
  return structGroup;
}
/**
 * Builds the group for a struct: the struct's field names and field types are passed
 * to {@code convertTypes}, and the result becomes the content of a group with the
 * given name and repetition.
 *
 * @param name       name of the group
 * @param typeInfo   struct type being converted
 * @param repetition repetition applied to the group
 * @return the converted group
 */
private static GroupType convertStructType(
    final String name,
    final StructTypeInfo typeInfo,
    final Repetition repetition) {
  final List<String> memberNames = typeInfo.getAllStructFieldNames();
  final List<TypeInfo> memberTypes = typeInfo.getAllStructFieldTypeInfos();
  return new GroupType(
      repetition,
      name,
      convertTypes(memberNames, memberTypes));
}
/**
 * Checks that {@code getParquetTupleDomain} yields no column domains for a
 * not-null predicate on a struct column.
 */
@Test
public void testParquetTupleDomainStruct() {
    // Hive-side view of the column: struct<a:int,b:int>.
    HiveColumnHandle structColumn = new HiveColumnHandle(
            "my_struct",
            HiveType.valueOf("struct<a:int,b:int>"),
            parseTypeSignature(StandardTypes.ROW),
            0,
            REGULAR,
            Optional.empty());

    // NOTE(review): the row type has a single field named "my_struct" rather than
    // mirroring the struct's a/b members — confirm whether this is intentional.
    RowType.Field onlyField = new RowType.Field(Optional.of("my_struct"), INTEGER);
    RowType rowType = RowType.from(ImmutableList.of(onlyField));
    TupleDomain<HiveColumnHandle> predicate =
            withColumnDomains(ImmutableMap.of(structColumn, Domain.notNull(rowType)));

    // File schema: my_struct { a, b }.
    PrimitiveType memberA = new PrimitiveType(OPTIONAL, INT32, "a");
    PrimitiveType memberB = new PrimitiveType(OPTIONAL, INT32, "b");
    GroupType structGroup = new GroupType(OPTIONAL, "my_struct", memberA, memberB);
    MessageType fileSchema = new MessageType("hive_schema", structGroup);

    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> parquetDomain = getParquetTupleDomain(descriptorsByPath, predicate);

    assertTrue(parquetDomain.getDomains().get().isEmpty());
}
/**
 * Checks that {@code getParquetTupleDomain} yields no column domains for a
 * not-null predicate on a map column.
 */
@Test
public void testParquetTupleDomainMap() {
    // Hive-side view of the column: map<int,int>.
    HiveColumnHandle mapColumn = new HiveColumnHandle(
            "my_map",
            HiveType.valueOf("map<int,int>"),
            parseTypeSignature(StandardTypes.MAP),
            0,
            REGULAR,
            Optional.empty());

    // All four method handles point at the same "throwUnsupportedOperationException" method.
    MapType intToIntMap = new MapType(
            INTEGER,
            INTEGER,
            methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"),
            methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"),
            methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"),
            methodHandle(TestParquetPredicateUtils.class, "throwUnsupportedOperationException"));

    TupleDomain<HiveColumnHandle> predicate =
            withColumnDomains(ImmutableMap.of(mapColumn, Domain.notNull(intToIntMap)));

    // File schema: my_map { repeated map { key, value } }.
    PrimitiveType keyLeaf = new PrimitiveType(REQUIRED, INT32, "key");
    PrimitiveType valueLeaf = new PrimitiveType(OPTIONAL, INT32, "value");
    GroupType entryGroup = new GroupType(REPEATED, "map", keyLeaf, valueLeaf);
    GroupType columnGroup = new GroupType(OPTIONAL, "my_map", entryGroup);
    MessageType fileSchema = new MessageType("hive_schema", columnGroup);

    Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, fileSchema);
    TupleDomain<ColumnDescriptor> parquetDomain = getParquetTupleDomain(descriptorsByPath, predicate);

    assertTrue(parquetDomain.getDomains().get().isEmpty());
}
/**
 * Wraps the given group in an outer group with the supplied repetition, name and
 * original-type annotation.
 *
 * <p>Per the original note on this method, the extra level exists to preserve the
 * difference between an empty list and null.
 *
 * @param repetition   repetition of the outer group
 * @param alias        name of the outer group
 * @param originalType original-type annotation of the outer group
 * @param groupType    the group placed inside the outer group
 * @return the outer group containing {@code groupType}
 */
private static GroupType listWrapper(
    Repetition repetition,
    String alias,
    OriginalType originalType,
    GroupType groupType) {
  final GroupType outerGroup = new GroupType(repetition, alias, originalType, groupType);
  return outerGroup;
}
/**
 * Produces a group with the given repetition, name and original-type annotation
 * that contains {@code elementType} as its single field.
 *
 * @param name         name of the produced group
 * @param originalType original-type annotation of the produced group
 * @param elementType  single field of the produced group
 * @param repetition   repetition of the produced group
 * @return the produced group
 */
private static GroupType listWrapper(final String name, final OriginalType originalType,
    final Type elementType, final Repetition repetition) {
  final GroupType annotatedGroup =
      new GroupType(repetition, name, originalType, elementType);
  return annotatedGroup;
}
} // closes the enclosing scope, whose header is outside this excerpt
/**
 * Converts a struct type into a group with the given name and repetition; the
 * group's content comes from {@code convertTypes} applied to the struct's field
 * names and field types.
 *
 * @param name       name of the resulting group
 * @param typeInfo   struct type to convert
 * @param repetition repetition of the resulting group
 * @return the resulting group
 */
private static GroupType convertStructType(final String name, final StructTypeInfo typeInfo,
    final Repetition repetition) {
  final List<String> structFieldNames = typeInfo.getAllStructFieldNames();
  final List<TypeInfo> structFieldTypes = typeInfo.getAllStructFieldTypeInfos();
  final GroupType converted =
      new GroupType(repetition, name, convertTypes(structFieldNames, structFieldTypes));
  return converted;
}