/**
 * Rebuilds a list type with a new element ID obtained from {@code nextId}.
 *
 * <p>The new ID is requested before the element type supplier is invoked.
 *
 * @param list the list being visited; only its element optionality is read
 * @param future supplier of the element type for the rebuilt list
 * @return a new list type with the new element ID and the original element optionality
 */
@Override
public Type list(Types.ListType list, Supplier<Type> future) {
  int freshElementId = nextId.get();
  boolean optionalElements = list.isElementOptional();
  Type elementType = future.get();

  return optionalElements
      ? Types.ListType.ofOptional(freshElementId, elementType)
      : Types.ListType.ofRequired(freshElementId, elementType);
}
/**
 * Rebuilds a list type using the element ID of the current source list.
 *
 * <p>While the element type supplier runs, {@code sourceType} is set to the source list's
 * element type; it is restored to the source list afterwards, even if the supplier throws.
 *
 * @param list the list being visited; only its element optionality is read
 * @param elementTypeFuture supplier of the element type for the rebuilt list
 * @return a new list type carrying the source list's element ID
 * @throws IllegalArgumentException if the current source type is not a list
 */
@Override
public Type list(Types.ListType list, Supplier<Type> elementTypeFuture) {
  // use a message template so the source type is only rendered when the check fails
  Preconditions.checkArgument(sourceType.isListType(), "Not a list: %s", sourceType);

  Types.ListType sourceList = sourceType.asListType();
  int sourceElementId = sourceList.elementId();

  this.sourceType = sourceList.elementType();
  try {
    if (list.isElementOptional()) {
      return Types.ListType.ofOptional(sourceElementId, elementTypeFuture.get());
    } else {
      return Types.ListType.ofRequired(sourceElementId, elementTypeFuture.get());
    }
  } finally {
    // restore the visitor state for the caller
    this.sourceType = sourceList;
  }
}
@Override public Type list(Types.ListType list, Supplier<Type> elementResult) { Preconditions.checkArgument(current instanceof ArrayType, "Not an array: %s", current); ArrayType array = (ArrayType) current; Preconditions.checkArgument(array.containsNull() || !list.isElementOptional(), "Cannot project an array of optional elements as required elements: %s", array); this.current = array.elementType(); try { Type elementType = elementResult.get(); if (list.elementType() == elementType) { return list; } // must be a projected element type, create a new list if (list.isElementOptional()) { return Types.ListType.ofOptional(list.elementId(), elementType); } else { return Types.ListType.ofRequired(list.elementId(), elementType); } } finally { this.current = array; } }
@Test public void mapConversions() throws IOException { // consistent behavior for maps conversions. The below test case, correctly does not specify map key types convertToPigSchema( new Schema( required( 1, "a", MapType.ofRequired( 2, 3, StringType.get(), ListType.ofRequired( 4, StructType.of( required(5, "b", LongType.get()), required(6, "c", StringType.get())))))), "a:[{(b:long,c:chararray)}]", "We do not specify the map key type here"); // struct<a:map<string,map<string,double>>> -> (a:[[double]]) // As per https://pig.apache.org/docs/latest/basic.html#map-schema. It seems that // we only need to specify value type as keys are always of type chararray convertToPigSchema( new Schema( StructType.of( required(1, "a", MapType.ofRequired( 2, 3, StringType.get(), MapType.ofRequired(4, 5, StringType.get(), DoubleType.get()))) ).fields()), "a:[[double]]", "A map key type does not need to be specified"); }
@Override public Type array(Schema array, Type elementType) { if (array.getLogicalType() instanceof LogicalMap) { // map stored as an array Schema keyValueSchema = array.getElementType(); Preconditions.checkArgument(AvroSchemaUtil.isKeyValueSchema(keyValueSchema), "Invalid key-value pair schema: {}", keyValueSchema); Types.StructType keyValueType = elementType.asStructType(); Types.NestedField keyField = keyValueType.field("key"); Types.NestedField valueField = keyValueType.field("value"); if (keyValueType.field("value").isOptional()) { return Types.MapType.ofOptional( keyField.fieldId(), valueField.fieldId(), keyField.type(), valueField.type()); } else { return Types.MapType.ofRequired( keyField.fieldId(), valueField.fieldId(), keyField.type(), valueField.type()); } } else { // normal array Schema elementSchema = array.getElementType(); int id = getElementId(array); if (AvroSchemaUtil.isOptionSchema(elementSchema)) { return Types.ListType.ofOptional(id, elementType); } else { return Types.ListType.ofRequired(id, elementType); } } }
static StructType getType(StructType partitionType) { // IDs start at 100 to leave room for changes to ManifestEntry return StructType.of( required(100, "file_path", StringType.get()), required(101, "file_format", StringType.get()), required(102, "partition", partitionType), required(103, "record_count", LongType.get()), required(104, "file_size_in_bytes", LongType.get()), required(105, "block_size_in_bytes", LongType.get()), optional(106, "file_ordinal", IntegerType.get()), optional(107, "sort_columns", ListType.ofRequired(112, IntegerType.get())), optional(108, "column_sizes", MapType.ofRequired(117, 118, IntegerType.get(), LongType.get())), optional(109, "value_counts", MapType.ofRequired(119, 120, IntegerType.get(), LongType.get())), optional(110, "null_value_counts", MapType.ofRequired(121, 122, IntegerType.get(), LongType.get())), optional(125, "lower_bounds", MapType.ofRequired(126, 127, IntegerType.get(), BinaryType.get())), optional(128, "upper_bounds", MapType.ofRequired(129, 130, IntegerType.get(), BinaryType.get())) // NEXT ID TO ASSIGN: 131 ); }
@Override public Type list(Types.ListType list, Supplier<Type> elementResult) { Preconditions.checkArgument(current instanceof ArrayType, "Not an array: %s", current); ArrayType array = (ArrayType) current; Preconditions.checkArgument(array.containsNull() || !list.isElementOptional(), "Cannot project an array of optional elements as required elements: %s", array); this.current = array.elementType(); try { Type elementType = elementResult.get(); if (list.elementType() == elementType) { return list; } // must be a projected element type, create a new list if (list.isElementOptional()) { return Types.ListType.ofOptional(list.elementId(), elementType); } else { return Types.ListType.ofRequired(list.elementId(), elementType); } } finally { this.current = array; } }
/**
 * Visits a list while tracking the matching source list in {@code sourceType}.
 *
 * <p>While the element type supplier runs, {@code sourceType} is set to the source list's
 * element type; it is restored to the source list afterwards, even if the supplier throws.
 *
 * @param list the list being visited
 * @param elementTypeFuture supplier of the resulting element type
 * @return {@code list} itself when the element type is the same instance as the supplied one,
 *     otherwise a new list with the same element ID and optionality
 * @throws IllegalArgumentException if the current source type is not a list
 */
@Override
public Type list(Types.ListType list, Supplier<Type> elementTypeFuture) {
  // use a message template so the source type is only rendered when the check fails
  Preconditions.checkArgument(sourceType.isListType(), "Not a list: %s", sourceType);

  Types.ListType sourceList = sourceType.asListType();

  this.sourceType = sourceList.elementType();
  try {
    Type elementType = elementTypeFuture.get();
    if (list.elementType() == elementType) {
      // nothing changed, reuse the existing list
      return list;
    }

    if (list.isElementOptional()) {
      return Types.ListType.ofOptional(list.elementId(), elementType);
    } else {
      return Types.ListType.ofRequired(list.elementId(), elementType);
    }

  } finally {
    // restore the visitor state for the caller
    this.sourceType = sourceList;
  }
}
/**
 * Checks conversion in both directions between a map of list keys to struct values and the
 * Avro schema built with {@code AvroSchemaUtil.createMap}.
 */
@Test
public void testMapOfListToStructs() {
  Types.ListType keyType = Types.ListType.ofRequired(35, Types.IntegerType.get());
  Types.StructType valueType = Types.StructType.of(
      required(36, "a", Types.IntegerType.get()),
      optional(37, "b", Types.IntegerType.get()));
  Type map = Types.MapType.ofRequired(33, 34, keyType, valueType);

  Schema keySchema = addElementId(35, Schema.createArray(Schema.create(Schema.Type.INT)));
  Schema valueSchema = record("r34",
      requiredField(36, "a", Schema.create(Schema.Type.INT)),
      optionalField(37, "b", Schema.create(Schema.Type.INT)));
  Schema schema = AvroSchemaUtil.createMap(33, keySchema, 34, valueSchema);

  Assert.assertEquals("Avro schema to map", map, AvroSchemaUtil.convert(schema));
  Assert.assertEquals("Map to Avro schema", schema, AvroSchemaUtil.convert(map));
}
@Override public Type list(Types.ListType list, Type result) { // use field to apply updates Type elementResult = field(list.fields().get(0), result); if (elementResult == null) { throw new IllegalArgumentException("Cannot delete element type from list: " + list); } if (list.elementType() == elementResult) { return list; } if (list.isElementOptional()) { return Types.ListType.ofOptional(list.elementId(), elementResult); } else { return Types.ListType.ofRequired(list.elementId(), elementResult); } }
/**
 * Checks conversion in both directions between a required list of structs and the matching
 * Avro array schema.
 */
@Test
public void testListOfStructs() {
  Types.StructType point = Types.StructType.of(
      required(35, "lat", Types.FloatType.get()),
      required(36, "long", Types.FloatType.get()));
  Type list = Types.ListType.ofRequired(34, point);

  Schema pointSchema = record("r34",
      requiredField(35, "lat", Schema.create(Schema.Type.FLOAT)),
      requiredField(36, "long", Schema.create(Schema.Type.FLOAT)));
  Schema schema = addElementId(34, SchemaBuilder.array().items(pointSchema));

  Assert.assertEquals("Avro schema to list", list, AvroSchemaUtil.convert(schema));
  Assert.assertEquals("List to Avro schema", schema, AvroSchemaUtil.convert(list));
}
/**
 * Converts a Parquet list group to a list type.
 *
 * <p>The element is read as the first field of the group's first (repeated) child. Its ID is
 * obtained with {@code getId} and registered under the element's name with {@code addAlias}.
 *
 * @param array the Parquet group representing the list
 * @param elementType the already-converted element type
 * @return an optional-element list if the element has OPTIONAL repetition, otherwise a
 *     required-element list
 * @throws IllegalArgumentException if the element has REPEATED repetition
 */
@Override
public Type list(GroupType array, Type elementType) {
  GroupType repeated = array.getType(0).asGroupType();
  org.apache.parquet.schema.Type element = repeated.getType(0);

  // Preconditions templates use %s; a "{}" placeholder would not be substituted
  Preconditions.checkArgument(
      !element.isRepetition(Repetition.REPEATED),
      "Elements cannot have repetition REPEATED: %s", element);

  int elementFieldId = getId(element);

  addAlias(element.getName(), elementFieldId);

  if (element.isRepetition(Repetition.OPTIONAL)) {
    return Types.ListType.ofOptional(elementFieldId, elementType);
  } else {
    return Types.ListType.ofRequired(elementFieldId, elementType);
  }
}
@Test public void doubleWrappingTuples() throws IOException { // struct<a:array<struct<b:string>>> -> (a:{(b:chararray)}) convertToPigSchema( new Schema( StructType.of( required(1, "a", ListType.ofRequired(2, StructType.of(required(3, "b", StringType.get())))) ).fields()), "a:{(b:chararray)}", "A tuple inside a bag should not be double wrapped"); // struct<a:array<boolean>> -> "(a:{(boolean)}) convertToPigSchema( new Schema(StructType.of(required(1, "a", ListType.ofRequired(2, BooleanType.get()))).fields()), "a:{(boolean)}", "boolean (or anything non-tuple) element inside a bag should be wrapped inside a tuple" ); }
/**
 * Decides what to keep of a list based on {@code selected} and the element's visit result.
 *
 * @param list the list being visited
 * @param elementResult the visit result for the element type, possibly null
 * @return {@code list} unchanged when its element ID is in {@code selected} or when the element
 *     result is the same instance as its element type; a new list with the same element ID and
 *     optionality when the element result differs; {@code null} when the element ID is not
 *     selected and the element result is null
 */
@Override
public Type list(Types.ListType list, Type elementResult) {
  // the element itself is selected: keep the whole list
  if (selected.contains(list.elementId())) {
    return list;
  }

  // nothing under the element was kept
  if (elementResult == null) {
    return null;
  }

  if (list.elementType() == elementResult) {
    return list;
  }

  return list.isElementOptional()
      ? Types.ListType.ofOptional(list.elementId(), elementResult)
      : Types.ListType.ofRequired(list.elementId(), elementResult);
}
/**
 * Checks that reading required list elements from a schema written with optional list
 * elements is reported as exactly one compatibility error.
 */
@Test
public void testRequiredListElement() {
  Types.ListType optionalInts = Types.ListType.ofOptional(1, Types.IntegerType.get());
  Schema write = new Schema(required(0, "list_field", optionalInts));

  Types.ListType requiredInts = Types.ListType.ofRequired(1, Types.IntegerType.get());
  Schema read = new Schema(required(0, "list_field", requiredInts));

  List<String> errors = CheckCompatibility.writeCompatibilityErrors(read, write);
  Assert.assertEquals("Should produce 1 error message", 1, errors.size());

  String firstError = errors.get(0);
  Assert.assertTrue("Should complain that elements are optional",
      firstError.contains("elements should be required, but are optional"));
}
/**
 * Reads a list type from its JSON representation.
 *
 * <p>The element ID, element type and element-required flag are read from the
 * {@code ELEMENT_ID}, {@code ELEMENT} and {@code ELEMENT_REQUIRED} keys, in that order.
 *
 * @param json the JSON node describing the list
 * @return a required-element list when the flag is true, otherwise an optional-element list
 */
private static Types.ListType listFromJson(JsonNode json) {
  int id = JsonUtil.getInt(ELEMENT_ID, json);
  Type element = typeFromJson(json.get(ELEMENT));
  boolean elementRequired = JsonUtil.getBool(ELEMENT_REQUIRED, json);

  return elementRequired
      ? Types.ListType.ofRequired(id, element)
      : Types.ListType.ofOptional(id, element);
}
/**
 * Checks that list types survive a serialization round trip: the copy must equal the original
 * and its element type must be the same instance as {@code Types.DoubleType.get()}.
 */
@Test
public void testLists() throws Exception {
  Type[] lists = new Type[] {
      Types.ListType.ofOptional(2, Types.DoubleType.get()),
      Types.ListType.ofRequired(5, Types.DoubleType.get())
  };

  for (Type list : lists) {
    Type copy = TestHelpers.roundTripSerialize(list);
    Assert.assertEquals("List serialization should be equal to starting type", list, copy);
    // inspect the deserialized copy; checking the original would not exercise serialization
    Assert.assertSame("List serialization should preserve identity type",
        Types.DoubleType.get(), copy.asNestedType().asListType().elementType());
  }
}
/**
 * Checks conversion in both directions between a required list of UUIDs and an Avro array of
 * 16-byte fixed values annotated with the uuid logical type.
 */
@Test
public void testList() {
  Type list = Types.ListType.ofRequired(34, Types.UUIDType.get());

  Schema uuidSchema = LogicalTypes.uuid().addToSchema(
      Schema.createFixed("uuid_fixed", null, null, 16));
  Schema schema = addElementId(34, SchemaBuilder.array().items(uuidSchema));

  Assert.assertEquals("Avro schema to list", list, AvroSchemaUtil.convert(schema));
  Assert.assertEquals("List to Avro schema", schema, AvroSchemaUtil.convert(list));
}
/**
 * Converts a schema holding an optional map of string to a list of longs. There are no
 * assertions; the test only requires that {@code SchemaUtil.convert} completes.
 */
@Test
public void testLongInBag() throws IOException {
  ListType longList = ListType.ofRequired(5, LongType.get());
  MapType mapOfLongLists = MapType.ofOptional(2, 3, StringType.get(), longList);
  Schema icebergSchema = new Schema(optional(1, "nested_list", mapOfLongLists));

  SchemaUtil.convert(icebergSchema);
}
/**
 * Converts an array type to a list type whose element ID comes from {@code getNextId()}.
 *
 * @param array the array type; only {@code containsNull()} is read
 * @param elementType the already-converted element type
 * @return an optional-element list if the array contains nulls, otherwise a required-element
 *     list
 */
@Override
public Type array(ArrayType array, Type elementType) {
  // read nullability before requesting the ID, matching the original call order
  boolean nullableElements = array.containsNull();
  int elementId = getNextId();

  return nullableElements
      ? Types.ListType.ofOptional(elementId, elementType)
      : Types.ListType.ofRequired(elementId, elementType);
}