/**
 * Rebuilds a map type with freshly assigned key and value ids.
 *
 * <p>Both ids are taken from {@code nextId} before either supplier is invoked: the key id is
 * requested first, then the value id, and only afterwards are the key and value types resolved.
 *
 * @param map the map type being visited; only its value optionality is read
 * @param keyFuture supplies the key type for the new map
 * @param valuefuture supplies the value type for the new map
 * @return a new map type with the same value optionality as {@code map}
 */
@Override
public Type map(Types.MapType map, Supplier<Type> keyFuture, Supplier<Type> valuefuture) {
  // ids must be reserved before the suppliers run
  int freshKeyId = nextId.get();
  int freshValueId = nextId.get();
  boolean optionalValues = map.isValueOptional();

  Type keyType = keyFuture.get();
  Type valueType = valuefuture.get();

  return optionalValues
      ? Types.MapType.ofOptional(freshKeyId, freshValueId, keyType, valueType)
      : Types.MapType.ofRequired(freshKeyId, freshValueId, keyType, valueType);
}
@Override public Type map(Types.MapType map, Type kResult, Type vResult) { // if any updates are intended for the key, throw an exception int keyId = map.fields().get(0).fieldId(); if (deletes.contains(keyId)) { throw new IllegalArgumentException("Cannot delete map keys: " + map); } else if (updates.containsKey(keyId)) { throw new IllegalArgumentException("Cannot update map keys: " + map); } else if (adds.containsKey(keyId)) { throw new IllegalArgumentException("Cannot add fields to map keys: " + map); } else if (!map.keyType().equals(kResult)) { throw new IllegalArgumentException("Cannot alter map keys: " + map); } // use field to apply updates to the value Type valueResult = field(map.fields().get(1), vResult); if (valueResult == null) { throw new IllegalArgumentException("Cannot delete value type from map: " + map); } if (map.valueType() == valueResult) { return map; } if (map.isValueOptional()) { return Types.MapType.ofOptional(map.keyId(), map.valueId(), map.keyType(), valueResult); } else { return Types.MapType.ofRequired(map.keyId(), map.valueId(), map.keyType(), valueResult); } }
/**
 * Builds a map type from its JSON representation.
 *
 * <p>Reads the key id, key type, value id, value type and the value-required flag from
 * {@code json}, in that order.
 *
 * @param json the JSON node describing the map
 * @return a map type with required values when the value-required flag is true, otherwise one
 *     with optional values
 */
private static Types.MapType mapFromJson(JsonNode json) {
  int keyId = JsonUtil.getInt(KEY_ID, json);
  Type keyType = typeFromJson(json.get(KEY));

  int valueId = JsonUtil.getInt(VALUE_ID, json);
  Type valueType = typeFromJson(json.get(VALUE));

  boolean valueRequired = JsonUtil.getBool(VALUE_REQUIRED, json);

  return valueRequired
      ? Types.MapType.ofRequired(keyId, valueId, keyType, valueType)
      : Types.MapType.ofOptional(keyId, valueId, keyType, valueType);
}
@Test public void mapConversions() throws IOException { // consistent behavior for maps conversions. The below test case, correctly does not specify map key types convertToPigSchema( new Schema( required( 1, "a", MapType.ofRequired( 2, 3, StringType.get(), ListType.ofRequired( 4, StructType.of( required(5, "b", LongType.get()), required(6, "c", StringType.get())))))), "a:[{(b:long,c:chararray)}]", "We do not specify the map key type here"); // struct<a:map<string,map<string,double>>> -> (a:[[double]]) // As per https://pig.apache.org/docs/latest/basic.html#map-schema. It seems that // we only need to specify value type as keys are always of type chararray convertToPigSchema( new Schema( StructType.of( required(1, "a", MapType.ofRequired( 2, 3, StringType.get(), MapType.ofRequired(4, 5, StringType.get(), DoubleType.get()))) ).fields()), "a:[[double]]", "A map key type does not need to be specified"); }
@Override public Type array(Schema array, Type elementType) { if (array.getLogicalType() instanceof LogicalMap) { // map stored as an array Schema keyValueSchema = array.getElementType(); Preconditions.checkArgument(AvroSchemaUtil.isKeyValueSchema(keyValueSchema), "Invalid key-value pair schema: {}", keyValueSchema); Types.StructType keyValueType = elementType.asStructType(); Types.NestedField keyField = keyValueType.field("key"); Types.NestedField valueField = keyValueType.field("value"); if (keyValueType.field("value").isOptional()) { return Types.MapType.ofOptional( keyField.fieldId(), valueField.fieldId(), keyField.type(), valueField.type()); } else { return Types.MapType.ofRequired( keyField.fieldId(), valueField.fieldId(), keyField.type(), valueField.type()); } } else { // normal array Schema elementSchema = array.getElementType(); int id = getElementId(array); if (AvroSchemaUtil.isOptionSchema(elementSchema)) { return Types.ListType.ofOptional(id, elementType); } else { return Types.ListType.ofRequired(id, elementType); } } }
static StructType getType(StructType partitionType) { // IDs start at 100 to leave room for changes to ManifestEntry return StructType.of( required(100, "file_path", StringType.get()), required(101, "file_format", StringType.get()), required(102, "partition", partitionType), required(103, "record_count", LongType.get()), required(104, "file_size_in_bytes", LongType.get()), required(105, "block_size_in_bytes", LongType.get()), optional(106, "file_ordinal", IntegerType.get()), optional(107, "sort_columns", ListType.ofRequired(112, IntegerType.get())), optional(108, "column_sizes", MapType.ofRequired(117, 118, IntegerType.get(), LongType.get())), optional(109, "value_counts", MapType.ofRequired(119, 120, IntegerType.get(), LongType.get())), optional(110, "null_value_counts", MapType.ofRequired(121, 122, IntegerType.get(), LongType.get())), optional(125, "lower_bounds", MapType.ofRequired(126, 127, IntegerType.get(), BinaryType.get())), optional(128, "upper_bounds", MapType.ofRequired(129, 130, IntegerType.get(), BinaryType.get())) // NEXT ID TO ASSIGN: 131 ); }
/**
 * Projects a map type against the map currently held in {@code current}.
 *
 * <p>Validates that {@code current} is a map, that a map of optional values is not being
 * projected onto one whose values cannot contain null, and that the current map's key type is a
 * string type. While the value type is resolved, {@code current} points at the current map's
 * value type; it is restored to the map afterwards, even if resolution throws.
 *
 * @param map the map type being visited
 * @param keyResult supplies the visited key type; not invoked here
 * @param valueResult supplies the visited value type
 * @return {@code map} itself when the value type is the same instance, otherwise a new map type
 *     with the same ids, key type and value optionality but the resolved value type
 * @throws IllegalArgumentException if any of the validations fails
 */
@Override
public Type map(Types.MapType map, Supplier<Type> keyResult, Supplier<Type> valueResult) {
  Preconditions.checkArgument(current instanceof MapType, "Not a map: %s", current);
  MapType currentMap = (MapType) current;

  boolean nullabilityCompatible = currentMap.valueContainsNull() || !map.isValueOptional();
  Preconditions.checkArgument(nullabilityCompatible,
      "Cannot project a map of optional values as required values: %s", map);

  boolean stringKeyed = StringType.class.isInstance(currentMap.keyType());
  Preconditions.checkArgument(stringKeyed,
      "Invalid map key type (not string): %s", currentMap.keyType());

  // descend into the value type for the duration of the supplier call
  this.current = currentMap.valueType();
  try {
    Type projectedValue = valueResult.get();
    if (map.valueType() == projectedValue) {
      return map;
    }

    return map.isValueOptional()
        ? Types.MapType.ofOptional(map.keyId(), map.valueId(), map.keyType(), projectedValue)
        : Types.MapType.ofRequired(map.keyId(), map.valueId(), map.keyType(), projectedValue);
  } finally {
    this.current = currentMap;
  }
}
/**
 * Projects a map type against the map currently held in {@code current}.
 *
 * <p>Validates that {@code current} is a map and that a map of optional values is not being
 * projected onto one whose values cannot contain null. While the value type is resolved,
 * {@code current} points at the current map's value type; it is restored to the map afterwards,
 * even if resolution throws.
 *
 * @param map the map type being visited
 * @param keyResult supplies the visited key type; not invoked here
 * @param valueResult supplies the visited value type
 * @return {@code map} itself when the value type is the same instance, otherwise a new map type
 *     with the same ids, key type and value optionality but the resolved value type
 * @throws IllegalArgumentException if either validation fails
 */
@Override
public Type map(Types.MapType map, Supplier<Type> keyResult, Supplier<Type> valueResult) {
  Preconditions.checkArgument(current instanceof MapType, "Not a map: %s", current);
  MapType currentMap = (MapType) current;

  boolean nullabilityCompatible = currentMap.valueContainsNull() || !map.isValueOptional();
  Preconditions.checkArgument(nullabilityCompatible,
      "Cannot project a map of optional values as required values: %s", map);

  // descend into the value type for the duration of the supplier call
  this.current = currentMap.valueType();
  try {
    Type projectedValue = valueResult.get();
    if (map.valueType() == projectedValue) {
      return map;
    }

    return map.isValueOptional()
        ? Types.MapType.ofOptional(map.keyId(), map.valueId(), map.keyType(), projectedValue)
        : Types.MapType.ofRequired(map.keyId(), map.valueId(), map.keyType(), projectedValue);
  } finally {
    this.current = currentMap;
  }
}
/**
 * Visits a map type alongside the map held in {@code sourceType}.
 *
 * <p>{@code sourceType} is pointed at the source map's key type while the key is resolved and
 * at its value type while the value is resolved, then restored to the source map, even if
 * resolution throws. The ids of the visited {@code map} are kept.
 *
 * @param map the map type being visited
 * @param keyTypeFuture supplies the resolved key type
 * @param valueTypeFuture supplies the resolved value type
 * @return {@code map} itself when both resolved types are the same instances as its own,
 *     otherwise a new map type with the same ids and value optionality
 * @throws IllegalArgumentException if {@code sourceType} is not a map
 */
@Override
public Type map(Types.MapType map, Supplier<Type> keyTypeFuture, Supplier<Type> valueTypeFuture) {
  Preconditions.checkArgument(sourceType.isMapType(), "Not a map: " + sourceType);

  Types.MapType originalSource = sourceType.asMapType();
  try {
    this.sourceType = originalSource.keyType();
    Type resolvedKey = keyTypeFuture.get();

    this.sourceType = originalSource.valueType();
    Type resolvedValue = valueTypeFuture.get();

    boolean unchanged = map.keyType() == resolvedKey && map.valueType() == resolvedValue;
    if (unchanged) {
      return map;
    }

    return map.isValueOptional()
        ? Types.MapType.ofOptional(map.keyId(), map.valueId(), resolvedKey, resolvedValue)
        : Types.MapType.ofRequired(map.keyId(), map.valueId(), resolvedKey, resolvedValue);
  } finally {
    this.sourceType = originalSource;
  }
}
/**
 * Visits a map type alongside the map held in {@code sourceType}, taking ids from the source.
 *
 * <p>{@code sourceType} is pointed at the source map's key type while the key is resolved and
 * at its value type while the value is resolved, then restored to the source map, even if
 * resolution throws. The result always uses the source map's key and value ids, while value
 * optionality comes from the visited {@code map}.
 *
 * @param map the map type being visited
 * @param keyTypeFuture supplies the resolved key type
 * @param valueTypeFuture supplies the resolved value type
 * @return a new map type built from the source ids and the resolved key and value types
 * @throws IllegalArgumentException if {@code sourceType} is not a map
 */
@Override
public Type map(Types.MapType map, Supplier<Type> keyTypeFuture, Supplier<Type> valueTypeFuture) {
  Preconditions.checkArgument(sourceType.isMapType(), "Not a map: " + sourceType);

  Types.MapType originalSource = sourceType.asMapType();
  try {
    this.sourceType = originalSource.keyType();
    Type resolvedKey = keyTypeFuture.get();

    this.sourceType = originalSource.valueType();
    Type resolvedValue = valueTypeFuture.get();

    return map.isValueOptional()
        ? Types.MapType.ofOptional(
            originalSource.keyId(), originalSource.valueId(), resolvedKey, resolvedValue)
        : Types.MapType.ofRequired(
            originalSource.keyId(), originalSource.valueId(), resolvedKey, resolvedValue);
  } finally {
    this.sourceType = originalSource;
  }
}
@Override public Type map(Types.MapType map, Type ignored, Type valueResult) { if (selected.contains(map.valueId())) { return map; } else if (valueResult != null) { if (map.valueType() == valueResult) { return map; } else if (map.isValueOptional()) { return Types.MapType.ofOptional(map.keyId(), map.valueId(), map.keyType(), valueResult); } else { return Types.MapType.ofRequired(map.keyId(), map.valueId(), map.keyType(), valueResult); } } else if (selected.contains(map.keyId())) { // right now, maps can't be selected without values return map; } return null; }
/**
 * Converts a map group into a map type.
 *
 * <p>The key and value are read as the first and second fields of the map group's first child
 * group. Their ids come from {@code getId(...)}, and both names are registered through
 * {@code addAlias(...)}. The resulting map has optional values when the value field has
 * OPTIONAL repetition, and required values otherwise.
 *
 * @param map the map group being visited
 * @param keyType the already-converted key type
 * @param valueType the already-converted value type
 * @return the resulting map type
 * @throws IllegalArgumentException if the value field has REPEATED repetition
 */
@Override
public Type map(GroupType map, Type keyType, Type valueType) {
  GroupType keyValue = map.getType(0).asGroupType();
  org.apache.parquet.schema.Type key = keyValue.getType(0);
  org.apache.parquet.schema.Type value = keyValue.getType(1);

  // the message template must use %s; a "{}" placeholder is not substituted
  Preconditions.checkArgument(
      !value.isRepetition(Repetition.REPEATED),
      "Values cannot have repetition REPEATED: %s", value);

  int keyFieldId = getId(key);
  int valueFieldId = getId(value);

  addAlias(key.getName(), keyFieldId);
  addAlias(value.getName(), valueFieldId);

  if (value.isRepetition(Repetition.OPTIONAL)) {
    return Types.MapType.ofOptional(keyFieldId, valueFieldId, keyType, valueType);
  } else {
    return Types.MapType.ofRequired(keyFieldId, valueFieldId, keyType, valueType);
  }
}
/**
 * Checks conversion in both directions between a map keyed by a list of ints with struct
 * values and its Avro schema counterpart.
 */
@Test
public void testMapOfListToStructs() {
  Type keyType = Types.ListType.ofRequired(35, Types.IntegerType.get());
  Type valueType = Types.StructType.of(
      required(36, "a", Types.IntegerType.get()),
      optional(37, "b", Types.IntegerType.get()));
  Type expectedType = Types.MapType.ofRequired(33, 34, keyType, valueType);

  Schema avroSchema = AvroSchemaUtil.createMap(
      33, addElementId(35, Schema.createArray(Schema.create(Schema.Type.INT))),
      34, record("r34",
          requiredField(36, "a", Schema.create(Schema.Type.INT)),
          optionalField(37, "b", Schema.create(Schema.Type.INT))));

  Assert.assertEquals("Avro schema to map",
      expectedType, AvroSchemaUtil.convert(avroSchema));
  Assert.assertEquals("Map to Avro schema",
      avroSchema, AvroSchemaUtil.convert(expectedType));
}
/**
 * Checks conversion in both directions between a string-keyed map with struct values and its
 * Avro schema counterpart.
 */
@Test
public void testMapOfStringToStructs() {
  Type valueType = Types.StructType.of(
      required(35, "a", Types.IntegerType.get()),
      optional(36, "b", Types.IntegerType.get()));
  Type expectedType = Types.MapType.ofRequired(33, 34, Types.StringType.get(), valueType);

  Schema avroSchema = addKeyId(33, addValueId(34, SchemaBuilder.map().values(
      record("r34",
          requiredField(35, "a", Schema.create(Schema.Type.INT)),
          optionalField(36, "b", Schema.create(Schema.Type.INT))))));

  Assert.assertEquals("Avro schema to map",
      expectedType, AvroSchemaUtil.convert(avroSchema));
  Assert.assertEquals("Map to Avro schema",
      avroSchema, AvroSchemaUtil.convert(expectedType));
}
/**
 * Checks that map types survive a serialization round trip.
 *
 * <p>For both an optional-value and a required-value map, the deserialized copy must equal the
 * original, and the copy's value type must be the very same instance returned by
 * {@code Types.LongType.get()}.
 */
@Test
public void testMaps() throws Exception {
  Type[] maps = new Type[] {
      Types.MapType.ofOptional(1, 2, Types.StringType.get(), Types.LongType.get()),
      Types.MapType.ofRequired(4, 5, Types.StringType.get(), Types.LongType.get())
  };

  for (Type map : maps) {
    Type copy = TestHelpers.roundTripSerialize(map);
    Assert.assertEquals("Map serialization should be equal to starting type", map, copy);
    // inspect the deserialized copy: checking the original would pass regardless of how
    // serialization behaves
    Assert.assertSame("Map serialization should preserve identity type",
        Types.LongType.get(), copy.asNestedType().asMapType().valueType());
  }
}
/**
 * Checks that reading optional map values with a schema that requires them is reported as
 * exactly one compatibility error.
 */
@Test
public void testRequiredMapValue() {
  Types.MapType optionalValues = Types.MapType.ofOptional(
      1, 2, Types.StringType.get(), Types.IntegerType.get());
  Schema write = new Schema(required(0, "map_field", optionalValues));

  Types.MapType requiredValues = Types.MapType.ofRequired(
      1, 2, Types.StringType.get(), Types.IntegerType.get());
  Schema read = new Schema(required(0, "map_field", requiredValues));

  List<String> problems = CheckCompatibility.writeCompatibilityErrors(read, write);

  Assert.assertEquals("Should produce 1 error message", 1, problems.size());

  String firstProblem = problems.get(0);
  Assert.assertTrue("Should complain that values are optional",
      firstProblem.contains("values should be required, but are optional"));
}
/**
 * Checks conversion in both directions between a string-keyed map with binary values and its
 * Avro schema counterpart.
 */
@Test
public void testMapOfStringToBytes() {
  Type expectedType = Types.MapType.ofRequired(
      33, 34, Types.StringType.get(), Types.BinaryType.get());

  Schema bytesSchema = Schema.create(Schema.Type.BYTES);
  Schema avroSchema = addKeyId(33, addValueId(34, SchemaBuilder.map().values(bytesSchema)));

  Assert.assertEquals("Avro schema to map",
      expectedType, AvroSchemaUtil.convert(avroSchema));
  Assert.assertEquals("Map to Avro schema",
      avroSchema, AvroSchemaUtil.convert(expectedType));
}
/**
 * Converts a map into a map type with newly assigned ids.
 *
 * <p>The key id is requested from {@code getNextId()} first, followed by the value id. Values
 * are optional in the result when the input map's values may contain null.
 *
 * @param map the map being visited
 * @param keyType the already-converted key type
 * @param valueType the already-converted value type
 * @return the resulting map type
 */
@Override
public Type map(MapType map, Type keyType, Type valueType) {
  boolean nullableValues = map.valueContainsNull();

  // key id first, then value id
  int keyId = getNextId();
  int valueId = getNextId();

  return nullableValues
      ? Types.MapType.ofOptional(keyId, valueId, keyType, valueType)
      : Types.MapType.ofRequired(keyId, valueId, keyType, valueType);
}
/**
 * Converts a map schema into a string-keyed map type.
 *
 * <p>Key and value ids come from {@code getKeyId(map)} and {@code getValueId(map)}. Values are
 * optional in the result when the map's value schema is an option schema.
 *
 * @param map the map schema being visited
 * @param valueType the already-converted value type
 * @return the resulting map type, always keyed by the string type
 */
@Override
public Type map(Schema map, Type valueType) {
  Schema valueSchema = map.getValueType();
  int keyId = getKeyId(map);
  int valueId = getValueId(map);

  boolean optionalValues = AvroSchemaUtil.isOptionSchema(valueSchema);

  return optionalValues
      ? Types.MapType.ofOptional(keyId, valueId, Types.StringType.get(), valueType)
      : Types.MapType.ofRequired(keyId, valueId, Types.StringType.get(), valueType);
}
/**
 * Checks conversion in both directions between a long-keyed map with binary values and its
 * Avro schema counterpart.
 */
@Test
public void testMapOfLongToBytes() {
  Type expectedType = Types.MapType.ofRequired(
      33, 34, Types.LongType.get(), Types.BinaryType.get());

  Schema keySchema = Schema.create(Schema.Type.LONG);
  Schema valueSchema = Schema.create(Schema.Type.BYTES);
  Schema avroSchema = AvroSchemaUtil.createMap(33, keySchema, 34, valueSchema);

  Assert.assertEquals("Avro schema to map",
      expectedType, AvroSchemaUtil.convert(avroSchema));
  Assert.assertEquals("Map to Avro schema",
      avroSchema, AvroSchemaUtil.convert(expectedType));
}