/**
 * Configure the dataset's column mappings from a String literal.
 *
 * The String literal is a JSON-formatted representation that can be
 * produced by {@link ColumnMapping#toString()}.
 *
 * @param literal
 *          A column mapping String literal
 * @return This builder for method chaining
 * @throws ValidationException
 *           If the literal is not valid JSON-encoded column mappings
 *
 * @since 0.14.0
 */
public Builder columnMappingLiteral(String literal) {
  // delegate JSON parsing and validation to the parser
  ColumnMapping parsed = ColumnMappingParser.parse(literal);
  this.columnMapping = parsed;
  return this;
}
/**
 * Parses a {@link ColumnMapping} from its JSON string representation.
 *
 * @param mappingDescriptor
 *          The mapping descriptor as a JSON string
 * @return ColumnMapping
 */
public static ColumnMapping parse(String mappingDescriptor) {
  JsonNode descriptor = JsonUtil.parse(mappingDescriptor);
  return buildColumnMapping(descriptor);
}
// Prefer a whole-schema embedded column mapping; fall back to per-field
// "mapping" annotations on the schema's record fields.
if (ColumnMappingParser.hasEmbeddedColumnMapping(schema)) {
  this.columnMapping = ColumnMappingParser.parseFromSchema(schema);
} else if (ColumnMappingParser.hasEmbeddedFieldMappings(schema)) {
  this.columnMapping = ColumnMappingParser.parseFromSchemaFields(schema);
  // only derive key mappings when no partition strategy was set explicitly
  if (partitionStrategy == null) {
    // NOTE(review): this snippet is truncated; the trailing ")" indicates
    // this call is an argument to a method whose opening is outside this
    // view — confirm against the full file.
    ColumnMappingParser.parseKeyMappingsFromSchemaFields(schema));
/**
 * Embedding a column mapping into a schema must be detectable and must
 * round-trip back to an equal mapping.
 */
@Test
public void testAddEmbeddedColumnMapping() {
  ColumnMapping mapping = new ColumnMapping.Builder()
      .key("id")
      .column("username", "u", "username")
      .column("real_name", "u", "name")
      .build();
  Schema original = new Schema.Parser().parse("{" +
      " \"type\": \"record\"," +
      " \"name\": \"User\"," +
      " \"partitions\": [" +
      " {\"type\": \"identity\", \"source\": \"id\", \"name\": \"id_copy\"}" +
      " ]," +
      " \"fields\": [" +
      " {\"name\": \"id\", \"type\": \"long\"}," +
      " {\"name\": \"username\", \"type\": \"string\"}," +
      " {\"name\": \"real_name\", \"type\": \"string\"}" +
      " ]" +
      "}");
  Schema embedded = ColumnMappingParser.embedColumnMapping(original, mapping);
  // use JUnit 4's org.junit.Assert instead of the deprecated JUnit 3
  // junit.framework.Assert, consistent with the other tests in this suite
  org.junit.Assert.assertTrue(
      ColumnMappingParser.hasEmbeddedColumnMapping(embedded));
  org.junit.Assert.assertEquals(
      mapping, ColumnMappingParser.parseFromSchema(embedded));
}
private static Schema getEmbeddedSchema(DatasetDescriptor descriptor) { // the SchemaManager stores schemas, so this embeds the column mapping and // partition strategy in the schema. the result is parsed by // AvroKeyEntitySchemaParser Schema schema = descriptor.getSchema(); if (descriptor.isColumnMapped()) { schema = ColumnMappingParser .embedColumnMapping(schema, descriptor.getColumnMapping()); } if (descriptor.isPartitioned()) { schema = PartitionStrategyParser .embedPartitionStrategy(schema, descriptor.getPartitionStrategy()); } return schema; }
/**
 * Key mappings parsed from field-level annotations must be indexed by the
 * numeric "value" of each key mapping, not by field order.
 */
@Test
public void testGetKeyMappingsFromSchemaFields() {
  Schema schema = new Schema.Parser().parse("{" +
      " \"type\": \"record\"," +
      " \"name\": \"User\"," +
      " \"fields\": [" +
      " {\"name\": \"id\", \"type\": \"long\", \"mapping\":" +
      " {\"type\": \"key\", \"value\": \"1\"} }," +
      " {\"name\": \"username\", \"type\": \"string\", \"mapping\":" +
      " {\"type\": \"key\", \"value\": \"0\"} }," +
      " {\"name\": \"real_name\", \"type\": \"string\", \"mapping\":" +
      " {\"type\": \"column\", \"value\": \"m:name\"} }" +
      " ]" +
      "}");
  Map<Integer, FieldMapping> keys = ColumnMappingParser
      .parseKeyMappingsFromSchemaFields(schema);
  // parameterize the builder: the raw ImmutableMap type forced an
  // unchecked call and lost the key/value types
  ImmutableMap<Integer, FieldMapping> expected =
      ImmutableMap.<Integer, FieldMapping>builder()
          .put(0, FieldMapping.key("username"))
          .put(1, FieldMapping.key("id"))
          .build();
  Assert.assertEquals(expected, keys);
}
/**
 * Parses the FieldMapping from an annotated schema field.
 *
 * @param mappingNode
 *          The value of the "mapping" node
 * @return FieldMapping
 * @throws ValidationException
 *           If the node is not a JSON object or has no source field
 */
public static FieldMapping parseFieldMapping(JsonNode mappingNode) {
  ValidationException.check(mappingNode.isObject(),
      "A column mapping must be a JSON record");
  // the message previously read "Partitioners must have a %s." — a
  // copy-paste from the partition strategy parser; this is a field mapping
  ValidationException.check(mappingNode.has(SOURCE),
      "Field mappings must have a %s.", SOURCE);
  // use the SOURCE constant consistently instead of the "source" literal
  String source = mappingNode.get(SOURCE).asText();
  return parseFieldMapping(source, mappingNode);
}
/**
 * Logs a human-readable summary of the dataset — URI, schema, partition
 * strategy, column mapping, and custom properties — to the console logger.
 */
private static void printInfo(Logger console, Dataset<?> dataset) {
  DatasetDescriptor descriptor = dataset.getDescriptor();
  // display the schema without embedded mapping/strategy annotations
  String schema = ColumnMappingParser.removeEmbeddedMapping(
      PartitionStrategyParser.removeEmbeddedStrategy(descriptor.getSchema()))
      .toString(true);
  Collection<String> properties = descriptor.listProperties();
  console.info("\nDataset \"{}\":", dataset.getName());
  console.info("\tURI: \"{}\"", dataset.getUri());
  console.info("\tSchema: {}", indent(schema));
  if (descriptor.isPartitioned()) {
    console.info("\tPartition strategy: {}",
        indent(descriptor.getPartitionStrategy().toString(true)));
  } else {
    console.info("\tNot partitioned");
  }
  if (descriptor.isColumnMapped()) {
    console.info("\tColumn mapping: {}",
        indent(descriptor.getColumnMapping().toString(true)));
  }
  if (!properties.isEmpty()) {
    StringBuilder formatted = new StringBuilder();
    for (String name : properties) {
      formatted.append("\n\t\t").append(name).append("=")
          .append(descriptor.getProperty(name));
    }
    console.info("\tProperties:{}", formatted.toString());
  }
}
" ]" + "}");
// NOTE(review): snippet is truncated — the schema literal begins outside
// this view. It appears to verify that embedding replaces a pre-existing,
// different embedded mapping; confirm against the full test.
Assert.assertTrue(ColumnMappingParser.hasEmbeddedColumnMapping(original));
// the original schema's embedded mapping differs from `mapping`
Assert.assertFalse(ColumnMappingParser.parseFromSchema(original).equals(mapping));
Schema embedded = ColumnMappingParser.embedColumnMapping(original, mapping);
// embedding must overwrite the prior mapping and round-trip to `mapping`
Assert.assertTrue(ColumnMappingParser.hasEmbeddedColumnMapping(embedded));
Assert.assertEquals(mapping, ColumnMappingParser.parseFromSchema(embedded));
private static Schema getEmbeddedSchema(DatasetDescriptor descriptor) { // the SchemaManager stores schemas, so this embeds the column mapping and // partition strategy in the schema. the result is parsed by // AvroKeyEntitySchemaParser Schema schema = descriptor.getSchema(); if (descriptor.isColumnMapped()) { schema = ColumnMappingParser .embedColumnMapping(schema, descriptor.getColumnMapping()); } if (descriptor.isPartitioned()) { schema = PartitionStrategyParser .embedPartitionStrategy(schema, descriptor.getPartitionStrategy()); } return schema; }
/**
 * Builds a ColumnMapping from a JSON array node, one element per field
 * mapping.
 *
 * @throws ValidationException if the node is not a JSON array
 */
private static ColumnMapping buildColumnMapping(JsonNode node) {
  ValidationException.check(node.isArray(),
      "Must be a JSON array of column mappings");
  ColumnMapping.Builder builder = new ColumnMapping.Builder();
  Iterator<JsonNode> elements = node.elements();
  while (elements.hasNext()) {
    builder.fieldMapping(parseFieldMapping(elements.next()));
  }
  return builder.build();
}
// store the schema with any embedded column mapping and partition strategy
// stripped; those structures are carried separately on the descriptor
.property(InputFormatUtil.INPUT_FORMAT_RECORD_PROP, recordType)
.format(Formats.INPUTFORMAT)
.schema(ColumnMappingParser.removeEmbeddedMapping(
    PartitionStrategyParser.removeEmbeddedStrategy(schema)));
/**
 * Configure the dataset's column mapping descriptor from a File.
 *
 * The File contents must be a JSON-formatted column mapping. This format
 * can be produced by {@link ColumnMapping#toString()}.
 *
 * @param file
 *          The file
 * @return This builder for method chaining
 * @throws ValidationException
 *           If the file contents are not valid JSON-encoded column mappings
 * @throws DatasetIOException
 *           If there is an IOException accessing the file contents
 *
 * @since 0.14.0
 */
public Builder columnMapping(File file) {
  this.columnMapping = ColumnMappingParser.parse(file);
  return this;
}
public static Map<Integer, FieldMapping> parseKeyMappingsFromSchemaFields( Schema schema) { Map<Integer, FieldMapping> keyMappings = Maps.newHashMap(); if (Schema.Type.RECORD == schema.getType()) { for (Schema.Field field : schema.getFields()) { if (field.getJsonProp(MAPPING) != null) { // parse the String because Avro uses com.codehaus.jackson JsonNode mappingNode = JsonUtil.parse( field.getJsonProp(MAPPING).toString()); FieldMapping fm = parseFieldMapping(field.name(), mappingNode); if (FieldMapping.MappingType.KEY == fm.getMappingType() && mappingNode.has(VALUE)) { Integer index = mappingNode.get(VALUE).asInt(); keyMappings.put(index, fm); } } } return keyMappings; } throw new IllegalArgumentException( "Cannot parse field-level mappings from non-Record"); }
// persist the schema with embedded mapping/strategy annotations removed;
// the descriptor tracks those structures separately
.schema(ColumnMappingParser.removeEmbeddedMapping(
    PartitionStrategyParser.removeEmbeddedStrategy(datasetSchema)))
.format("json")
/**
 * Parses a {@link ColumnMapping} from a File containing its JSON
 * representation.
 *
 * @param file
 *          The File that contains the Mapping Descriptor in JSON format.
 * @return ColumnMapping.
 */
public static ColumnMapping parse(File file) {
  JsonNode descriptor = JsonUtil.parse(file);
  return buildColumnMapping(descriptor);
}
/**
 * Configure the dataset's column mapping descriptor from an InputStream.
 *
 * The InputStream contents must be a JSON-formatted column mapping. This
 * format can be produced by {@link ColumnMapping#toString()}.
 *
 * @param in
 *          The input stream
 * @return This builder for method chaining
 * @throws ValidationException
 *           If the stream contents are not valid JSON-encoded column
 *           mappings
 * @throws DatasetIOException
 *           If there is an IOException accessing the InputStream contents
 *
 * @since 0.14.0
 */
public Builder columnMapping(InputStream in) {
  this.columnMapping = ColumnMappingParser.parse(in);
  return this;
}
public static ColumnMapping parseFromSchemaFields(Schema schema) { if (Schema.Type.RECORD == schema.getType()) { ColumnMapping.Builder builder = new ColumnMapping.Builder(); for (Schema.Field field : schema.getFields()) { if (field.getJsonProp(MAPPING) != null) { // parse the String because Avro uses com.codehaus.jackson builder.fieldMapping(parseFieldMapping(field.name(), JsonUtil.parse(field.getJsonProp(MAPPING).toString()))); } } return builder.build(); } throw new IllegalArgumentException( "Cannot parse field-level mappings from non-Record"); }
// persist the schema with embedded mapping/strategy annotations removed;
// the descriptor tracks those structures separately
.schema(ColumnMappingParser.removeEmbeddedMapping(
    PartitionStrategyParser.removeEmbeddedStrategy(datasetSchema)))
.format("csv")
/**
 * Parses a {@link ColumnMapping} from an input stream containing its JSON
 * representation.
 *
 * @param in
 *          The input stream that contains the Mapping Descriptor in JSON
 *          format.
 * @return ColumnMapping.
 */
public static ColumnMapping parse(InputStream in) {
  JsonNode descriptor = JsonUtil.parse(in);
  return buildColumnMapping(descriptor);
}