/**
 * Configure the dataset's column mappings from a String literal.
 *
 * The String literal is a JSON-formatted representation that can be
 * produced by {@link ColumnMapping#toString()}.
 *
 * @param literal
 *          A column mapping String literal
 * @return This builder for method chaining
 * @throws ValidationException
 *           If the literal is not valid JSON-encoded column mappings
 *
 * @since 0.14.0
 */
public Builder columnMappingLiteral(String literal) {
  // delegate JSON parsing and validation to the parser
  ColumnMapping parsed = ColumnMappingParser.parse(literal);
  this.columnMapping = parsed;
  return this;
}
/**
 * Parses a {@link ColumnMapping} from its JSON string representation.
 *
 * @param mappingDescriptor
 *          The mapping descriptor as a JSON string
 * @return ColumnMapping
 */
public static ColumnMapping parse(String mappingDescriptor) {
  JsonNode descriptor = JsonUtil.parse(mappingDescriptor);
  return buildColumnMapping(descriptor);
}
// Prefer a whole-schema embedded column mapping; fall back to per-field
// "mapping" annotations on the schema's record fields.
if (ColumnMappingParser.hasEmbeddedColumnMapping(schema)) {
  this.columnMapping = ColumnMappingParser.parseFromSchema(schema);
} else if (ColumnMappingParser.hasEmbeddedFieldMappings(schema)) {
  this.columnMapping = ColumnMappingParser.parseFromSchemaFields(schema);
  // only derive key mappings when no partition strategy was set explicitly
  if (partitionStrategy == null) {
    // NOTE(review): this snippet is truncated; the trailing ")" indicates
    // this call is an argument to a method whose opening is outside this
    // view — confirm against the full file.
    ColumnMappingParser.parseKeyMappingsFromSchemaFields(schema));
/**
 * Embedding a column mapping into a schema must be detectable and must
 * round-trip back to an equal mapping.
 */
@Test
public void testAddEmbeddedColumnMapping() {
  ColumnMapping mapping = new ColumnMapping.Builder()
      .key("id")
      .column("username", "u", "username")
      .column("real_name", "u", "name")
      .build();
  Schema original = new Schema.Parser().parse("{" +
      " \"type\": \"record\"," +
      " \"name\": \"User\"," +
      " \"partitions\": [" +
      " {\"type\": \"identity\", \"source\": \"id\", \"name\": \"id_copy\"}" +
      " ]," +
      " \"fields\": [" +
      " {\"name\": \"id\", \"type\": \"long\"}," +
      " {\"name\": \"username\", \"type\": \"string\"}," +
      " {\"name\": \"real_name\", \"type\": \"string\"}" +
      " ]" +
      "}");
  Schema embedded = ColumnMappingParser.embedColumnMapping(original, mapping);
  // use JUnit 4's org.junit.Assert instead of the deprecated JUnit 3
  // junit.framework.Assert, consistent with the other tests in this suite
  org.junit.Assert.assertTrue(
      ColumnMappingParser.hasEmbeddedColumnMapping(embedded));
  org.junit.Assert.assertEquals(
      mapping, ColumnMappingParser.parseFromSchema(embedded));
}
private static Schema getEmbeddedSchema(DatasetDescriptor descriptor) { // the SchemaManager stores schemas, so this embeds the column mapping and // partition strategy in the schema. the result is parsed by // AvroKeyEntitySchemaParser Schema schema = descriptor.getSchema(); if (descriptor.isColumnMapped()) { schema = ColumnMappingParser .embedColumnMapping(schema, descriptor.getColumnMapping()); } if (descriptor.isPartitioned()) { schema = PartitionStrategyParser .embedPartitionStrategy(schema, descriptor.getPartitionStrategy()); } return schema; }
/**
 * Key mappings parsed from field-level annotations must be indexed by the
 * numeric "value" of each key mapping, not by field order.
 */
@Test
public void testGetKeyMappingsFromSchemaFields() {
  Schema schema = new Schema.Parser().parse("{" +
      " \"type\": \"record\"," +
      " \"name\": \"User\"," +
      " \"fields\": [" +
      " {\"name\": \"id\", \"type\": \"long\", \"mapping\":" +
      " {\"type\": \"key\", \"value\": \"1\"} }," +
      " {\"name\": \"username\", \"type\": \"string\", \"mapping\":" +
      " {\"type\": \"key\", \"value\": \"0\"} }," +
      " {\"name\": \"real_name\", \"type\": \"string\", \"mapping\":" +
      " {\"type\": \"column\", \"value\": \"m:name\"} }" +
      " ]" +
      "}");
  Map<Integer, FieldMapping> keys = ColumnMappingParser
      .parseKeyMappingsFromSchemaFields(schema);
  // parameterize the builder: the raw ImmutableMap type forced an
  // unchecked call and lost the key/value types
  ImmutableMap<Integer, FieldMapping> expected =
      ImmutableMap.<Integer, FieldMapping>builder()
          .put(0, FieldMapping.key("username"))
          .put(1, FieldMapping.key("id"))
          .build();
  Assert.assertEquals(expected, keys);
}
/**
 * Parses the FieldMapping from an annotated schema field.
 *
 * @param mappingNode
 *          The value of the "mapping" node
 * @return FieldMapping
 * @throws ValidationException
 *           If the node is not a JSON object or has no source field
 */
public static FieldMapping parseFieldMapping(JsonNode mappingNode) {
  ValidationException.check(mappingNode.isObject(),
      "A column mapping must be a JSON record");
  // the message previously read "Partitioners must have a %s." — a
  // copy-paste from the partition strategy parser; this is a field mapping
  ValidationException.check(mappingNode.has(SOURCE),
      "Field mappings must have a %s.", SOURCE);
  // use the SOURCE constant consistently instead of the "source" literal
  String source = mappingNode.get(SOURCE).asText();
  return parseFieldMapping(source, mappingNode);
}
/**
 * Logs a human-readable summary of the dataset — URI, schema, partition
 * strategy, column mapping, and custom properties — to the console logger.
 */
private static void printInfo(Logger console, Dataset<?> dataset) {
  DatasetDescriptor descriptor = dataset.getDescriptor();
  // display the schema without embedded mapping/strategy annotations
  String schema = ColumnMappingParser.removeEmbeddedMapping(
      PartitionStrategyParser.removeEmbeddedStrategy(descriptor.getSchema()))
      .toString(true);
  Collection<String> properties = descriptor.listProperties();
  console.info("\nDataset \"{}\":", dataset.getName());
  console.info("\tURI: \"{}\"", dataset.getUri());
  console.info("\tSchema: {}", indent(schema));
  if (descriptor.isPartitioned()) {
    console.info("\tPartition strategy: {}",
        indent(descriptor.getPartitionStrategy().toString(true)));
  } else {
    console.info("\tNot partitioned");
  }
  if (descriptor.isColumnMapped()) {
    console.info("\tColumn mapping: {}",
        indent(descriptor.getColumnMapping().toString(true)));
  }
  if (!properties.isEmpty()) {
    StringBuilder formatted = new StringBuilder();
    for (String name : properties) {
      formatted.append("\n\t\t").append(name).append("=")
          .append(descriptor.getProperty(name));
    }
    console.info("\tProperties:{}", formatted.toString());
  }
}
" ]" + "}");
// NOTE(review): snippet is truncated — the schema literal begins outside
// this view. It appears to verify that embedding replaces a pre-existing,
// different embedded mapping; confirm against the full test.
Assert.assertTrue(ColumnMappingParser.hasEmbeddedColumnMapping(original));
// the original schema's embedded mapping differs from `mapping`
Assert.assertFalse(ColumnMappingParser.parseFromSchema(original).equals(mapping));
Schema embedded = ColumnMappingParser.embedColumnMapping(original, mapping);
// embedding must overwrite the prior mapping and round-trip to `mapping`
Assert.assertTrue(ColumnMappingParser.hasEmbeddedColumnMapping(embedded));
Assert.assertEquals(mapping, ColumnMappingParser.parseFromSchema(embedded));
private static Schema getEmbeddedSchema(DatasetDescriptor descriptor) { // the SchemaManager stores schemas, so this embeds the column mapping and // partition strategy in the schema. the result is parsed by // AvroKeyEntitySchemaParser Schema schema = descriptor.getSchema(); if (descriptor.isColumnMapped()) { schema = ColumnMappingParser .embedColumnMapping(schema, descriptor.getColumnMapping()); } if (descriptor.isPartitioned()) { schema = PartitionStrategyParser .embedPartitionStrategy(schema, descriptor.getPartitionStrategy()); } return schema; }
/**
 * Builds a ColumnMapping from a JSON array node, one element per field
 * mapping.
 *
 * @throws ValidationException if the node is not a JSON array
 */
private static ColumnMapping buildColumnMapping(JsonNode node) {
  ValidationException.check(node.isArray(),
      "Must be a JSON array of column mappings");
  ColumnMapping.Builder builder = new ColumnMapping.Builder();
  Iterator<JsonNode> elements = node.elements();
  while (elements.hasNext()) {
    builder.fieldMapping(parseFieldMapping(elements.next()));
  }
  return builder.build();
}
// store the schema with any embedded column mapping and partition strategy
// stripped; those structures are carried separately on the descriptor
.property(InputFormatUtil.INPUT_FORMAT_RECORD_PROP, recordType)
.format(Formats.INPUTFORMAT)
.schema(ColumnMappingParser.removeEmbeddedMapping(
    PartitionStrategyParser.removeEmbeddedStrategy(schema)));
/**
 * Configure the dataset's column mapping descriptor from a File.
 *
 * The File contents must be a JSON-formatted column mapping. This format
 * can be produced by {@link ColumnMapping#toString()}.
 *
 * @param file
 *          The file
 * @return This builder for method chaining
 * @throws ValidationException
 *           If the file contents are not valid JSON-encoded column mappings
 * @throws DatasetIOException
 *           If there is an IOException accessing the file contents
 *
 * @since 0.14.0
 */
public Builder columnMapping(File file) {
  this.columnMapping = ColumnMappingParser.parse(file);
  return this;
}
public static Map<Integer, FieldMapping> parseKeyMappingsFromSchemaFields( Schema schema) { Map<Integer, FieldMapping> keyMappings = Maps.newHashMap(); if (Schema.Type.RECORD == schema.getType()) { for (Schema.Field field : schema.getFields()) { if (field.getJsonProp(MAPPING) != null) { // parse the String because Avro uses com.codehaus.jackson JsonNode mappingNode = JsonUtil.parse( field.getJsonProp(MAPPING).toString()); FieldMapping fm = parseFieldMapping(field.name(), mappingNode); if (FieldMapping.MappingType.KEY == fm.getMappingType() && mappingNode.has(VALUE)) { Integer index = mappingNode.get(VALUE).asInt(); keyMappings.put(index, fm); } } } return keyMappings; } throw new IllegalArgumentException( "Cannot parse field-level mappings from non-Record"); }
// persist the schema with embedded mapping/strategy annotations removed;
// the descriptor tracks those structures separately
.schema(ColumnMappingParser.removeEmbeddedMapping(
    PartitionStrategyParser.removeEmbeddedStrategy(datasetSchema)))
.format("json")
/**
 * Parses a {@link ColumnMapping} from a File containing its JSON
 * representation.
 *
 * @param file
 *          The File that contains the Mapping Descriptor in JSON format.
 * @return ColumnMapping.
 */
public static ColumnMapping parse(File file) {
  JsonNode descriptor = JsonUtil.parse(file);
  return buildColumnMapping(descriptor);
}
/**
 * Configure the dataset's column mapping descriptor from an InputStream.
 *
 * The InputStream contents must be a JSON-formatted column mapping. This
 * format can be produced by {@link ColumnMapping#toString()}.
 *
 * @param in
 *          The input stream
 * @return This builder for method chaining
 * @throws ValidationException
 *           If the stream contents are not valid JSON-encoded column
 *           mappings
 * @throws DatasetIOException
 *           If there is an IOException accessing the InputStream contents
 *
 * @since 0.14.0
 */
public Builder columnMapping(InputStream in) {
  this.columnMapping = ColumnMappingParser.parse(in);
  return this;
}
public static ColumnMapping parseFromSchemaFields(Schema schema) { if (Schema.Type.RECORD == schema.getType()) { ColumnMapping.Builder builder = new ColumnMapping.Builder(); for (Schema.Field field : schema.getFields()) { if (field.getJsonProp(MAPPING) != null) { // parse the String because Avro uses com.codehaus.jackson builder.fieldMapping(parseFieldMapping(field.name(), JsonUtil.parse(field.getJsonProp(MAPPING).toString()))); } } return builder.build(); } throw new IllegalArgumentException( "Cannot parse field-level mappings from non-Record"); }
// persist the schema with embedded mapping/strategy annotations removed;
// the descriptor tracks those structures separately
.schema(ColumnMappingParser.removeEmbeddedMapping(
    PartitionStrategyParser.removeEmbeddedStrategy(datasetSchema)))
.format("csv")
/**
 * Parses a {@link ColumnMapping} from an input stream containing its JSON
 * representation.
 *
 * @param in
 *          The input stream that contains the Mapping Descriptor in JSON
 *          format.
 * @return ColumnMapping.
 */
public static ColumnMapping parse(InputStream in) {
  JsonNode descriptor = JsonUtil.parse(in);
  return buildColumnMapping(descriptor);
}