/**
 * Sets this builder's partition strategy by parsing a JSON String literal.
 *
 * The literal must be a JSON-formatted partition strategy, such as the
 * output of {@link PartitionStrategy#toString()}.
 *
 * @param literal
 *          A partition strategy String literal
 * @return This builder for method chaining.
 * @throws ValidationException
 *          If the literal is not a valid JSON-encoded partition strategy
 *
 * @since 0.14.0
 */
public Builder partitionStrategyLiteral(String literal) {
  final PartitionStrategy parsed = PartitionStrategyParser.parse(literal);
  this.partitionStrategy = parsed;
  return this;
}
/**
 * Reads a PartitionStrategy from a File.
 *
 * The file is handed to {@code JsonUtil.parse} and the parsed result is
 * turned into a strategy by {@code buildPartitionStrategy}.
 *
 * @param file
 *          The File that contains the PartitionStrategy in JSON format.
 * @return The PartitionStrategy.
 */
public static PartitionStrategy parse(final File file) {
  return buildPartitionStrategy(
      JsonUtil.parse(file));
}
public static Schema embedPartitionStrategy(Schema schema, PartitionStrategy strategy) { // TODO: avoid embedding strategies in the schema // Avro considers Props read-only and uses an older Jackson version // Parse the Schema as a String because Avro uses com.codehaus.jackson ObjectNode schemaJson = JsonUtil.parse(schema.toString(), ObjectNode.class); schemaJson.set(PARTITIONS, toJson(strategy)); return new Schema.Parser().parse(schemaJson.toString()); }
/**
 * Embeds a strategy into a schema that has no "partitions" property and
 * checks that the strategy is detected and read back unchanged.
 */
@Test
public void testAddEmbeddedPartitionStrategy() {
  final PartitionStrategy expected = new PartitionStrategy.Builder()
      .hash("username", 16)
      .identity("username", "u")
      .build();

  // record schema without any embedded partition strategy
  final String userSchemaJson = "{" +
      " \"type\": \"record\"," +
      " \"name\": \"User\"," +
      " \"fields\": [" +
      " {\"name\": \"id\", \"type\": \"long\"}," +
      " {\"name\": \"username\", \"type\": \"string\"}," +
      " {\"name\": \"real_name\", \"type\": \"string\"}" +
      " ]" +
      "}";
  final Schema plain = new Schema.Parser().parse(userSchemaJson);

  final Schema withStrategy =
      PartitionStrategyParser.embedPartitionStrategy(plain, expected);

  Assert.assertTrue(PartitionStrategyParser.hasEmbeddedStrategy(withStrategy));
  Assert.assertEquals(expected,
      PartitionStrategyParser.parseFromSchema(withStrategy));
}
if (PartitionStrategyParser.hasEmbeddedStrategy(schema)) { this.partitionStrategy = PartitionStrategyParser.parseFromSchema(schema);
private static Schema getEmbeddedSchema(DatasetDescriptor descriptor) { // the SchemaManager stores schemas, so this embeds the column mapping and // partition strategy in the schema. the result is parsed by // AvroKeyEntitySchemaParser Schema schema = descriptor.getSchema(); if (descriptor.isColumnMapped()) { schema = ColumnMappingParser .embedColumnMapping(schema, descriptor.getColumnMapping()); } if (descriptor.isPartitioned()) { schema = PartitionStrategyParser .embedPartitionStrategy(schema, descriptor.getPartitionStrategy()); } return schema; }
/**
 * Logs a human-readable summary of a dataset to the given logger.
 *
 * The summary contains the dataset name and URI, its schema with any
 * embedded partition strategy and column mapping removed, the partition
 * strategy (or a note that there is none), the column mapping if present,
 * and every descriptor property as a {@code name=value} line.
 *
 * @param console
 *          The Logger that receives the output
 * @param dataset
 *          The Dataset to describe
 */
private static void printInfo(Logger console, Dataset<?> dataset) {
  final DatasetDescriptor descriptor = dataset.getDescriptor();

  // show the plain schema: strategy and mapping are printed separately below
  final String schemaText = ColumnMappingParser
      .removeEmbeddedMapping(
          PartitionStrategyParser.removeEmbeddedStrategy(descriptor.getSchema()))
      .toString(true);
  final Collection<String> propertyNames = descriptor.listProperties();

  console.info("\nDataset \"{}\":", dataset.getName());
  console.info("\tURI: \"{}\"", dataset.getUri());
  console.info("\tSchema: {}", indent(schemaText));

  if (!descriptor.isPartitioned()) {
    console.info("\tNot partitioned");
  } else {
    console.info("\tPartition strategy: {}",
        indent(descriptor.getPartitionStrategy().toString(true)));
  }

  if (descriptor.isColumnMapped()) {
    console.info("\tColumn mapping: {}",
        indent(descriptor.getColumnMapping().toString(true)));
  }

  if (propertyNames.isEmpty()) {
    return;
  }

  // one "name=value" entry per property, each on its own indented line
  final StringBuilder lines = new StringBuilder();
  for (String name : propertyNames) {
    lines.append("\n\t\t");
    lines.append(name);
    lines.append("=");
    lines.append(descriptor.getProperty(name));
  }
  console.info("\tProperties:{}", lines.toString());
}
/**
 * Embeds a strategy into a schema that already declares a different
 * "partitions" property and checks that the new strategy is the one read
 * back afterwards.
 */
@Test
public void testReplaceEmbeddedPartitionStrategy() {
  final PartitionStrategy replacement = new PartitionStrategy.Builder()
      .hash("username", 16)
      .identity("username", "u")
      .build();

  // record schema that already embeds a strategy based on real_name
  final String userSchemaJson = "{" +
      " \"type\": \"record\"," +
      " \"name\": \"User\"," +
      " \"partitions\": [" +
      " {\"type\": \"hash\", \"source\": \"real_name\", \"buckets\": 64}," +
      " {\"type\": \"identity\", \"source\": \"real_name\", \"name\": \"r\"}" +
      " ]," +
      " \"fields\": [" +
      " {\"name\": \"id\", \"type\": \"long\"}," +
      " {\"name\": \"username\", \"type\": \"string\"}," +
      " {\"name\": \"real_name\", \"type\": \"string\"}" +
      " ]" +
      "}";
  final Schema preEmbedded = new Schema.Parser().parse(userSchemaJson);

  // precondition: the starting schema has a strategy, and it is not ours
  Assert.assertTrue(PartitionStrategyParser.hasEmbeddedStrategy(preEmbedded));
  Assert.assertFalse(
      PartitionStrategyParser.parseFromSchema(preEmbedded).equals(replacement));

  final Schema updated =
      PartitionStrategyParser.embedPartitionStrategy(preEmbedded, replacement);

  Assert.assertTrue(PartitionStrategyParser.hasEmbeddedStrategy(updated));
  Assert.assertEquals(replacement,
      PartitionStrategyParser.parseFromSchema(updated));
}
}
private static Schema getEmbeddedSchema(DatasetDescriptor descriptor) { // the SchemaManager stores schemas, so this embeds the column mapping and // partition strategy in the schema. the result is parsed by // AvroKeyEntitySchemaParser Schema schema = descriptor.getSchema(); if (descriptor.isColumnMapped()) { schema = ColumnMappingParser .embedColumnMapping(schema, descriptor.getColumnMapping()); } if (descriptor.isPartitioned()) { schema = PartitionStrategyParser .embedPartitionStrategy(schema, descriptor.getPartitionStrategy()); } return schema; }
.format(Formats.INPUTFORMAT) .schema(ColumnMappingParser.removeEmbeddedMapping( PartitionStrategyParser.removeEmbeddedStrategy(schema)));
/**
 * Sets this builder's partition strategy by parsing the contents of an
 * InputStream.
 *
 * The stream must contain a JSON-formatted partition strategy, such as the
 * output of {@link PartitionStrategy#toString()}.
 *
 * @param in
 *          The input stream
 * @return An instance of the builder for method chaining.
 * @throws ValidationException
 *          If the stream does not contain a valid JSON-encoded partition
 *          strategy
 * @throws DatasetIOException
 *          If there is an IOException accessing the InputStream contents
 *
 * @since 0.14.0
 */
public Builder partitionStrategy(InputStream in) {
  final PartitionStrategy parsed = PartitionStrategyParser.parse(in);
  this.partitionStrategy = parsed;
  return this;
}
.location(source.toUri()) .schema(ColumnMappingParser.removeEmbeddedMapping( PartitionStrategyParser.removeEmbeddedStrategy(datasetSchema))) .format("json") .build();
/**
 * Reads a PartitionStrategy from JSON text.
 *
 * The text is handed to {@code JsonUtil.parse} and the parsed result is
 * turned into a strategy by {@code buildPartitionStrategy}.
 *
 * @param json
 *          The JSON string
 * @return The PartitionStrategy.
 */
public static PartitionStrategy parse(final String json) {
  return buildPartitionStrategy(
      JsonUtil.parse(json));
}
/**
 * Renders a PartitionStrategy as JSON text.
 *
 * The strategy is converted to a JSON tree with {@code toJson} and written
 * through a Jackson generator into an in-memory writer.
 *
 * @param strategy
 *          The PartitionStrategy to serialize
 * @param pretty
 *          If true, the generator's default pretty printer is enabled
 * @return The JSON text that was written.
 * @throws DatasetIOException
 *          If an IOException is raised while creating, writing to or closing
 *          the generator
 */
public static String toString(PartitionStrategy strategy, boolean pretty) {
  final StringWriter out = new StringWriter();
  try {
    final JsonGenerator generator = new JsonFactory().createGenerator(out);
    if (pretty) {
      generator.useDefaultPrettyPrinter();
    }
    // a codec must be set before writeTree is called
    generator.setCodec(new ObjectMapper());
    generator.writeTree(toJson(strategy));
    // closing flushes buffered output into the writer
    generator.close();
  } catch (IOException e) {
    throw new DatasetIOException("Cannot write to JSON generator", e);
  }
  return out.toString();
}
/**
 * Sets this builder's partition strategy by parsing the contents of a File.
 *
 * The file must contain a JSON-formatted partition strategy, such as the
 * output of {@link PartitionStrategy#toString()}.
 *
 * @param file
 *          The File
 * @return An instance of the builder for method chaining.
 * @throws ValidationException
 *          If the file does not contain a valid JSON-encoded partition
 *          strategy
 * @throws DatasetIOException
 *          If there is an IOException accessing the file contents
 *
 * @since 0.14.0
 */
public Builder partitionStrategy(File file) {
  final PartitionStrategy parsed = PartitionStrategyParser.parse(file);
  this.partitionStrategy = parsed;
  return this;
}
.location(source.toUri()) .schema(ColumnMappingParser.removeEmbeddedMapping( PartitionStrategyParser.removeEmbeddedStrategy(datasetSchema))) .format("csv") .build();
/**
 * Reads a PartitionStrategy from an input stream.
 *
 * The stream is handed to {@code JsonUtil.parse} and the parsed result is
 * turned into a strategy by {@code buildPartitionStrategy}.
 *
 * @param in
 *          The input stream that contains the PartitionStrategy in JSON
 *          format.
 * @return The PartitionStrategy.
 */
public static PartitionStrategy parse(final InputStream in) {
  return buildPartitionStrategy(
      JsonUtil.parse(in));
}
/**
 * Parses a one-element strategy literal whose entry has type "provided",
 * name "version" and values "float".
 *
 * NOTE(review): presumably invoked by an enclosing exception assertion;
 * confirm against the surrounding test, which is not visible here.
 */
@Override
public void run() {
  final String json = "[ {" +
      "\"type\": \"provided\", " +
      "\"name\": \"version\", " +
      "\"values\": \"float\"" +
      "} ]";
  PartitionStrategyParser.parse(json);
}
});
/**
 * Parses a one-element strategy literal that specifies only a "type",
 * taken from the captured {@code type} variable.
 *
 * NOTE(review): presumably invoked by an enclosing exception assertion;
 * confirm against the surrounding test, which is not visible here.
 */
@Override
public void run() {
  final String json = "[ {\"type\": \"" + type + "\"} ]";
  PartitionStrategyParser.parse(json);
}
}
/**
 * Parses a one-element strategy literal with type "cats" and source
 * "banana".
 *
 * NOTE(review): presumably invoked by an enclosing exception assertion;
 * confirm against the surrounding test, which is not visible here.
 */
@Override
public void run() {
  final String json = "[ {\"type\": \"cats\", \"source\": \"banana\"} ]";
  PartitionStrategyParser.parse(json);
}
}