strategyBuilder.year(fieldName); } else if ("month".equals(partitionerType)) { strategyBuilder.month(fieldName); } else if ("day".equals(partitionerType)) { strategyBuilder.day(fieldName);
.identity("id") .year("timestamp") .month("timestamp") .day("timestamp") .build();
/**
 * Checks that a storage key carrying the values 2013, 11 and 5 for a
 * year/month/day strategy is converted to the path
 * {@code year=2013/month=11/day=05}.
 */
@Test
@SuppressWarnings("unchecked")
public void testFromKey() {
  PartitionStrategy strategy = new PartitionStrategy.Builder()
      .year("timestamp")
      .month("timestamp")
      .day("timestamp")
      .build();

  StorageKey key = new StorageKey(strategy);
  // Deliberately a raw List, exactly as the original raw cast handed it over.
  List values = Lists.newArrayList(2013, 11, 5);
  key.replaceValues(values);

  // Note the day component is expected as "05", not "5".
  Path expected = new Path("year=2013/month=11/day=05");
  Assert.assertEquals(expected, convert.fromKey(key));
}
.range("color", "blue", "green", "red") .year("timestamp") .month("timestamp") .day("timestamp") .identity("id")
// Builds a PartitionStrategy by chaining year(), month() and day() on the
// builder, each of them given the same "timestamp" field name.
PartitionStrategy strategy = new PartitionStrategy.Builder() .year("timestamp") .month("timestamp") .day("timestamp") .build();
// Builds a PartitionStrategy by chaining year(), month() and day() on the
// builder, each of them given the same "timestamp" field name.
PartitionStrategy strategy = new PartitionStrategy.Builder() .year("timestamp") .month("timestamp") .day("timestamp") .build();
.partitionStrategy(new PartitionStrategy.Builder() .year("timestamp") .month("timestamp") .day("timestamp") .hour("timestamp")
@Before public void setUp() throws IOException, URISyntaxException { this.conf = (distributed ? MiniDFSTest.getConfiguration() : new Configuration()); this.testDescriptor = new DatasetDescriptor.Builder() .format(Formats.AVRO) .schema(SchemaBuilder.record("Event").fields() .requiredLong("timestamp") .requiredString("message") .endRecord()) .partitionStrategy(new PartitionStrategy.Builder() .year("timestamp") .month("timestamp") .day("timestamp") .build()) .build(); // something completely different this.anotherDescriptor = new DatasetDescriptor.Builder() .format(Formats.PARQUET) .schema(SchemaBuilder.record("Record").fields() .requiredBytes("some_field") .requiredString("another_field") .endRecord()) .partitionStrategy(new PartitionStrategy.Builder() .hash("some_field", 20000) .build()) .build(); this.provider = newProvider(conf); }
.year("created_at").month("created_at").day("created_at") .identity("color") .build();
@Override public int run(String[] args) throws Exception { // where the schema is stored URI schemaURI = URI.create("resource:simple-log.avsc"); // create a Parquet dataset for long-term storage Datasets.create("dataset:file:/tmp/data/logs", new DatasetDescriptor.Builder() .format(Formats.PARQUET) .schemaUri(schemaURI) .partitionStrategy(new PartitionStrategy.Builder() .year("timestamp", "year") .month("timestamp", "month") .day("timestamp", "day") .build()) .build(), Record.class); // create an Avro dataset to temporarily hold data Datasets.create("dataset:file:/tmp/data/logs_staging", new DatasetDescriptor.Builder() .format(Formats.AVRO) .schemaUri(schemaURI) .partitionStrategy(new PartitionStrategy.Builder() .day("timestamp", "day") .build()) .build(), Record.class); return 0; }
/**
 * One-time fixture: initialises the four shared markers and the comparator
 * built from a year/month/day strategy on "timestamp".
 */
@BeforeClass
public static void setup() {
  OCT_12 = newDayMarker(2013, 10, 12);
  OCT_15 = newDayMarker(2013, 10, 15);
  SEPT_30 = newDayMarker(2013, 9, 30);
  NOV_1 = newDayMarker(2013, 11, 1);

  PartitionStrategy strategy = new PartitionStrategy.Builder()
      .year("timestamp")
      .month("timestamp")
      .day("timestamp")
      .build();
  comparator = new MarkerComparator(strategy);
}

/** Builds a marker holding the given "year", "month" and "day" values. */
private static Marker newDayMarker(int year, int month, int day) {
  return new Marker.Builder()
      .add("year", year)
      .add("month", month)
      .add("day", day)
      .build();
}
/**
 * Passes a strategy with nine partitioners of mixed kinds, together with the
 * JSON array describing the same partitioners, to {@code checkParser}.
 */
@Test
public void testMultipleFields() {
  PartitionStrategy expected = new PartitionStrategy.Builder()
      .provided("version")
      .hash("username", 64)
      .identity("username", "u")
      .year("time")
      .month("time")
      .day("time")
      .hour("time")
      .minute("time")
      .dateFormat("time", "datetime", "yyyy_MM_dd_HHmmss")
      .build();

  // One JSON object per partitioner, in the same order as the builder calls.
  String json = "[ "
      + "{\"type\": \"provided\", \"name\": \"version\"},"
      + "{\"type\": \"hash\", \"source\": \"username\", \"buckets\": 64},"
      + "{\"type\": \"identity\",\"source\": \"username\", \"name\": \"u\"},"
      + "{\"type\": \"year\", \"source\": \"time\"},"
      + "{\"type\": \"month\", \"source\": \"time\"},"
      + "{\"type\": \"day\", \"source\": \"time\"},"
      + "{\"type\": \"hour\", \"source\": \"time\"},"
      + "{\"type\": \"minute\", \"source\": \"time\"},"
      + "{\"type\": \"dateFormat\", \"source\": \"time\", "
      + "\"name\": \"datetime\", \"format\": \"yyyy_MM_dd_HHmmss\"}"
      + " ]";

  checkParser(expected, json);
}
/**
 * Runs {@code Compatibility.checkDescriptor} on a descriptor that combines
 * the shared schema with time, identity, hash and range partitioners; the
 * test passes when the check returns without throwing.
 */
@Test
public void testAllowedPartitionSchemaCombinations() {
  PartitionStrategy everything = new PartitionStrategy.Builder()
      // time-based partitioners
      .year("timestamp")
      .month("timestamp")
      .day("timestamp")
      .hour("timestamp")
      .minute("timestamp")
      // identity partitioners with explicit names
      .identity("message", "message_copy")
      .identity("timestamp", "ts")
      .identity("number", "num")
      // hash partitioners, 48 buckets each
      .hash("message", 48)
      .hash("timestamp", 48)
      .hash("number", 48)
      .hash("payload", 48)
      .hash("float", 48)
      .hash("double", 48)
      .hash("bool", 48)
      // range partitioners over a numeric and a string field
      .range("number", 5, 10, 15, 20)
      .range("message", "m", "z", "M", "Z")
      .build();

  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(schema)
      .partitionStrategy(everything)
      .build();

  Compatibility.checkDescriptor(descriptor);
}
/**
 * Per-test fixture: sets up the configuration, file system, trash policy and
 * repository, then recreates the "ns"/"test" and "ns"/"value" datasets.
 *
 * @throws Exception if any file system or repository call fails
 */
@Before
public void setup() throws Exception {
  if (distributed) {
    this.conf = MiniDFSTest.getConfiguration();
  } else {
    this.conf = new Configuration();
  }
  this.fs = FileSystem.get(conf);
  this.trashPolicy = TrashPolicy.getInstance(conf, fs, fs.getHomeDirectory());
  this.repo = newRepo();

  // Partitioned dataset of standard events.
  this.strategy = new PartitionStrategy.Builder()
      .year("timestamp")
      .month("timestamp")
      .day("timestamp")
      .build();
  this.testDescriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:standard_event.avsc")
      .partitionStrategy(strategy)
      .build();
  // delete is called before create for each dataset
  repo.delete("ns", "test");
  this.unbounded = repo.create("ns", "test", testDescriptor);

  // Dataset of values, built without a partition strategy.
  this.valueDescriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:value.avsc")
      .build();
  repo.delete("ns", "value");
  this.valueView = repo.create("ns", "value", valueDescriptor);
  this.testValueView = repo.load("ns", "value", TestValue.class);
}
/**
 * Currently ignored. Asserts that two partitioners may share one source
 * field, and that two partitioners with the same partition name are rejected
 * with a {@code ValidationException}.
 */
@Test
@Ignore
public void testDuplicateFieldNames() {
  // Same source ("timestamp") used twice.
  PartitionStrategy sharedSource = new PartitionStrategy.Builder()
      .year("timestamp")
      .month("timestamp")
      .build();
  Assert.assertNotNull("Should allow duplicate source fields", sharedSource);

  // Different sources, but both partitioners are named "num".
  Runnable buildWithDuplicateNames = new Runnable() {
    @Override
    public void run() {
      new PartitionStrategy.Builder()
          .identity("number", "num")
          .identity("number2", "num")
          .build();
    }
  };
  TestHelpers.assertThrows("Should reject duplicate partition fields",
      ValidationException.class, buildWithDuplicateNames);
}
/**
 * Per-test fixture: creates the "ns"/"test" dataset from the standard event
 * schema, using year/month/day partitioners on "timestamp" followed by a
 * hash partitioner on "user_id".
 *
 * @throws Exception if the file system or repository cannot be set up
 */
@Before
public void setup() throws Exception {
  conf = new Configuration();
  fs = FileSystem.get(conf);
  repo = newRepo();

  PartitionStrategy.Builder partitioning = new PartitionStrategy.Builder()
      .year("timestamp")
      .month("timestamp")
      .day("timestamp")
      .hash("user_id", 2);
  strategy = partitioning.build();

  testDescriptor = new DatasetDescriptor.Builder()
      .schemaUri("resource:standard_event.avsc")
      .partitionStrategy(strategy)
      .build();
  testDataset = repo.create("ns", "test", testDescriptor);
}
/**
 * Checks that the path {@code year=2013/month=11/day=5} is converted to a
 * storage key equal to one holding the values 2013, 11 and 5.
 */
@Test
@SuppressWarnings("unchecked")
public void testToKey() {
  PartitionStrategy strategy = new PartitionStrategy.Builder()
      .year("timestamp")
      .month("timestamp")
      .day("timestamp")
      .build();

  StorageKey expected = new StorageKey(strategy);
  // Deliberately a raw List, exactly as the original raw cast handed it over.
  List values = Lists.newArrayList(2013, 11, 5);
  expected.replaceValues(values);

  // The day component in the input path is not zero-padded.
  Path path = new Path("year=2013/month=11/day=5");
  StorageKey target = new StorageKey(strategy);
  Assert.assertEquals(expected, convert.toKey(path, target));
}
// Builds a DatasetDescriptor whose "Record" schema declares "timestamp" as a
// required int while the partition strategy applies year(), month() and day()
// to that same field. The built descriptor is discarded.
// NOTE(review): presumably the enclosing call expects this build to throw;
// the start of that call is outside this excerpt, so confirm there.
@Override public void run() { new DatasetDescriptor.Builder() .schema(SchemaBuilder.record("Record").fields() .requiredInt("timestamp") .endRecord()) .partitionStrategy(new PartitionStrategy.Builder() .year("timestamp").month("timestamp").day("timestamp") .build()) .build(); } });