return builder.build();
/**
 * Creates a {@link PartitionedFileSetProperties.Builder} seeded from the given config.
 *
 * <p>The builder is partitioned by a single long field named {@code SNAPSHOT_FIELD}. If the config has a
 * non-empty base path it is applied, and the config's file properties string is decoded with {@code GSON}
 * as {@code MAP_TYPE} and added to the builder when it decodes to a non-null map.
 *
 * @param config the configuration supplying the base path and the file properties string
 * @return the partially configured builder, which the caller may extend before building
 * @throws IllegalArgumentException if decoding or adding the file properties fails for any reason
 */
public static PartitionedFileSetProperties.Builder getBaseProperties(SnapshotFileSetConfig config) {
  PartitionedFileSetProperties.Builder result = PartitionedFileSetProperties.builder();
  result.setPartitioning(Partitioning.builder().addLongField(SNAPSHOT_FIELD).build());

  String basePath = config.getBasePath();
  if (!Strings.isNullOrEmpty(basePath)) {
    result.setBasePath(basePath);
  }

  try {
    Map<String, String> decoded = GSON.fromJson(config.getFileProperties(), MAP_TYPE);
    // a null result means there is nothing to add
    if (decoded != null) {
      result.addAll(decoded);
    }
    return result;
  } catch (Exception e) {
    String message = "Could not decode the 'properties' setting. Please check that it "
      + "is a JSON Object of string to string. Failed with error: " + e.getMessage();
    throw new IllegalArgumentException(message, e);
  }
}
@Test public void testFieldOrder() { Partitioning partitioning = Partitioning.builder() .addIntField("1") .addLongField("2") .addStringField("3") .build(); Iterator<Map.Entry<String, FieldType>> iterator = partitioning.getFields().entrySet().iterator(); Assert.assertEquals("1", iterator.next().getKey()); Assert.assertEquals("2", iterator.next().getKey()); Assert.assertEquals("3", iterator.next().getKey()); Assert.assertFalse(iterator.hasNext()); // the previous order may have been preserved by chance. Now try the reverse order partitioning = Partitioning.builder() .addIntField("3") .addLongField("2") .addStringField("1") .build(); iterator = partitioning.getFields().entrySet().iterator(); Assert.assertEquals("3", iterator.next().getKey()); Assert.assertEquals("2", iterator.next().getKey()); Assert.assertEquals("1", iterator.next().getKey()); Assert.assertFalse(iterator.hasNext()); }
/**
 * Registers four file-based datasets ("fs", "pfs", "tpfs", "myfs") and two Spark programs.
 *
 * <p>All datasets use {@code MyTextInputFormat} as input format and ":" as the text output separator.
 * Only "fs" uses {@code MyTextOutputFormat}; the others use {@code TextOutputFormat}.
 * Any {@link Throwable} raised while configuring is rethrown through {@code Throwables.propagate}.
 */
@Override
public void configure() {
  final String separator = ":";
  try {
    createDataset("fs", FileSet.class,
                  FileSetProperties.builder()
                    .setInputFormat(MyTextInputFormat.class)
                    .setOutputFormat(MyTextOutputFormat.class)
                    .setOutputProperty(TextOutputFormat.SEPERATOR, separator)
                    .build());

    // the partitioned file set has a single string partition field "x"
    createDataset("pfs", PartitionedFileSet.class,
                  PartitionedFileSetProperties.builder()
                    .setPartitioning(Partitioning.builder().addStringField("x").build())
                    .setInputFormat(MyTextInputFormat.class)
                    .setOutputFormat(TextOutputFormat.class)
                    .setOutputProperty(TextOutputFormat.SEPERATOR, separator)
                    .build());

    createDataset("tpfs", TimePartitionedFileSet.class,
                  FileSetProperties.builder()
                    .setInputFormat(MyTextInputFormat.class)
                    .setOutputFormat(TextOutputFormat.class)
                    .setOutputProperty(TextOutputFormat.SEPERATOR, separator)
                    .build());

    createDataset("myfs", MyFileSet.class,
                  FileSetProperties.builder()
                    .setInputFormat(MyTextInputFormat.class)
                    .setOutputFormat(TextOutputFormat.class)
                    .setOutputProperty(TextOutputFormat.SEPERATOR, separator)
                    .build());

    addSpark(new FileCountSparkProgram());
    addSpark(new ScalaFileCountSparkProgram());
  } catch (Throwable t) {
    throw Throwables.propagate(t);
  }
}
@Override public void configure() { setName("AppWithMapReduceUsingFile"); setDescription("Application with MapReduce job using file as dataset"); createDataset(INPUT, "table"); createDataset(OUTPUT, "table"); Class<? extends InputFormat> inputFormatClass = getConfig().isUseCombineFileInputFormat() ? CombineTextInputFormat.class : TextInputFormat.class; createDataset(PARTITIONED, "partitionedFileSet", PartitionedFileSetProperties.builder() .setPartitioning(Partitioning.builder() .addStringField("type") .addLongField("time") .build()) // properties for file set .setBasePath("partitioned") .setInputFormat(inputFormatClass) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, SEPARATOR) // don't configure properties for the Hive table - this is used in a context where explore is disabled .build()); addMapReduce(new PartitionWriter()); addMapReduce(new PartitionReader()); }
/**
 * Checks that {@code getFieldType} reports the type each field was added with, both through the generic
 * {@code addField} and the typed {@code add*Field} builder methods, that an unknown field yields null,
 * and that {@code getFields} contains exactly the added names.
 */
@Test
public void testBuilderGetters() {
  Partitioning partitioning = Partitioning.builder()
    .addField("a", FieldType.STRING)
    .addField("b", FieldType.INT)
    .addField("c", FieldType.LONG)
    .addStringField("d")
    .addIntField("e")
    .addLongField("f")
    .build();

  // names[i] is expected to have type expectedTypes[i]
  String[] names = { "a", "b", "c", "d", "e", "f" };
  FieldType[] expectedTypes = {
    FieldType.STRING, FieldType.INT, FieldType.LONG,
    FieldType.STRING, FieldType.INT, FieldType.LONG
  };
  for (int i = 0; i < names.length; i++) {
    Assert.assertEquals(expectedTypes[i], partitioning.getFieldType(names[i]));
  }

  Assert.assertNull(partitioning.getFieldType("x"));
  Assert.assertEquals(partitioning.getFields().keySet(),
                      ImmutableSet.of("a", "b", "c", "d", "e", "f"));
}
/**
 * Configures the "AppWithMapReduce" application: its key-value, time series and table datasets,
 * a partitioned file set "pfs" with a single int partition field "x", and five MapReduce programs.
 */
@Override
public void configure() {
  setName("AppWithMapReduce");
  setDescription("Application with MapReduce job");

  // datasets are created in the same order as before, grouped by type where they are adjacent
  for (String kvTable : new String[] { "beforeSubmit", "onFinish" }) {
    createDataset(kvTable, KeyValueTable.class);
  }
  createDataset("timeSeries", TimeseriesTable.class);
  for (String table : new String[] { "counters", "countersFromContext" }) {
    createDataset(table, Table.class);
  }
  createDataset("recorder", KeyValueTable.class);

  Partitioning byX = Partitioning.builder().addIntField("x").build();
  createDataset("pfs", PartitionedFileSet.class,
                PartitionedFileSetProperties.builder()
                  .setPartitioning(byX)
                  .setOutputFormat(TextOutputFormat.class)
                  .build());

  addMapReduce(new ClassicWordCount());
  addMapReduce(new AggregateTimeseriesByTag());
  addMapReduce(new FaiiingMR());
  addMapReduce(new ExplicitFaiiingMR());
  addMapReduce(new MapReduceWithFailingOutputCommitter());
}
@Test public void testMultipleTransitiveDependencies() throws DatasetManagementException, IOException { // Adding modules DatasetFramework framework = getFramework(); try { framework.addModule(IN_MEMORY, new InMemoryTableModule()); framework.addModule(CORE, new CoreDatasetsModule()); framework.addModule(FILE, new FileSetModule()); framework.addModule(PFS, new PartitionedFileSetModule()); framework.addModule(TWICE, new SingleTypeModule(EmbedsTableTwiceDataset.class)); // Creating an instances framework.addInstance(EmbedsTableTwiceDataset.class.getName(), MY_DS, PartitionedFileSetProperties.builder() .setPartitioning(Partitioning.builder().addStringField("x").build()) .build()); Assert.assertTrue(framework.hasInstance(MY_DS)); framework.getDataset(MY_DS, DatasetProperties.EMPTY.getProperties(), null); } finally { framework.deleteAllInstances(NAMESPACE_ID); framework.deleteAllModules(NAMESPACE_ID); } }
@Override public void configure() { addService(new PartitionService()); // Create a partitioned file set, configure it to work with MapReduce and with Explore createDataset("pfs", PartitionedFileSet.class, PartitionedFileSetProperties.builder() // Properties for partitioning .setPartitioning(Partitioning.builder().addStringField("partition").addIntField("sub-partition").build()) // Properties for file set .setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ",") // Properties for Explore (to create a partitioned Hive table) .setEnableExploreOnCreate(true) .setExploreFormat("csv") .setExploreSchema("f1 STRING, f2 INT") .setDescription("App for testing authorization in partitioned filesets.") .build()); }
@Override public void configure() { createDataset(INPUT, KeyValueTable.class.getName(), DatasetProperties.EMPTY); // create two pfs, identical except for their (table) names for (String name : new String[] { PFS, OTHER }) { createDataset(name, PartitionedFileSet.class.getName(), PartitionedFileSetProperties.builder() .setPartitioning(Partitioning.builder().addIntField("number").build()) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ",") .setEnableExploreOnCreate(true) .setExploreTableName(name) .setExploreSchema("key STRING, value STRING") .setExploreFormat("csv") .build()); } addMapReduce(new PartitionWriterMR()); }
/**
 * Expects an {@link IllegalArgumentException} when a field with an empty name is added to a partitioning.
 */
@Test(expected = IllegalArgumentException.class)
public void testBuilderEmptyName() {
  Partitioning.Builder withEmptyName = Partitioning.builder().addStringField("");
  withEmptyName.build();
}
/**
 * Expects an {@link IllegalStateException} when a partitioning is built without any fields.
 */
@Test(expected = IllegalStateException.class)
public void testBuilderEmpty() {
  Partitioning.Builder noFields = Partitioning.builder();
  noFields.build();
}
/**
 * Runs the reconfigure checks against the registered definitions of {@code FileSet},
 * {@code PartitionedFileSet} and {@code TimePartitionedFileSet}. The partitioned file set is
 * given properties with a single int partition field "i".
 */
@Test
public void testFileSetReconfigure() throws IncompatibleUpdateException {
  testFileSetReconfigure(registry.get(FileSet.class.getName()));

  Partitioning byI = Partitioning.builder().addIntField("i").build();
  testFileSetReconfigure(registry.get(PartitionedFileSet.class.getName()),
                         PartitionedFileSetProperties.builder().setPartitioning(byI).build());

  testFileSetReconfigure(registry.get(TimePartitionedFileSet.class.getName()));
}
/**
 * Expects an {@link IllegalArgumentException} when the field name "name" is added twice.
 */
@Test(expected = IllegalArgumentException.class)
public void testBuilderDuplicate() {
  Partitioning.Builder nameAndAge = Partitioning.builder()
    .addField("name", FieldType.STRING)
    .addIntField("age");
  // "name" was already added above
  nameAndAge.addStringField("name").build();
}
private void testIllegalFieldValue(Function<PartitionKey.Builder, PartitionKey.Builder> function) { PartitionKey.Builder builder = PartitionKey.builder( Partitioning.builder().addIntField("x").addLongField("y").addStringField("z").build()); try { function.apply(builder); Assert.fail("builder should have thrown exception for invalid field type"); } catch (IllegalArgumentException e) { //expected } }
@Test(expected = IllegalArgumentException.class) public void testBuilderNullType() { //noinspection ConstantConditions Partitioning.builder().addField("x", null).build(); }
/**
 * Expects an {@link IllegalArgumentException} when a partition key is given a value for "z",
 * a field that the partitioning (x: int, y: long) does not declare.
 */
@Test(expected = IllegalArgumentException.class)
public void testBuilderUnknownField() {
  Partitioning xAndY = Partitioning.builder()
    .addIntField("x")
    .addLongField("y")
    .build();
  PartitionKey.builder(xAndY)
    .addField("x", 10)
    .addField("y", 10L)
    .addField("z", 15)
    .build();
}
@Override public void configure() { // A PFS for storing uploaded file createDataset(PFS_NAME, PartitionedFileSet.class, PartitionedFileSetProperties.builder() .setPartitioning(Partitioning.builder().addLongField("time").build()) .setInputFormat(TextInputFormat.class) .build() ); // A KV table for tracking chunks sizes createDataset(KV_TABLE_NAME, KeyValueTable.class); addService(SERVICE_NAME, new FileHandler()); }
@Test(expected = IllegalArgumentException.class) public void testBuilderNullName() { //noinspection ConstantConditions Partitioning.builder().addField(null, FieldType.STRING).build(); }
/**
 * Expects an {@link IllegalStateException} when a partition key is built without a value for "z",
 * one of the three fields (x: int, y: long, z: string) declared by the partitioning.
 */
@Test(expected = IllegalStateException.class)
public void testBuilderMissingField() {
  Partitioning xyz = Partitioning.builder()
    .addIntField("x")
    .addLongField("y")
    .addStringField("z")
    .build();
  PartitionKey.builder(xyz)
    .addField("x", 10)
    .addField("y", 10L)
    .build();
}