@Test public void testFieldOrder() { Partitioning partitioning = Partitioning.builder() .addIntField("1") .addLongField("2") .addStringField("3") .build(); Iterator<Map.Entry<String, FieldType>> iterator = partitioning.getFields().entrySet().iterator(); Assert.assertEquals("1", iterator.next().getKey()); Assert.assertEquals("2", iterator.next().getKey()); Assert.assertEquals("3", iterator.next().getKey()); Assert.assertFalse(iterator.hasNext()); // the previous order may have been preserved by chance. Now try the reverse order partitioning = Partitioning.builder() .addIntField("3") .addLongField("2") .addStringField("1") .build(); iterator = partitioning.getFields().entrySet().iterator(); Assert.assertEquals("3", iterator.next().getKey()); Assert.assertEquals("2", iterator.next().getKey()); Assert.assertEquals("1", iterator.next().getKey()); Assert.assertFalse(iterator.hasNext()); }
addDatasetInstance(PartitionedFileSet.class.getName(), orcPFS, PartitionedFileSetProperties.builder() .setPartitioning(Partitioning.builder().addLongField("time").build())
Partitioning.builder().addIntField("i").addStringField("s").build()).build(); DatasetSpecification spec = pfsDef.configure("pfs", props); Partitioning.builder().addStringField("s").build()).build(); try { ((Reconfigurable) pfsDef).reconfigure("pfs", noIprops, spec); Partitioning.builder().addLongField("i").addStringField("s").build()).build(); try { ((Reconfigurable) pfsDef).reconfigure("pfs", longIprops, spec); Partitioning.builder().addStringField("s").addIntField("i").build()).build(); try { ((Reconfigurable) pfsDef).reconfigure("pfs", revProps, spec); .setPartitioning(Partitioning.builder().addStringField("s").build()) .add(PartitionedFileSetDefinition.NAME_AS_BASE_PATH_DEFAULT, "false") .build(); .setPartitioning(Partitioning.builder().addStringField("s").build()) .build(); oldSpec = pfsDef.configure("pfs", props);
.setPartitioning(Partitioning.builder().addLongField("time").build()) .setPartitioning(Partitioning.builder().addLongField("time").build())
@Override
public void configure() {
  try {
    // Plain file set with custom input AND output formats; ":" separates key and value.
    createDataset("fs", FileSet.class, FileSetProperties.builder()
      .setInputFormat(MyTextInputFormat.class)
      .setOutputFormat(MyTextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, ":").build());
    // Partitioned file set keyed by a single string field "x".
    createDataset("pfs", PartitionedFileSet.class, PartitionedFileSetProperties.builder()
      .setPartitioning(Partitioning.builder().addStringField("x").build())
      .setInputFormat(MyTextInputFormat.class)
      .setOutputFormat(TextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, ":").build());
    // Time-partitioned file set; no explicit partitioning is configured here.
    createDataset("tpfs", TimePartitionedFileSet.class, FileSetProperties.builder()
      .setInputFormat(MyTextInputFormat.class)
      .setOutputFormat(TextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, ":").build());
    // Custom dataset type that embeds a file set.
    createDataset("myfs", MyFileSet.class, FileSetProperties.builder()
      .setInputFormat(MyTextInputFormat.class)
      .setOutputFormat(TextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, ":").build());
    addSpark(new FileCountSparkProgram());
    addSpark(new ScalaFileCountSparkProgram());
  } catch (Throwable t) {
    // configure() cannot declare checked exceptions; rethrow anything as unchecked
    throw Throwables.propagate(t);
  }
}
@Override public void configure() { setName("AppWithMapReduceUsingFile"); setDescription("Application with MapReduce job using file as dataset"); createDataset(INPUT, "table"); createDataset(OUTPUT, "table"); Class<? extends InputFormat> inputFormatClass = getConfig().isUseCombineFileInputFormat() ? CombineTextInputFormat.class : TextInputFormat.class; createDataset(PARTITIONED, "partitionedFileSet", PartitionedFileSetProperties.builder() .setPartitioning(Partitioning.builder() .addStringField("type") .addLongField("time") .build()) // properties for file set .setBasePath("partitioned") .setInputFormat(inputFormatClass) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, SEPARATOR) // don't configure properties for the Hive table - this is used in a context where explore is disabled .build()); addMapReduce(new PartitionWriter()); addMapReduce(new PartitionReader()); }
@Override
public void configure() {
  setName("AppWithMapReduce");
  setDescription("Application with MapReduce job");
  // datasets used by the various MapReduce programs below
  createDataset("beforeSubmit", KeyValueTable.class);
  createDataset("onFinish", KeyValueTable.class);
  createDataset("timeSeries", TimeseriesTable.class);
  createDataset("counters", Table.class);
  createDataset("countersFromContext", Table.class);
  createDataset("recorder", KeyValueTable.class);
  // partitioned file set with a single int partition key "x"; output written as text
  createDataset("pfs", PartitionedFileSet.class, PartitionedFileSetProperties.builder()
    .setPartitioning(Partitioning.builder().addIntField("x").build())
    .setOutputFormat(TextOutputFormat.class).build());
  addMapReduce(new ClassicWordCount());
  addMapReduce(new AggregateTimeseriesByTag());
  // NOTE: "Faiiing" is the actual class name of these intentionally-failing test programs
  addMapReduce(new FaiiingMR());
  addMapReduce(new ExplicitFaiiingMR());
  addMapReduce(new MapReduceWithFailingOutputCommitter());
}
@Test
public void testMultipleTransitiveDependencies() throws DatasetManagementException, IOException {
  // Adding modules
  DatasetFramework framework = getFramework();
  try {
    // register the full chain of modules the dataset type depends on, bottom-up
    framework.addModule(IN_MEMORY, new InMemoryTableModule());
    framework.addModule(CORE, new CoreDatasetsModule());
    framework.addModule(FILE, new FileSetModule());
    framework.addModule(PFS, new PartitionedFileSetModule());
    framework.addModule(TWICE, new SingleTypeModule(EmbedsTableTwiceDataset.class));
    // Creating an instances
    framework.addInstance(EmbedsTableTwiceDataset.class.getName(), MY_DS, PartitionedFileSetProperties.builder()
      .setPartitioning(Partitioning.builder().addStringField("x").build())
      .build());
    Assert.assertTrue(framework.hasInstance(MY_DS));
    // instantiating the dataset must resolve all transitive dependencies without error
    framework.getDataset(MY_DS, DatasetProperties.EMPTY.getProperties(), null);
  } finally {
    // clean up everything so other tests start from a fresh framework
    framework.deleteAllInstances(NAMESPACE_ID);
    framework.deleteAllModules(NAMESPACE_ID);
  }
}
/**
 * Verifies that fields added via {@code addField} and via the typed shortcuts
 * ({@code addStringField} etc.) are all retrievable with the correct type,
 * that unknown fields return null, and that all field names are present.
 */
@Test
public void testBuilderGetters() {
  Partitioning partitioning = Partitioning.builder()
    .addField("a", FieldType.STRING)
    .addField("b", FieldType.INT)
    .addField("c", FieldType.LONG)
    .addStringField("d")
    .addIntField("e")
    .addLongField("f")
    .build();
  Assert.assertEquals(FieldType.STRING, partitioning.getFieldType("a"));
  Assert.assertEquals(FieldType.INT, partitioning.getFieldType("b"));
  Assert.assertEquals(FieldType.LONG, partitioning.getFieldType("c"));
  Assert.assertEquals(FieldType.STRING, partitioning.getFieldType("d"));
  Assert.assertEquals(FieldType.INT, partitioning.getFieldType("e"));
  Assert.assertEquals(FieldType.LONG, partitioning.getFieldType("f"));
  // a field that was never added has no type
  Assert.assertNull(partitioning.getFieldType("x"));
  // fix: expected value must be the first argument to assertEquals, actual the second
  Assert.assertEquals(ImmutableSet.of("a", "b", "c", "d", "e", "f"), partitioning.getFields().keySet());
}
/**
 * Builds the base dataset properties for a snapshot file set from the given config:
 * a partitioning on the snapshot field, an optional base path, and any extra
 * file properties decoded from the config's JSON string.
 *
 * @param config the snapshot file set configuration
 * @return a builder pre-populated with the base properties
 * @throws IllegalArgumentException if the file properties cannot be decoded as a
 *         JSON object of string to string
 */
public static PartitionedFileSetProperties.Builder getBaseProperties(SnapshotFileSetConfig config) {
  // every snapshot file set is partitioned by the single snapshot field
  PartitionedFileSetProperties.Builder builder = PartitionedFileSetProperties.builder()
    .setPartitioning(Partitioning.builder().addLongField(SNAPSHOT_FIELD).build());

  String basePath = config.getBasePath();
  if (!Strings.isNullOrEmpty(basePath)) {
    builder.setBasePath(basePath);
  }

  try {
    Map<String, String> fileProperties = GSON.fromJson(config.getFileProperties(), MAP_TYPE);
    // fromJson returns null for a null/empty input; nothing to add in that case
    if (fileProperties != null) {
      builder.addAll(fileProperties);
    }
  } catch (Exception e) {
    throw new IllegalArgumentException("Could not decode the 'properties' setting. Please check that it " +
                                         "is a JSON Object of string to string. Failed with error: " +
                                         e.getMessage(), e);
  }
  return builder;
}
@Override
public void configure() {
  addService(new PartitionService());
  // Create a partitioned file set, configure it to work with MapReduce and with Explore
  createDataset("pfs", PartitionedFileSet.class, PartitionedFileSetProperties.builder()
    // Properties for partitioning: a string key plus an int sub-key
    .setPartitioning(Partitioning.builder().addStringField("partition").addIntField("sub-partition").build())
    // Properties for file set: plain text records
    .setInputFormat(TextInputFormat.class)
    .setOutputFormat(TextOutputFormat.class)
    .setOutputProperty(TextOutputFormat.SEPERATOR, ",")
    // Properties for Explore (to create a partitioned Hive table)
    .setEnableExploreOnCreate(true)
    .setExploreFormat("csv")
    .setExploreSchema("f1 STRING, f2 INT")
    .setDescription("App for testing authorization in partitioned filesets.")
    .build());
}
@Override public void configure() { createDataset(INPUT, KeyValueTable.class.getName(), DatasetProperties.EMPTY); // create two pfs, identical except for their (table) names for (String name : new String[] { PFS, OTHER }) { createDataset(name, PartitionedFileSet.class.getName(), PartitionedFileSetProperties.builder() .setPartitioning(Partitioning.builder().addIntField("number").build()) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ",") .setEnableExploreOnCreate(true) .setExploreTableName(name) .setExploreSchema("key STRING, value STRING") .setExploreFormat("csv") .build()); } addMapReduce(new PartitionWriterMR()); }
// an empty field name must be rejected by the builder
@Test(expected = IllegalArgumentException.class)
public void testBuilderEmptyName() {
  Partitioning.builder().addStringField("").build();
}
// building a partitioning with no fields at all must be rejected
@Test(expected = IllegalStateException.class)
public void testBuilderEmpty() {
  Partitioning.builder().build();
}
/**
 * Add a field of type LONG.
 *
 * @param name the field name
 * @return this builder, to allow chaining
 *
 * @throws java.lang.IllegalArgumentException if the field name is null, empty, or already exists.
 */
public Builder addLongField(String name) {
  return addField(name, FieldType.LONG);
}
@Test
public void testFileSetReconfigure() throws IncompatibleUpdateException {
  // reconfiguration must work for plain, partitioned, and time-partitioned file sets
  testFileSetReconfigure(registry.get(FileSet.class.getName()));
  // partitioned file sets additionally require a partitioning in their properties
  testFileSetReconfigure(registry.get(PartitionedFileSet.class.getName()),
                         PartitionedFileSetProperties.builder().setPartitioning(
                           Partitioning.builder().addIntField("i").build()).build());
  testFileSetReconfigure(registry.get(TimePartitionedFileSet.class.getName()));
}
/**
 * Add a field of type INT.
 *
 * @param name the field name
 * @return this builder, to allow chaining
 *
 * @throws java.lang.IllegalArgumentException if the field name is null, empty, or already exists.
 */
public Builder addIntField(String name) {
  return addField(name, FieldType.INT);
}
/**
 * Add a field of type STRING.
 *
 * @param name the field name
 * @return this builder, to allow chaining
 *
 * @throws java.lang.IllegalArgumentException if the field name is null, empty, or already exists.
 */
public Builder addStringField(String name) {
  return addField(name, FieldType.STRING);
}
// adding a field name twice must be rejected, regardless of which add* method is used
@Test(expected = IllegalArgumentException.class)
public void testBuilderDuplicate() {
  Partitioning.builder()
    .addField("name", FieldType.STRING)
    .addIntField("age")
    .addStringField("name")
    .build();
}