/**
 * Writes every entry of {@code converted}, one per line, to the output location of the
 * CSV file set. The output path is taken from the {@code output.path} runtime argument.
 *
 * @param context dataset context used to obtain the file set instance
 * @throws Exception propagated from obtaining the dataset or from opening the output stream
 */
@Override
public void run(DatasetContext context) throws Exception {
  // Dataset arguments that point the file set at the requested output path.
  Map<String, String> datasetArgs = new HashMap<>();
  FileSetArguments.setOutputPath(datasetArgs, sec.getRuntimeArguments().get("output.path"));

  FileSet csvFileSet =
    context.getDataset(WorkflowAppWithLocalDatasets.CSV_FILESET_DATASET, datasetArgs);

  // The writer (and the underlying stream) is closed when the try block exits.
  try (PrintWriter out = new PrintWriter(csvFileSet.getOutputLocation().getOutputStream())) {
    for (String row : converted) {
      out.write(row);
      out.println();
    }
  }
}
});
@Test public void testAbsolutePath() throws IOException, DatasetManagementException { String absolutePath = tmpFolder.newFolder() + "/absolute/path"; dsFrameworkUtil.createInstance("fileSet", testFileSetInstance3, FileSetProperties.builder() .setBasePath(absolutePath).build()); // validate that the base path for the file set was created Assert.assertTrue(new File(absolutePath).isDirectory()); // instantiate the file set with an output path Map<String, String> fileArgs = Maps.newHashMap(); FileSetArguments.setOutputPath(fileArgs, "out"); FileSet fileSet = dsFrameworkUtil.getInstance(testFileSetInstance3, fileArgs); // write to the output path Assert.assertEquals(absolutePath + "/out", fileSet.getOutputLocation().toURI().getPath()); try (OutputStream out = fileSet.getOutputLocation().getOutputStream()) { out.write(42); } // validate that the file was created Assert.assertTrue(new File(absolutePath + "/out").isFile()); }
/**
 * Checks that two file sets created from {@code testFileSetInstance1} and
 * {@code testFileSetInstance2} use distinct locations and do not see each other's data.
 *
 * <p>The directory three levels above each output location is expected to be named after
 * the respective namespace. A distinct byte is then written to each file set's output
 * location and read back from its first input location.
 */
@Test
public void testWriteRead() throws IOException, DatasetManagementException {
  FileSet fileSet1 = createFileset(testFileSetInstance1);
  FileSet fileSet2 = createFileset(testFileSetInstance2);
  Location fileSet1Output = fileSet1.getOutputLocation();
  Location fileSet2Output = fileSet2.getOutputLocation();

  // walk three levels up from the output location to reach the namespace directory
  Location fileSet1NsDir =
    Locations.getParent(Locations.getParent(Locations.getParent(fileSet1Output)));
  Location fileSet2NsDir =
    Locations.getParent(Locations.getParent(Locations.getParent(fileSet2Output)));
  Assert.assertNotNull(fileSet1NsDir);
  Assert.assertNotNull(fileSet2NsDir);

  // JUnit expects (expected, actual); keeping that order makes failure messages accurate
  Assert.assertEquals(DatasetFrameworkTestUtil.NAMESPACE_ID.getNamespace(), fileSet1NsDir.getName());
  Assert.assertEquals(OTHER_NAMESPACE.getNamespace(), fileSet2NsDir.getName());

  // the two file sets must not share input or output paths
  Assert.assertNotEquals(fileSet1.getInputLocations().get(0).toURI().getPath(),
                         fileSet2.getInputLocations().get(0).toURI().getPath());
  Assert.assertNotEquals(fileSet1Output.toURI().getPath(), fileSet2Output.toURI().getPath());

  // write a different byte to each file set
  try (OutputStream out = fileSet1.getOutputLocation().getOutputStream()) {
    out.write(42);
  }
  try (OutputStream out = fileSet2.getOutputLocation().getOutputStream()) {
    out.write(54);
  }

  // each file set must read back exactly the byte that was written to it
  try (InputStream in = fileSet1.getInputLocations().get(0).getInputStream()) {
    Assert.assertEquals(42, in.read());
  }
  try (InputStream in = fileSet2.getInputLocations().get(0).getInputStream()) {
    Assert.assertEquals(54, in.read());
  }
}
fileSet.getOutputLocation(); Assert.fail("Extrernal file set should not allow writing output."); } catch (UnsupportedOperationException e) {
TimePartitionedFileSet ds = dsFrameworkUtil.getInstance(TPFS_INSTANCE, args); String outputPath = ds.getEmbeddedFileSet().getOutputLocation().toURI().getPath(); Assert.assertTrue(outputPath.endsWith("2015-01-01/20_42"));
@Test public void testRollbackOfNonDirectoryOutput() throws IOException, TransactionFailureException, DatasetManagementException { // test deletion of an output location, pointing to a non-directory file FileSet fileSet1 = createFileset(testFileSetInstance1); Location outputFile = fileSet1.getOutputLocation(); Assert.assertFalse(outputFile.exists()); outputFile.getOutputStream().close(); Assert.assertTrue(outputFile.exists()); ((FileSetDataset) fileSet1).onFailure(); // the output file should still not be deleted Assert.assertTrue(outputFile.exists()); }
Location resultLocation = results.getOutputLocation(); if (resultLocation.isDirectory()) { for (Location child : resultLocation.list()) {
@Test public void testRollback() throws IOException, TransactionFailureException, DatasetManagementException { // test deletion of an empty output directory FileSet fileSet1 = createFileset(testFileSetInstance1); Location outputLocation = fileSet1.getOutputLocation(); Assert.assertFalse(outputLocation.exists()); Assert.assertTrue(outputLocation.mkdirs()); Assert.assertTrue(outputLocation.exists()); ((FileSetDataset) fileSet1).onFailure(); Assert.assertFalse(outputLocation.exists()); }
@Test public void testRollbackWithNonEmptyDir() throws IOException, TransactionFailureException, DatasetManagementException { FileSet fileSet1 = createFileset(testFileSetInstance1); Location outputDir = fileSet1.getOutputLocation(); Assert.assertFalse(outputDir.exists()); Assert.assertTrue(outputDir.mkdirs()); Location outputFile = outputDir.append("outputFile"); // this will create the outputFile outputFile.getOutputStream().close(); Assert.assertTrue(outputFile.exists()); Assert.assertTrue(outputDir.exists()); ((FileSetDataset) fileSet1).onFailure(); // both the output dir and file in it should still exist Assert.assertTrue(outputDir.exists()); Assert.assertTrue(outputFile.exists()); } }
@Test public void testRollbackOnJobFailure() throws Exception { // tests the logic of #onFailure method Map<String, String> args = new HashMap<>(); FileSetArguments.setOutputPath(args, "custom/output/path"); PartitionedFileSetArguments.setOutputPartitionKey(args, PARTITION_KEY); PartitionedFileSet pfs = dsFrameworkUtil.getInstance(pfsInstance, args); TransactionContext txContext = new TransactionContext(txClient, (TransactionAware) pfs); txContext.start(); Location outputLocation = pfs.getEmbeddedFileSet().getOutputLocation(); Assert.assertFalse(outputLocation.exists()); outputLocation.mkdirs(); Assert.assertTrue(outputLocation.exists()); ((PartitionedFileSetDataset) pfs).onFailure(); txContext.abort(); // because the previous transaction aborted, the partition as well as the directory for it will not exist txContext.start(); Assert.assertNull(pfs.getPartition(PARTITION_KEY)); Assert.assertFalse(outputLocation.exists()); txContext.finish(); }