/**
 * Looks up the {@code DataStreamsApp.CHECKPOINT_FILESET} dataset and stores its base location
 * in {@code checkpointBaseRef}.
 *
 * @param context dataset context used to obtain the checkpoint file set
 * @throws Exception propagated from the dataset lookup or from reading the base location
 */
@Override
public void run(DatasetContext context) throws Exception {
  checkpointBaseRef.set(
    context.<FileSet>getDataset(DataStreamsApp.CHECKPOINT_FILESET).getBaseLocation());
}
// closes the enclosing anonymous class; Exception.class is the last argument of a call that
// starts outside this excerpt
}, Exception.class);
private Location getQuarantineLocation() throws IOException { // each transaction must not share its quarantine directory with another transaction return files.getBaseLocation().append(QUARANTINE_DIR + "." + tx.getTransactionId()); }
private Location getQuarantineLocation() throws IOException { // each transaction must not share its quarantine directory with another transaction return files.getBaseLocation().append(QUARANTINE_DIR + "." + tx.getTransactionId()); }
/**
 * Reads the latest snapshot time from the state file ({@code STATE_FILE_NAME}) under the base
 * location of the embedded file set.
 *
 * @return the value stored in the state file, or {@code null} if the state file does not exist
 * @throws IOException if the state file cannot be read
 * @throws NumberFormatException if the (trimmed) file content is not a valid long
 */
private Long getLatestSnapshot() throws IOException {
  Location stateFile = files.getEmbeddedFileSet().getBaseLocation().append(STATE_FILE_NAME);
  if (!stateFile.exists()) {
    return null;
  }

  try (InputStreamReader reader = new InputStreamReader(stateFile.getInputStream(), Charsets.UTF_8)) {
    // Long.valueOf does not accept surrounding whitespace; trim so that a state file ending in a
    // newline (e.g. after a manual edit) is still readable
    String val = CharStreams.toString(reader).trim();
    return Long.valueOf(val);
  }
}
private Location lock() throws IOException, InterruptedException { // create a lock file in case there is somebody updating the latest snapshot Location lockFile = files.getEmbeddedFileSet().getBaseLocation().append("lock"); int retries = 0; int maxRetries = 20; while (!lockFile.createNew()) { if (retries > maxRetries) { throw new IOException("Failed to create lock file. If there is a file named 'lock' in the " + "base path, but there is nobody updating the latest snapshot, please delete the 'lock' file."); } TimeUnit.SECONDS.sleep(1); retries++; } return lockFile; } }
/**
 * Moves files from the source file set to the destination file set.
 *
 * <p>Every location returned by listing the source base location whose name fully matches
 * {@code conf.filterRegex} is renamed to a location of the same name under the destination
 * base location.
 *
 * @param context dataset context used to obtain the source and destination file sets
 * @throws Exception if a dataset cannot be obtained, listing fails, or a file cannot be moved
 */
@Override
public void run(DatasetContext context) throws Exception {
  FileSet sourceFileSet = context.getDataset(conf.sourceFileset);
  FileSet destinationFileSet = context.getDataset(conf.destinationFileset);

  Pattern pattern = Pattern.compile(conf.filterRegex);
  for (Location sourceFile : sourceFileSet.getBaseLocation().list()) {
    if (!pattern.matcher(sourceFile.getName()).matches()) {
      continue;
    }
    Location destFile = destinationFileSet.getBaseLocation().append(sourceFile.getName());
    // renameTo signals failure by returning null rather than throwing; surface it instead of
    // silently leaving the file behind
    if (sourceFile.renameTo(destFile) == null) {
      throw new java.io.IOException(
        "Failed to move " + sourceFile.toURI() + " to " + destFile.toURI());
    }
  }
}
// closes the enclosing anonymous class and the call that starts outside this excerpt
});
/**
 * Deletes files from the configured file set after a successful run.
 *
 * <p>Does nothing when {@code context.isSuccessful()} is false. Otherwise every location
 * returned by listing the base location of {@code conf.filesetName} whose name fully matches
 * {@code conf.deleteRegex} is deleted.
 *
 * @param context the batch action context providing the run status and dataset access
 * @throws Exception propagated from the dataset lookup, the listing, or the delete
 */
@Override
public void run(BatchActionContext context) throws Exception {
  if (!context.isSuccessful()) {
    return;
  }

  FileSet targetFileSet = context.getDataset(conf.filesetName);
  Pattern deletePattern = Pattern.compile(conf.deleteRegex);
  for (Location candidate : targetFileSet.getBaseLocation().list()) {
    boolean nameMatches = deletePattern.matcher(candidate.getName()).matches();
    if (!nameMatches) {
      continue;
    }
    candidate.delete();
  }
}
/**
 * Returns the location named {@code "failure"} under the delegate's base location.
 *
 * @return the base location of {@code delegate} with {@code "failure"} appended
 * @throws IOException propagated from resolving the location
 */
public Location getFailureLocation() throws IOException {
  Location baseLocation = delegate.getBaseLocation();
  return baseLocation.append("failure");
}
/**
 * Returns the location named {@code "success"} under the delegate's base location.
 *
 * @return the base location of {@code delegate} with {@code "success"} appended
 * @throws IOException propagated from resolving the location
 */
public Location getSuccessLocation() throws IOException {
  Location baseLocation = delegate.getBaseLocation();
  return baseLocation.append("success");
}
public void onSuccess(long snapshotTime) throws IOException, InterruptedException { Location lock = lock(); try { // update state file that contains the latest snapshot Long latestSnapshot = getLatestSnapshot(); if (latestSnapshot == null || snapshotTime > latestSnapshot) { Location stateFile = files.getEmbeddedFileSet().getBaseLocation().append(STATE_FILE_NAME); stateFile.delete(); try (OutputStream outputStream = stateFile.getOutputStream()) { outputStream.write(String.valueOf(snapshotTime).getBytes(Charsets.UTF_8)); } } } finally { lock.delete(); } }
@Test public void testMultipleOutputs() throws Exception { ApplicationWithPrograms app = deployApp(AppWithMapReduceUsingMultipleOutputs.class); final FileSet fileSet = datasetCache.getDataset(AppWithMapReduceUsingMultipleOutputs.PURCHASES); Location inputFile = fileSet.getBaseLocation().append("inputFile"); inputFile.createNew(); PrintWriter writer = new PrintWriter(inputFile.getOutputStream()); // the PURCHASES dataset consists of purchase records in the format: <customerId> <spend> writer.println("1 20"); writer.println("1 65"); writer.println("1 30"); writer.println("2 5"); writer.println("2 53"); writer.println("2 45"); writer.println("3 101"); writer.close(); // Using multiple outputs, this MapReduce send the records to a different path of the same dataset, depending // on the value in the data (large spend amounts will go to one file, while small will go to another file. runProgram(app, AppWithMapReduceUsingMultipleOutputs.SeparatePurchases.class, new BasicArguments()); FileSet outputFileSet = datasetCache.getDataset(AppWithMapReduceUsingMultipleOutputs.SEPARATED_PURCHASES); Assert.assertEquals(ImmutableList.of("1 20", "1 30", "2 5", "2 45"), readFromOutput(outputFileSet, "small_purchases")); Assert.assertEquals(ImmutableList.of("1 65", "2 53", "3 101"), readFromOutput(outputFileSet, "large_purchases")); }
/**
 * Resolves the base location of the named partitioned file set's embedded file set and
 * delegates to {@code validateFiles(Location, Location)}.
 *
 * @param dataset name of the partitioned file set, resolved within {@code testSpace}
 * @param expectedExisting passed through unchanged to the location-based overload
 * @throws Exception propagated from the dataset lookup or the delegated validation
 */
private void validateFiles(String dataset, Location expectedExisting) throws Exception {
  DataSetManager<PartitionedFileSet> manager = getDataset(testSpace.dataset(dataset));
  PartitionedFileSet partitionedFileSet = manager.get();
  Location baseLocation = partitionedFileSet.getEmbeddedFileSet().getBaseLocation();
  validateFiles(baseLocation, expectedExisting);
}
// Test-setup excerpt: creates a file named "inputFile" under the base location of fileSet (the
// append/createNew pair appears twice for fileSet) and once under the base location of fileSet2,
// then resolves "output/part-r-00000" under the base location of the OUTPUT_DATASET file set.
// NOTE(review): the repeated fileSet statements look like separate excerpts joined together;
// confirm against the full test before removing either.
Location inputFile = fileSet.getBaseLocation().append("inputFile"); inputFile.createNew(); inputFile = fileSet.getBaseLocation().append("inputFile"); inputFile.createNew(); inputFile = fileSet2.getBaseLocation().append("inputFile"); inputFile.createNew(); FileSet outputFileSet = datasetCache.getDataset(AppWithMapReduceUsingMultipleInputs.OUTPUT_DATASET); Location outputLocation = outputFileSet.getBaseLocation().append("output").append("part-r-00000");
// Reads the permission string of the file set's base location and picks the character at index 4.
// For a nine-character string of the form rwxrwxrwx (owner, group, other), index 4 is the
// group's write flag.
// NOTE(review): assumes the permission string always has that nine-character form; confirm.
Location base = fs.getBaseLocation(); String permissions = base.getPermissions(); char groupWriteFlag = permissions.charAt(4); // rwxrwxrwx
// Asserts that the URI path of location3 starts with the URI path of the base location of the
// partitioned file set's embedded file set. This is a plain string-prefix comparison.
String basePath = pfs.getEmbeddedFileSet().getBaseLocation().toURI().getPath(); String absPath3 = location3.toURI().getPath(); Assert.assertTrue(absPath3.startsWith(basePath));
/**
 * Creates the partitioned file set instance used by the tests, with partitioning
 * {@code PARTITIONING_1} and base path {@code "testDir"}, remembers the base location of its
 * embedded file set in {@code pfsBaseLocation}, and asserts that this location exists.
 */
@Before
public void before() throws Exception {
  dsFrameworkUtil.createInstance(
    "partitionedFileSet",
    pfsInstance,
    PartitionedFileSetProperties.builder()
      .setPartitioning(PARTITIONING_1)
      .setBasePath("testDir")
      .build());

  PartitionedFileSet createdPfs = (PartitionedFileSet) dsFrameworkUtil.getInstance(pfsInstance);
  pfsBaseLocation = createdPfs.getEmbeddedFileSet().getBaseLocation();
  Assert.assertTrue(pfsBaseLocation.exists());
}
// Test excerpt for dataset "b": reads its input format class name and the URI path of its base
// location, asserts that the path ends with "some/path/", flushes the manager, then reads the
// base path again and asserts that it now ends with "/extra/".
// NOTE(review): presumably the dataset's properties are changed elsewhere in the full test between
// the two reads; that step is not visible in this excerpt.
DataSetManager<FileSet> bManager = getDataset("b"); String bFormat = bManager.get().getInputFormatClassName(); String bPath = bManager.get().getBaseLocation().toURI().getPath(); Assert.assertTrue(bPath.endsWith("some/path/")); bManager.flush(); String newBPath = bManager.get().getBaseLocation().toURI().getPath(); Assert.assertTrue(newBPath.endsWith("/extra/"));
/**
 * Sets up the transaction client and creates the partitioned file set instance used by the
 * tests, configured with partitioning, table permissions, base path {@code "testDir"}, file
 * permissions and file group. The base location of the embedded file set is stored in
 * {@code pfsBaseLocation} and asserted to exist.
 */
@Before
public void before() throws Exception {
  txClient = new InMemoryTxSystemClient(dsFrameworkUtil.getTxManager());

  dsFrameworkUtil.createInstance(
    "partitionedFileSet",
    pfsInstance,
    PartitionedFileSetProperties.builder()
      .setPartitioning(PARTITIONING_1)
      .setTablePermissions(tablePermissions)
      .setBasePath("testDir")
      .setFilePermissions(fsPermissions)
      .setFileGroup(group)
      .build());

  PartitionedFileSet createdPfs = (PartitionedFileSet) dsFrameworkUtil.getInstance(pfsInstance);
  pfsBaseLocation = createdPfs.getEmbeddedFileSet().getBaseLocation();
  Assert.assertTrue(pfsBaseLocation.exists());
}
/**
 * Verifies the base location used when no base path is configured.
 *
 * <p>For a partitioned file set created without a base path, the base location of the embedded
 * file set must be named after the dataset, exist, and be a directory. A plain file set created
 * the same way must have a base location with the same parent. After both instances are
 * deleted, the partitioned file set's base location must no longer exist.
 */
@Test
public void testDefaultBasePath() throws Exception {
  DatasetId id = DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testDefaultPath");
  dsFrameworkUtil.createInstance("partitionedFileSet", id, PartitionedFileSetProperties.builder()
    .setPartitioning(PARTITIONING_1)
    .build());
  PartitionedFileSet pfs = dsFrameworkUtil.getInstance(id);
  Location baseLocation = pfs.getEmbeddedFileSet().getBaseLocation();
  // assertEquals takes (expected, actual); the dataset name is the expected value
  Assert.assertEquals(id.getDataset(), baseLocation.getName());
  Assert.assertTrue(baseLocation.exists());
  Assert.assertTrue(baseLocation.isDirectory());

  DatasetId fid = DatasetFrameworkTestUtil.NAMESPACE_ID.dataset("testDefaultPathFileSet");
  dsFrameworkUtil.createInstance("fileSet", fid, FileSetProperties.builder().build());
  FileSet fs = dsFrameworkUtil.getInstance(fid);
  Location fsBaseLocation = fs.getBaseLocation();
  Assert.assertEquals(Locations.getParent(baseLocation), Locations.getParent(fsBaseLocation));

  dsFrameworkUtil.deleteInstance(fid);
  dsFrameworkUtil.deleteInstance(id);
  Assert.assertFalse(baseLocation.exists());
}
// Test excerpt: asserts that the file set's base location has the expected group and permission
// string, then re-reads the base location and derives the child location "p" and its child "q".
// child and grandchild are assigned here but declared outside this excerpt.
Location base = fs.getBaseLocation(); Assert.assertEquals(group, base.getGroup()); Assert.assertEquals(fsPermissions, base.getPermissions()); base = fs.getBaseLocation(); child = base.append("p"); grandchild = child.append("q");