/**
 * Collects the {@link CopyableFile}s for the selected versions of this dataset.
 *
 * <p>The versions reported by the dataset version finder are narrowed down by the version selection policy.
 * One generator task per selected version is submitted to the executor; every task receives the same
 * shared queue, which is returned once all tasks have been waited on.
 *
 * <p>When the wait on the submitted tasks ends, normally or with an exception, the executor is handed to
 * {@link ExecutorsUtils#shutdownExecutorService}. The early return taken when no version is found does not
 * touch the executor.
 *
 * @param targetFs the target {@link FileSystem} passed to each generator
 * @param configuration the {@link CopyConfiguration} passed to each generator
 * @return the queue filled by the generator tasks, or a new empty list when no dataset version is found
 * @throws IOException if a generator task fails or the calling thread is interrupted while waiting; in the
 *         interrupted case the thread's interrupt status is set again before the exception propagates
 */
@Override
public Collection<CopyableFile> getCopyableFiles(FileSystem targetFs, CopyConfiguration configuration)
    throws IOException {
  log.info(String.format("Getting copyable files at root path: %s", this.datasetRoot));
  List<TimestampedDatasetVersion> versions = Lists.newArrayList(this.datasetVersionFinder.findDatasetVersions(this));

  if (versions.isEmpty()) {
    log.warn("No dataset version can be found. Ignoring.");
    return Lists.newArrayList();
  }

  Collection<TimestampedDatasetVersion> copyableVersions = this.versionSelectionPolicy.listSelectedVersions(versions);
  ConcurrentLinkedQueue<CopyableFile> copyableFileList = new ConcurrentLinkedQueue<>();
  List<Future<?>> futures = Lists.newArrayList();
  for (TimestampedDatasetVersion copyableVersion : copyableVersions) {
    futures.add(this.executor.submit(
        this.getCopyableFileGenetator(targetFs, configuration, copyableVersion, copyableFileList)));
  }

  boolean interrupted = false;
  try {
    // Block until every generator task has completed.
    for (Future<?> future : futures) {
      future.get();
    }
  } catch (ExecutionException e) {
    throw new IOException("Failed to generate copyable files.", e);
  } catch (InterruptedException e) {
    // Catching InterruptedException clears the interrupt status; remember to restore it below.
    interrupted = true;
    throw new IOException("Failed to generate copyable files.", e);
  } finally {
    ExecutorsUtils.shutdownExecutorService(this.executor, Optional.of(log));
    if (interrupted) {
      // Restored after the shutdown call so that call runs exactly as it did before this change,
      // while callers can still observe that the thread was interrupted.
      Thread.currentThread().interrupt();
    }
  }

  return copyableFileList;
}
/**
 * Creates the {@link CopyableFileGenerator} for a single dataset version.
 *
 * @param targetFs the target {@link FileSystem} given to the generator
 * @param configuration the {@link CopyConfiguration} given to the generator; its publish dir is also used
 *        to resolve the target root
 * @param copyableVersion the version whose date-time and paths are given to the generator
 * @param copyableFileList the queue given to the generator
 * @return a new generator built from the arguments above plus this dataset's source file system,
 *         dataset root and copyable-file filter
 */
@VisibleForTesting
protected CopyableFileGenerator getCopyableFileGenetator(FileSystem targetFs, CopyConfiguration configuration,
    TimestampedDatasetVersion copyableVersion, ConcurrentLinkedQueue<CopyableFile> copyableFileList) {
  return new CopyableFileGenerator(
      srcFs,
      targetFs,
      configuration,
      datasetRoot,
      getTargetRoot(configuration.getPublishDir()),
      copyableVersion.getDateTime(),
      copyableVersion.getPaths(),
      copyableFileList,
      copyableFileFilter());
}
/**
 * Returns the URN of this dataset, which is the string form of the value returned by {@code datasetRoot()}.
 */
@Override
public String datasetURN() {
  final Object root = this.datasetRoot();
  return root.toString();
}
/** * Test the {@link TimestampBasedCopyableDataset} constructor with different config options. */ @Test public void testConfigOptions() { Properties props = new Properties(); props.put(TimestampBasedCopyableDataset.COPY_POLICY, TimeBasedCopyPolicyForTest.class.getName()); props.put(TimestampBasedCopyableDataset.DATASET_VERSION_FINDER, TimestampedDatasetVersionFinderForTest.class.getName()); TimestampBasedCopyableDataset copyabledataset1 = new TimestampBasedCopyableDataset(localFs, props, new Path("dummy")); Assert.assertEquals(copyabledataset1.getDatasetVersionFinder().getClass().getName(), TimestampedDatasetVersionFinderForTest.class.getName()); Assert.assertEquals(copyabledataset1.getVersionSelectionPolicy().getClass().getName(), TimeBasedCopyPolicyForTest.class.getName()); // Change the version finder props.put(TimestampBasedCopyableDataset.DATASET_VERSION_FINDER, VersionFinderDoNothingForTest.class.getName()); TimestampBasedCopyableDataset copyabledataset2 = new TimestampBasedCopyableDataset(localFs, props, new Path("dummy")); Assert.assertEquals(copyabledataset2.getDatasetVersionFinder().getClass().getName(), VersionFinderDoNothingForTest.class.getName()); Assert.assertEquals(copyabledataset2.getVersionSelectionPolicy().getClass().getName(), TimeBasedCopyPolicyForTest.class.getName()); }
/** * Test {@link TimestampBasedCopyableDataset.CopyableFileGenerator} when src location is empty and also when it is null. */ @Test(expectedExceptions = RuntimeException.class) public void testCopyableFileGenerator() { Properties props = new Properties(); props.put(TimestampBasedCopyableDataset.COPY_POLICY, TimeBasedCopyPolicyForTest.class.getName()); props.put(TimestampBasedCopyableDataset.DATASET_VERSION_FINDER, TimestampedDatasetVersionFinderForTest.class.getName()); TimestampBasedCopyableDataset copyabledataset = new TimestampBasedCopyableDataset(localFs, props, new Path("dummy")); CopyConfiguration configuration = mock(CopyConfiguration.class); when(configuration.getPublishDir()).thenReturn(new Path("publishDir")); ConcurrentLinkedQueue<CopyableFile> copyableFileList = new ConcurrentLinkedQueue<>(); // The src path is empty. TimestampedDatasetVersion emptyVersion = new TimestampedDatasetVersion(new DateTime(), new Path("dummy2")); TimestampBasedCopyableDataset.CopyableFileGenerator emptyGenerator = copyabledataset.getCopyableFileGenetator(localFs, configuration, emptyVersion, copyableFileList); emptyGenerator.run(); Assert.assertEquals(copyableFileList.size(), 0); // The src path is null. TimestampedDatasetVersion versionHasNullPath = new TimestampedDatasetVersion(new DateTime(), null); TimestampBasedCopyableDataset.CopyableFileGenerator exceptionGenerator = copyabledataset.getCopyableFileGenetator(localFs, configuration, versionHasNullPath, copyableFileList); exceptionGenerator.run(); }
/**
 * Exercises the parallel collection of copyable files in
 * {@link TimestampBasedCopyableDataset#getCopyableFiles(FileSystem, CopyConfiguration)}.
 */
@Test
public void testGetCopyableFiles() throws IOException {
  Properties config = new Properties();
  config.put(TimestampBasedCopyableDataset.COPY_POLICY, TimeBasedCopyPolicyForTest.class.getName());
  config.put(TimestampBasedCopyableDataset.DATASET_VERSION_FINDER,
      TimestampedDatasetVersionFinderForTest.class.getName());

  TimestampBasedCopyableDataset dataset =
      new TimestampBasedCopyableDatasetForTest(localFs, config, new Path("/data/tracking/PVE"));
  Collection<CopyableFile> files = dataset.getCopyableFiles(localFs, null);

  // Per the original test notes, TimestampedDatasetVersionFinderForTest yields three versions with two
  // files each, so six copyable files are expected, each with a file set of the form dummy/<digits>/file[12].
  Assert.assertEquals(files.size(), 6);

  Pattern expectedFileSet = Pattern.compile("dummy/[\\d]*/file[12]");
  Set<String> distinctFileSets = Sets.newHashSet();
  for (CopyableFile file : files) {
    String fileSet = file.getFileSet();
    boolean wellFormed = expectedFileSet.matcher(fileSet).matches();
    Assert.assertTrue(wellFormed);
    distinctFileSets.add(fileSet);
  }

  // Every file set must be unique.
  Assert.assertEquals(distinctFileSets.size(), 6);
}
TimestampedDatasetVersionFinderForTest.class.getName()); Path datasetRootPath = this.localFs.getFileStatus(srcRoot).getPath(); TimestampBasedCopyableDataset copyabledataset = new TimestampBasedCopyableDataset(localFs, props, datasetRootPath);
/**
 * Creates the {@link CopyableFileGenerator} for a single dataset version.
 *
 * @param targetFs the target {@link FileSystem} given to the generator
 * @param configuration the {@link CopyConfiguration} given to the generator; its publish dir is also used
 *        to resolve the target root
 * @param copyableVersion the version whose date-time and paths are given to the generator
 * @param copyableFileList the queue given to the generator
 * @return a new generator built from the arguments above plus this dataset's source file system,
 *         dataset root and copyable-file filter
 */
@VisibleForTesting
protected CopyableFileGenerator getCopyableFileGenetator(FileSystem targetFs, CopyConfiguration configuration,
    TimestampedDatasetVersion copyableVersion, ConcurrentLinkedQueue<CopyableFile> copyableFileList) {
  return new CopyableFileGenerator(
      srcFs,
      targetFs,
      configuration,
      datasetRoot,
      getTargetRoot(configuration.getPublishDir()),
      copyableVersion.getDateTime(),
      copyableVersion.getPaths(),
      copyableFileList,
      copyableFileFilter());
}
/**
 * Collects the {@link CopyableFile}s for the selected versions of this dataset.
 *
 * <p>The versions reported by the dataset version finder are narrowed down by the version selection policy.
 * One generator task per selected version is submitted to the executor; every task receives the same
 * shared queue, which is returned once all tasks have been waited on.
 *
 * <p>When the wait on the submitted tasks ends, normally or with an exception, the executor is handed to
 * {@link ExecutorsUtils#shutdownExecutorService}. The early return taken when no version is found does not
 * touch the executor.
 *
 * @param targetFs the target {@link FileSystem} passed to each generator
 * @param configuration the {@link CopyConfiguration} passed to each generator
 * @return the queue filled by the generator tasks, or a new empty list when no dataset version is found
 * @throws IOException if a generator task fails or the calling thread is interrupted while waiting; in the
 *         interrupted case the thread's interrupt status is set again before the exception propagates
 */
@Override
public Collection<CopyableFile> getCopyableFiles(FileSystem targetFs, CopyConfiguration configuration)
    throws IOException {
  log.info(String.format("Getting copyable files at root path: %s", this.datasetRoot));
  List<TimestampedDatasetVersion> versions = Lists.newArrayList(this.datasetVersionFinder.findDatasetVersions(this));

  if (versions.isEmpty()) {
    log.warn("No dataset version can be found. Ignoring.");
    return Lists.newArrayList();
  }

  Collection<TimestampedDatasetVersion> copyableVersions = this.versionSelectionPolicy.listSelectedVersions(versions);
  ConcurrentLinkedQueue<CopyableFile> copyableFileList = new ConcurrentLinkedQueue<>();
  List<Future<?>> futures = Lists.newArrayList();
  for (TimestampedDatasetVersion copyableVersion : copyableVersions) {
    futures.add(this.executor.submit(
        this.getCopyableFileGenetator(targetFs, configuration, copyableVersion, copyableFileList)));
  }

  boolean interrupted = false;
  try {
    // Block until every generator task has completed.
    for (Future<?> future : futures) {
      future.get();
    }
  } catch (ExecutionException e) {
    throw new IOException("Failed to generate copyable files.", e);
  } catch (InterruptedException e) {
    // Catching InterruptedException clears the interrupt status; remember to restore it below.
    interrupted = true;
    throw new IOException("Failed to generate copyable files.", e);
  } finally {
    ExecutorsUtils.shutdownExecutorService(this.executor, Optional.of(log));
    if (interrupted) {
      // Restored after the shutdown call so that call runs exactly as it did before this change,
      // while callers can still observe that the thread was interrupted.
      Thread.currentThread().interrupt();
    }
  }

  return copyableFileList;
}
/**
 * Returns the URN of this dataset, which is the string form of the value returned by {@code datasetRoot()}.
 */
@Override
public String datasetURN() {
  final Object root = this.datasetRoot();
  return root.toString();
}