@Override public CopyableDataset datasetAtPath(Path path) throws IOException { return new RecursiveCopyableDataset(this.fs, path, this.props, this.datasetPattern); } }
createPathMap(getFilesAtPath(this.fs, this.rootPath, this.pathFilter), this.rootPath); Map<Path, FileStatus> filesInTarget = createPathMap(getFilesAtPath(targetFs, targetPath, this.pathFilter), targetPath); if (statusInTarget != null) { if (!sameFile(filesInSource.get(entry.getKey()), statusInTarget)) { toCopy.add(entry.getKey()); toDelete.put(entry.getKey(), statusInTarget); Path thisTargetPath = new Path(configuration.getPublishDir(), filePathRelativeToSearchPath); CopyableFile copyableFile = CopyableFile.fromOriginAndDestination(this.fs, file, thisTargetPath, configuration).fileSet(datasetURN()) .datasetOutputPath(thisTargetPath.toString()).ancestorsOwnerAndPermission(CopyableFile .resolveReplicatedOwnerAndPermissionsRecursively(this.fs, file.getPath().getParent(), nonGlobSearchPath, this.deleteEmptyDirectories ? Optional.of(targetPath) : Optional.<Path>absent()); copyEntities.add(new PrePublishStep(datasetURN(), Maps.<String, String>newHashMap(), step, 1));
@Override public String datasetURN() { return datasetRoot().toString(); }
@Override protected List<FileStatus> getFilesAtPath(FileSystem fs, Path path, PathFilter fileFilter) throws IOException { DateTimeFormatter formatter = DateTimeFormat.forPattern(datePattern); LocalDateTime endDate = currentTime; LocalDateTime startDate = endDate.minus(this.lookbackPeriod); DateRangeIterator dateRangeIterator = new DateRangeIterator(startDate, endDate, isPatternHourly); List<FileStatus> fileStatuses = Lists.newArrayList(); while (dateRangeIterator.hasNext()) { Path pathWithDateTime = new Path(path, dateRangeIterator.next().toString(formatter)); fileStatuses.addAll(super.getFilesAtPath(fs, pathWithDateTime, fileFilter)); } return fileStatuses; } }
@Test public void testCopyWithConflictingCollisionDueToModtime() throws Exception { Path source = new Path("/source"); Path target = new Path("/target"); List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1", 1, 10), createFileStatus(source, "file2")); List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file1", 1, 9)); Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString()); RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties); try { Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(FileSystem.getLocal(new Configuration()), CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build()); Assert.fail(); } catch (IOException ioe) { // should throw exception due to collision } }
@Override protected List<FileStatus> getFilesAtPath(FileSystem fs, Path path, PathFilter fileFilter) throws IOException { DateTimeFormatter formatter = DateTimeFormat.forPattern(datePattern); LocalDateTime endDate = currentTime; LocalDateTime startDate = endDate.minus(this.lookbackPeriod); DateRangeIterator dateRangeIterator = new DateRangeIterator(startDate, endDate, isPatternHourly); List<FileStatus> fileStatuses = Lists.newArrayList(); while (dateRangeIterator.hasNext()) { Path pathWithDateTime = new Path(path, dateRangeIterator.next().toString(formatter)); fileStatuses.addAll(super.getFilesAtPath(fs, pathWithDateTime, fileFilter)); } return fileStatuses; } }
@Test public void testCorrectComputationOfTargetPathsWhenUsingGlob() throws Exception { Path source = new Path("/source/directory"); Path target = new Path("/target"); List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1")); List<FileStatus> targetFiles = Lists.newArrayList(); Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString()); FileSystem sourceUnderlyingFS = FileSystem.getLocal(new Configuration()); FileSystem sourceFs = Mockito.spy(sourceUnderlyingFS); Mockito.doReturn(new FileStatus(0, true, 0, 0, 0, source)).when(sourceFs).getFileStatus(source); RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, new Path(target, "directory"), sourceFiles, targetFiles, properties, new Path("/source/*"), sourceFs); Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(FileSystem.get(new Configuration()), CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build()); Assert.assertEquals(copyableFiles.size(), 1); ClassifiedFiles classifiedFiles = classifyFiles(copyableFiles); Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(new Path(source, "file1"))); Assert.assertEquals(classifiedFiles.getPathsToCopy().get(new Path(source, "file1")), new Path(target, "directory/file1")); }
createPathMap(getFilesAtPath(this.fs, this.rootPath, this.pathFilter), this.rootPath); Map<Path, FileStatus> filesInTarget = createPathMap(getFilesAtPath(targetFs, targetPath, this.pathFilter), targetPath); if (statusInTarget != null) { if (!sameFile(filesInSource.get(entry.getKey()), statusInTarget)) { toCopy.add(entry.getKey()); toDelete.put(entry.getKey(), statusInTarget); Path thisTargetPath = new Path(configuration.getPublishDir(), filePathRelativeToSearchPath); CopyableFile copyableFile = CopyableFile.fromOriginAndDestination(this.fs, file, thisTargetPath, configuration).fileSet(datasetURN()) .datasetOutputPath(thisTargetPath.toString()).ancestorsOwnerAndPermission(CopyableFile .resolveReplicatedOwnerAndPermissionsRecursively(this.fs, file.getPath().getParent(), nonGlobSearchPath, this.deleteEmptyDirectories ? Optional.of(targetPath) : Optional.<Path>absent()); copyEntities.add(new PrePublishStep(datasetURN(), Maps.<String, String>newHashMap(), step, 1));
@Override public CopyableDataset datasetAtPath(Path path) throws IOException { return new RecursiveCopyableDataset(this.fs, path, this.props, this.datasetPattern); } }
@Override public String datasetURN() { return datasetRoot().toString(); }
@Test public void testCopyWithConflictingCollisionDueToSize() throws Exception { Path source = new Path("/source"); Path target = new Path("/target"); List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1", 1), createFileStatus(source, "file2")); List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file1", 2)); Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString()); RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties); try { Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(FileSystem.getLocal(new Configuration()), CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build()); Assert.fail(); } catch (IOException ioe) { // should throw exception due to collision } }
@Test public void testCopyWithNonConflictingCollision() throws Exception { Path source = new Path("/source"); Path target = new Path("/target"); List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1", 1), createFileStatus(source, "file2")); List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file1", 1)); Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString()); RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties); Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(FileSystem.getLocal(new Configuration()), CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build()); Assert.assertEquals(copyableFiles.size(), 1); ClassifiedFiles classifiedFiles = classifyFiles(copyableFiles); Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(new Path(source, "file2"))); Assert.assertEquals(classifiedFiles.getPathsToCopy().get(new Path(source, "file2")), new Path(target, "file2")); Assert.assertEquals(classifiedFiles.getPathsToDelete().size(), 0); }
@Test public void testSimpleCopy() throws Exception { Path source = new Path("/source"); Path target = new Path("/target"); List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1"), createFileStatus(source, "file2")); List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file3")); Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString()); RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties); Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(FileSystem.getLocal(new Configuration()), CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build()); Assert.assertEquals(copyableFiles.size(), 2); ClassifiedFiles classifiedFiles = classifyFiles(copyableFiles); Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(new Path(source, "file1"))); Assert.assertEquals(classifiedFiles.getPathsToCopy().get(new Path(source, "file1")), new Path(target, "file1")); Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(new Path(source, "file2"))); Assert.assertEquals(classifiedFiles.getPathsToCopy().get(new Path(source, "file2")), new Path(target, "file2")); Assert.assertEquals(classifiedFiles.getPathsToDelete().size(), 0); }
@Test public void testCopyWithUpdate() throws Exception { Path source = new Path("/source"); Path target = new Path("/target"); FileStatus targetFile1 = createFileStatus(target, "file1", 2); List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1", 1), createFileStatus(source, "file2")); List<FileStatus> targetFiles = Lists.newArrayList(targetFile1); Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString()); properties.setProperty(RecursiveCopyableDataset.UPDATE_KEY, "true"); RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties); FileSystem targetFsUnderlying = FileSystem.getLocal(new Configuration()); FileSystem targetFs = Mockito.spy(targetFsUnderlying); Mockito.doReturn(targetFile1).when(targetFs).getFileStatus(new Path(target, "file1")); Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(targetFs, CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build()); Assert.assertEquals(copyableFiles.size(), 3); ClassifiedFiles classifiedFiles = classifyFiles(copyableFiles); Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(new Path(source, "file1"))); Assert.assertEquals(classifiedFiles.getPathsToCopy().get(new Path(source, "file1")), new Path(target, "file1")); Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(new Path(source, "file2"))); Assert.assertEquals(classifiedFiles.getPathsToCopy().get(new Path(source, "file2")), new Path(target, "file2")); Assert.assertEquals(classifiedFiles.getPathsToDelete().size(), 1); Assert.assertTrue(classifiedFiles.getPathsToDelete().contains(new Path(target, "file1"))); }
@Test public void testCopyWithDeleteTarget() throws Exception { Path source = new Path("/source"); Path target = new Path("/target"); List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1")); List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file3")); Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString()); properties.setProperty(RecursiveCopyableDataset.DELETE_KEY, "true"); RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties); Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(FileSystem.getLocal(new Configuration()), CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build()); Assert.assertEquals(copyableFiles.size(), 2); ClassifiedFiles classifiedFiles = classifyFiles(copyableFiles); Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(new Path(source, "file1"))); Assert.assertEquals(classifiedFiles.getPathsToCopy().get(new Path(source, "file1")), new Path(target, "file1")); Assert.assertEquals(classifiedFiles.getPathsToDelete().size(), 1); Assert.assertTrue(classifiedFiles.getPathsToDelete().contains(new Path(target, "file3"))); CommitStepCopyEntity entity = (CommitStepCopyEntity) Iterables.filter(copyableFiles, new Predicate<CopyEntity>() { @Override public boolean apply(@Nullable CopyEntity copyEntity) { return copyEntity instanceof CommitStepCopyEntity; } }).iterator().next(); DeleteFileCommitStep step = (DeleteFileCommitStep) entity.getStep(); Assert.assertFalse(step.getParentDeletionLimit().isPresent()); }
@Test public void testCopyWithDeleteTargetAndDeleteParentDirectories() throws Exception { Path source = new Path("/source"); Path target = new Path("/target"); List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1")); List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file3")); Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString()); properties.setProperty(RecursiveCopyableDataset.DELETE_EMPTY_DIRECTORIES_KEY, "true"); properties.setProperty(RecursiveCopyableDataset.DELETE_KEY, "true"); RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties); Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(FileSystem.getLocal(new Configuration()), CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build()); Assert.assertEquals(copyableFiles.size(), 2); ClassifiedFiles classifiedFiles = classifyFiles(copyableFiles); Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(new Path(source, "file1"))); Assert.assertEquals(classifiedFiles.getPathsToCopy().get(new Path(source, "file1")), new Path(target, "file1")); Assert.assertEquals(classifiedFiles.getPathsToDelete().size(), 1); Assert.assertTrue(classifiedFiles.getPathsToDelete().contains(new Path(target, "file3"))); CommitStepCopyEntity entity = (CommitStepCopyEntity) Iterables.filter(copyableFiles, new Predicate<CopyEntity>() { @Override public boolean apply(@Nullable CopyEntity copyEntity) { return copyEntity instanceof CommitStepCopyEntity; } }).iterator().next(); DeleteFileCommitStep step = (DeleteFileCommitStep) entity.getStep(); Assert.assertTrue(step.getParentDeletionLimit().isPresent()); Assert.assertEquals(step.getParentDeletionLimit().get(), target); }