// Excerpt of a larger method: reads the MAX_CONCURRENT_LISTING_SERVICES property from the state (falling back
// to DEFAULT_MAX_CONCURRENT_LISTING_SERVICES), builds a CopyConfiguration for targetFs from the state's
// properties, passes copyConfiguration.getMaxToCopy() as the second argument to allocateRequests, and finally
// asks the copy context to log its cache statistics.
int maxThreads = state.getPropAsInt(MAX_CONCURRENT_LISTING_SERVICES, DEFAULT_MAX_CONCURRENT_LISTING_SERVICES); final CopyConfiguration copyConfiguration = CopyConfiguration.builder(targetFs, state.getProperties()).build(); allocator.allocateRequests(requestorIterator, copyConfiguration.getMaxToCopy()); copyConfiguration.getCopyContext().logCacheStatistics();
/**
 * Creates the {@link CopyableFileGenerator} for a single dataset version.
 *
 * <p>The generator is wired with this dataset's source file system, dataset root and file filter, the target
 * root derived from the configured publish directory, and the timestamp and paths of {@code copyableVersion}.
 * Exposed as {@code protected} so tests can substitute their own generator.
 *
 * @param targetFs {@link FileSystem} files are copied to.
 * @param configuration {@link CopyConfiguration} of the copy job; its publish dir determines the target root.
 * @param copyableVersion dataset version supplying the date-time and the paths handed to the generator.
 * @param copyableFileList queue handed to the generator (presumably where it collects the {@link CopyableFile}s
 *                         it produces; confirm against {@link CopyableFileGenerator}).
 * @return a new {@link CopyableFileGenerator}.
 */
@VisibleForTesting
protected CopyableFileGenerator getCopyableFileGenetator(FileSystem targetFs, CopyConfiguration configuration,
    TimestampedDatasetVersion copyableVersion, ConcurrentLinkedQueue<CopyableFile> copyableFileList) {
  return new CopyableFileGenerator(
      this.srcFs,
      targetFs,
      configuration,
      this.datasetRoot,
      this.getTargetRoot(configuration.getPublishDir()),
      copyableVersion.getDateTime(),
      copyableVersion.getPaths(),
      copyableFileList,
      this.copyableFileFilter());
}
/**
 * Resolves the {@link OwnerAndPermission} to use when replicating {@code path}, combining the attributes of the
 * origin path with the {@link PreserveAttributes} rules of {@code copyConfiguration}.
 *
 * <ul>
 *   <li>Owner: the origin's owner if {@link Option#OWNER} is preserved, otherwise {@code null}.</li>
 *   <li>Group: the configured target group if one is present; otherwise the origin's group if
 *       {@link Option#GROUP} is preserved; otherwise {@code null}.</li>
 *   <li>Permission: the origin's permission if {@link Option#PERMISSION} is preserved, otherwise {@code null}.</li>
 * </ul>
 *
 * @param fs {@link FileSystem} in which {@code path} is looked up.
 * @param path origin path whose owner, group and permission are replicated.
 * @param copyConfiguration {@link CopyConfiguration} supplying the preserve rules, target group and copy context.
 * @return the resolved {@link OwnerAndPermission}.
 * @throws IOException if the copy context reports no {@link FileStatus} for {@code path}.
 */
public static OwnerAndPermission resolveReplicatedOwnerAndPermission(FileSystem fs, Path path,
    CopyConfiguration copyConfiguration) throws IOException {
  PreserveAttributes preserve = copyConfiguration.getPreserve();

  Optional<FileStatus> originFileStatus = copyConfiguration.getCopyContext().getFileStatus(fs, path);
  if (!originFileStatus.isPresent()) {
    throw new IOException(String.format("Origin path %s does not exist.", path));
  }
  FileStatus originStatus = originFileStatus.get();

  // An explicitly configured target group takes precedence over the origin's group.
  String group;
  if (copyConfiguration.getTargetGroup().isPresent()) {
    group = copyConfiguration.getTargetGroup().get();
  } else if (preserve.preserve(Option.GROUP)) {
    group = originStatus.getGroup();
  } else {
    group = null;
  }

  String owner = null;
  if (preserve.preserve(Option.OWNER)) {
    owner = originStatus.getOwner();
  }

  return new OwnerAndPermission(owner, group,
      preserve.preserve(Option.PERMISSION) ? originStatus.getPermission() : null);
}
/**
 * Creates a {@link CopyableFile.Builder} whose destination is inferred from the dataset root.
 *
 * <p>The destination is {@code <publishDir>/<datasetRoot name>/<origin path relative to datasetRoot>}.
 *
 * @param originFs {@link FileSystem} where the original file exists.
 * @param origin {@link FileStatus} of the original file.
 * @param datasetRoot Value of {@link CopyableDataset#datasetRoot} of the dataset creating this {@link CopyableFile}.
 * @param copyConfiguration {@link CopyConfiguration} for the copy job.
 * @return a {@link CopyableFile.Builder}.
 * @deprecated use {@link #fromOriginAndDestination}. This method was changed to remove reliance on dataset root,
 *             which is not standard across all datasets. The old behavior of inferring destinations cannot be
 *             achieved without dataset root and common dataset root, so this is an approximation. Copyable
 *             datasets should compute file destinations themselves.
 */
@Deprecated
public static Builder builder(FileSystem originFs, FileStatus origin, Path datasetRoot,
    CopyConfiguration copyConfiguration) {
  final Path pathUnderDataset = PathUtils.relativizePath(origin.getPath(), datasetRoot);
  final Path publishedDatasetRoot = new Path(copyConfiguration.getPublishDir(), datasetRoot.getName());
  final Path destination = new Path(publishedDatasetRoot, pathUnderDataset);

  return _hiddenBuilder()
      .originFS(originFs)
      .origin(origin)
      .destination(destination)
      .preserve(copyConfiguration.getPreserve())
      .configuration(copyConfiguration);
}
/**
 * Chooses the {@link RequestAllocator} for file sets based on the prioritizer configured in
 * {@code copyConfiguration}.
 *
 * <ul>
 *   <li>No prioritizer: a {@link GreedyAllocator}.</li>
 *   <li>A {@link HierarchicalPrioritizer}: an allocator from {@link HierarchicalAllocator.Factory}.</li>
 *   <li>Any other prioritizer: whatever {@link RequestAllocatorUtils#inferFromConfig} selects.</li>
 * </ul>
 *
 * @param copyConfiguration supplies the prioritizer, the store-rejected-requests setting and the
 *                          prioritization config.
 * @param maxThreads value passed to {@code allowParallelization} on the allocator config.
 * @return the allocator matching the configuration.
 */
private RequestAllocator<FileSet<CopyEntity>> createRequestAllocator(CopyConfiguration copyConfiguration,
    int maxThreads) {
  Optional<FileSetComparator> prioritizer = copyConfiguration.getPrioritizer();

  RequestAllocatorConfig.Builder<FileSet<CopyEntity>> configBuilder =
      RequestAllocatorConfig.builder(new FileSetResourceEstimator())
          .allowParallelization(maxThreads)
          .storeRejectedRequests(copyConfiguration.getStoreRejectedRequestsSetting())
          .withLimitedScopeConfig(copyConfiguration.getPrioritizationConfig());

  // Without a prioritizer there is nothing to order by, so allocate greedily.
  if (!prioritizer.isPresent()) {
    return new GreedyAllocator<>(configBuilder.build());
  }

  FileSetComparator comparator = prioritizer.get();
  configBuilder.withPrioritizer(comparator);

  if (comparator instanceof HierarchicalPrioritizer) {
    return new HierarchicalAllocator.Factory().createRequestAllocator(configBuilder.build());
  }
  return RequestAllocatorUtils.inferFromConfig(configBuilder.build());
}
/**
 * Builds a {@link CopyableFile} for tests whose origin is the classpath resource {@code resourcePath} on the
 * local file system and whose destination is {@code /destination}. No attributes are preserved (empty
 * mnemonic string) and the publisher final dir is set to {@code /publisher}.
 *
 * @param resourcePath classpath-relative name of the resource used as the origin file.
 * @return the built {@link CopyableFile}.
 * @throws IOException if the local file system or the copyable file cannot be created.
 */
private CopyableFile getTestCopyableFile(String resourcePath) throws IOException {
  String originFilePath = getClass().getClassLoader().getResource(resourcePath).getFile();
  FileStatus originStatus = new FileStatus(0L, false, 0, 0L, 0L, new Path(originFilePath));

  Properties jobProperties = new Properties();
  jobProperties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/publisher");

  FileSystem originFs = FileSystem.getLocal(new Configuration());
  Path destination = new Path("/destination");
  CopyConfiguration copyConfiguration =
      CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), jobProperties)
          .preserve(PreserveAttributes.fromMnemonicString(""))
          .build();

  return CopyableFile.fromOriginAndDestination(originFs, originStatus, destination, copyConfiguration).build();
}
}
/**
 * Collects the {@link OwnerAndPermission} to replicate for ancestor directories of the destination.
 *
 * <p>Starting at the parents of {@code originPath} and {@code destinationPath}, walks both paths upwards in
 * lock-step for as long as the two directory names match. The walk stops at the first target ancestor for which
 * the copy context reports a {@link FileStatus}. For every ancestor visited before that point, the origin
 * ancestor's owner and permission are resolved with {@link #resolveReplicatedOwnerAndPermission}.
 *
 * @param originFs {@link FileSystem} of the origin path.
 * @param originPath path of the origin file.
 * @param targetFs {@link FileSystem} of the destination path.
 * @param destinationPath path of the destination file.
 * @return resolved owner and permissions, ordered from the nearest ancestor outwards; possibly empty.
 * @throws IOException if resolving an origin ancestor's attributes fails.
 */
private List<OwnerAndPermission> replicateAncestorsOwnerAndPermission(FileSystem originFs, Path originPath,
    FileSystem targetFs, Path destinationPath) throws IOException {
  List<OwnerAndPermission> resolved = Lists.newArrayList();

  for (Path originCursor = originPath.getParent(), targetCursor = destinationPath.getParent();
      originCursor != null && targetCursor != null && originCursor.getName().equals(targetCursor.getName());
      originCursor = originCursor.getParent(), targetCursor = targetCursor.getParent()) {

    // Stop at the first target ancestor the copy context already knows about.
    if (this.configuration.getCopyContext().getFileStatus(targetFs, targetCursor).isPresent()) {
      break;
    }
    resolved.add(resolveReplicatedOwnerAndPermission(originFs, originCursor, this.configuration));
  }

  return resolved;
}
.getPropAsInt(CopySource.MAX_CONCURRENT_LISTING_SERVICES, CopySource.DEFAULT_MAX_CONCURRENT_LISTING_SERVICES); final CopyConfiguration copyConfiguration = CopyConfiguration.builder(targetFs, state.getProperties()).build(); (PriorityIterableBasedRequestAllocator<FileSet<CopyEntity>>) m.invoke(source, copyConfiguration, maxThreads); Iterator<FileSet<CopyEntity>> prioritizedFileSets = allocator.allocateRequests(requestorIterator, copyConfiguration.getMaxToCopy()); List<FileSet<CopyEntity>> fileSetList = allocator.getRequestsExceedingAvailableResourcePool(); Assert.assertEquals(fileSetList.size(), 2);
// Excerpt (truncated mid-block): chooses the value of `group`. The origin's group is used when GROUP is
// preserved; otherwise the configured target group is used if one is present.
// NOTE(review): resolveReplicatedOwnerAndPermission checks the configured target group first and only then
// falls back to the preserved origin group, i.e. the opposite precedence. Confirm which order is intended.
if (this.preserve.preserve(Option.GROUP)) { group = this.origin.getGroup(); } else if (this.configuration.getTargetGroup().isPresent()) { group = this.configuration.getTargetGroup().get(); this.configuration.getTargetFs(), this.destination);
/**
 * Creates a {@link CopyableFile.Builder} for copying {@code origin} to an explicitly given destination.
 *
 * <p>The builder is pre-populated with the origin file system and status, the destination, the copy
 * configuration, and the configuration's {@link PreserveAttributes}.
 *
 * @param originFs {@link FileSystem} where the original file exists.
 * @param origin {@link FileStatus} of the original file.
 * @param destination {@link Path} the file should be copied to.
 * @param copyConfiguration {@link CopyConfiguration} for the copy job.
 * @return a {@link CopyableFile.Builder}.
 */
public static Builder fromOriginAndDestination(FileSystem originFs, FileStatus origin, Path destination,
    CopyConfiguration copyConfiguration) {
  return _hiddenBuilder()
      .originFS(originFs)
      .origin(origin)
      .destination(destination)
      .configuration(copyConfiguration)
      .preserve(copyConfiguration.getPreserve());
}
@Test public void testCopyWithConflictingCollisionDueToModtime() throws Exception { Path source = new Path("/source"); Path target = new Path("/target"); List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1", 1, 10), createFileStatus(source, "file2")); List<FileStatus> targetFiles = Lists.newArrayList(createFileStatus(target, "file1", 1, 9)); Properties properties = new Properties(); properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString()); RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, target, sourceFiles, targetFiles, properties); try { Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(FileSystem.getLocal(new Configuration()), CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build()); Assert.fail(); } catch (IOException ioe) { // should throw exception due to collision } }
input.getExistingTargetPartition().get().getParameters().get(HiveDataset.REGISTRATION_GENERATION_TIME_MILLIS)); Optional<FileStatus> sourceFileStatus = this.helper.getConfiguration().getCopyContext(). getFileStatus(this.helper.getDataset().getFs(), input.getPartition().getDataLocation());
/**
 * Chooses the {@link RequestAllocator} for file sets based on the prioritizer configured in
 * {@code copyConfiguration}.
 *
 * <ul>
 *   <li>No prioritizer: a {@link GreedyAllocator}.</li>
 *   <li>A {@link HierarchicalPrioritizer}: an allocator from {@link HierarchicalAllocator.Factory}.</li>
 *   <li>Any other prioritizer: whatever {@link RequestAllocatorUtils#inferFromConfig} selects.</li>
 * </ul>
 *
 * @param copyConfiguration supplies the prioritizer, the store-rejected-requests setting and the
 *                          prioritization config.
 * @param maxThreads value passed to {@code allowParallelization} on the allocator config.
 * @return the allocator matching the configuration.
 */
private RequestAllocator<FileSet<CopyEntity>> createRequestAllocator(CopyConfiguration copyConfiguration,
    int maxThreads) {
  Optional<FileSetComparator> prioritizer = copyConfiguration.getPrioritizer();

  RequestAllocatorConfig.Builder<FileSet<CopyEntity>> configBuilder =
      RequestAllocatorConfig.builder(new FileSetResourceEstimator())
          .allowParallelization(maxThreads)
          .storeRejectedRequests(copyConfiguration.getStoreRejectedRequestsSetting())
          .withLimitedScopeConfig(copyConfiguration.getPrioritizationConfig());

  // Without a prioritizer there is nothing to order by, so allocate greedily.
  if (!prioritizer.isPresent()) {
    return new GreedyAllocator<>(configBuilder.build());
  }

  FileSetComparator comparator = prioritizer.get();
  configBuilder.withPrioritizer(comparator);

  if (comparator instanceof HierarchicalPrioritizer) {
    return new HierarchicalAllocator.Factory().createRequestAllocator(configBuilder.build());
  }
  return RequestAllocatorUtils.inferFromConfig(configBuilder.build());
}
/**
 * Creates a {@link CopyableFile.Builder} whose destination is inferred from the dataset root.
 *
 * <p>The destination is {@code <publishDir>/<datasetRoot name>/<origin path relative to datasetRoot>}.
 *
 * @param originFs {@link FileSystem} where the original file exists.
 * @param origin {@link FileStatus} of the original file.
 * @param datasetRoot Value of {@link CopyableDataset#datasetRoot} of the dataset creating this {@link CopyableFile}.
 * @param copyConfiguration {@link CopyConfiguration} for the copy job.
 * @return a {@link CopyableFile.Builder}.
 * @deprecated use {@link #fromOriginAndDestination}. This method was changed to remove reliance on dataset root,
 *             which is not standard across all datasets. The old behavior of inferring destinations cannot be
 *             achieved without dataset root and common dataset root, so this is an approximation. Copyable
 *             datasets should compute file destinations themselves.
 */
@Deprecated
public static Builder builder(FileSystem originFs, FileStatus origin, Path datasetRoot,
    CopyConfiguration copyConfiguration) {
  final Path pathUnderDataset = PathUtils.relativizePath(origin.getPath(), datasetRoot);
  final Path publishedDatasetRoot = new Path(copyConfiguration.getPublishDir(), datasetRoot.getName());
  final Path destination = new Path(publishedDatasetRoot, pathUnderDataset);

  return _hiddenBuilder()
      .originFS(originFs)
      .origin(origin)
      .destination(destination)
      .preserve(copyConfiguration.getPreserve())
      .configuration(copyConfiguration);
}
// Excerpt (truncated mid-block): chooses the value of `group`. The origin's group is used when GROUP is
// preserved; otherwise the configured target group is used if one is present.
// NOTE(review): resolveReplicatedOwnerAndPermission checks the configured target group first and only then
// falls back to the preserved origin group, i.e. the opposite precedence. Confirm which order is intended.
if (this.preserve.preserve(Option.GROUP)) { group = this.origin.getGroup(); } else if (this.configuration.getTargetGroup().isPresent()) { group = this.configuration.getTargetGroup().get(); this.configuration.getTargetFs(), this.destination);
/**
 * Creates a {@link CopyableFile.Builder} for copying {@code origin} to an explicitly given destination.
 *
 * <p>The builder is pre-populated with the origin file system and status, the destination, the copy
 * configuration, and the configuration's {@link PreserveAttributes}.
 *
 * @param originFs {@link FileSystem} where the original file exists.
 * @param origin {@link FileStatus} of the original file.
 * @param destination {@link Path} the file should be copied to.
 * @param copyConfiguration {@link CopyConfiguration} for the copy job.
 * @return a {@link CopyableFile.Builder}.
 */
public static Builder fromOriginAndDestination(FileSystem originFs, FileStatus origin, Path destination,
    CopyConfiguration copyConfiguration) {
  return _hiddenBuilder()
      .originFS(originFs)
      .origin(origin)
      .destination(destination)
      .configuration(copyConfiguration)
      .preserve(copyConfiguration.getPreserve());
}
// Excerpt of a larger method: reads the MAX_CONCURRENT_LISTING_SERVICES property from the state (falling back
// to DEFAULT_MAX_CONCURRENT_LISTING_SERVICES), builds a CopyConfiguration for targetFs from the state's
// properties, passes copyConfiguration.getMaxToCopy() as the second argument to allocateRequests, and finally
// asks the copy context to log its cache statistics.
int maxThreads = state.getPropAsInt(MAX_CONCURRENT_LISTING_SERVICES, DEFAULT_MAX_CONCURRENT_LISTING_SERVICES); final CopyConfiguration copyConfiguration = CopyConfiguration.builder(targetFs, state.getProperties()).build(); allocator.allocateRequests(requestorIterator, copyConfiguration.getMaxToCopy()); copyConfiguration.getCopyContext().logCacheStatistics();
/**
 * Verifies target path computation when the dataset is created with the glob {@code /source/*}: the single
 * source file {@code /source/directory/file1} must be scheduled for copy to {@code /target/directory/file1}.
 */
@Test
public void testCorrectComputationOfTargetPathsWhenUsingGlob() throws Exception {
  Path source = new Path("/source/directory");
  Path target = new Path("/target");
  Path sourceFile = new Path(source, "file1");

  List<FileStatus> sourceFiles = Lists.newArrayList(createFileStatus(source, "file1"));
  List<FileStatus> targetFiles = Lists.newArrayList();

  Properties properties = new Properties();
  properties.setProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, target.toString());

  // Spy on the local file system so that looking up the source root returns a stubbed status.
  FileSystem sourceUnderlyingFS = FileSystem.getLocal(new Configuration());
  FileSystem sourceFs = Mockito.spy(sourceUnderlyingFS);
  Mockito.doReturn(new FileStatus(0, true, 0, 0, 0, source)).when(sourceFs).getFileStatus(source);

  RecursiveCopyableDataset dataset = new TestRecursiveCopyableDataset(source, new Path(target, "directory"),
      sourceFiles, targetFiles, properties, new Path("/source/*"), sourceFs);

  Collection<? extends CopyEntity> copyableFiles = dataset.getCopyableFiles(
      FileSystem.get(new Configuration()),
      CopyConfiguration.builder(FileSystem.getLocal(new Configuration()), properties).build());

  Assert.assertEquals(copyableFiles.size(), 1);

  ClassifiedFiles classifiedFiles = classifyFiles(copyableFiles);
  Assert.assertTrue(classifiedFiles.getPathsToCopy().containsKey(sourceFile));
  Assert.assertEquals(classifiedFiles.getPathsToCopy().get(sourceFile), new Path(target, "directory/file1"));
}
/**
 * Resolves the {@link OwnerAndPermission} to use when replicating {@code path}, combining the attributes of the
 * origin path with the {@link PreserveAttributes} rules of {@code copyConfiguration}.
 *
 * <ul>
 *   <li>Owner: the origin's owner if {@link Option#OWNER} is preserved, otherwise {@code null}.</li>
 *   <li>Group: the configured target group if one is present; otherwise the origin's group if
 *       {@link Option#GROUP} is preserved; otherwise {@code null}.</li>
 *   <li>Permission: the origin's permission if {@link Option#PERMISSION} is preserved, otherwise {@code null}.</li>
 * </ul>
 *
 * @param fs {@link FileSystem} in which {@code path} is looked up.
 * @param path origin path whose owner, group and permission are replicated.
 * @param copyConfiguration {@link CopyConfiguration} supplying the preserve rules, target group and copy context.
 * @return the resolved {@link OwnerAndPermission}.
 * @throws IOException if the copy context reports no {@link FileStatus} for {@code path}.
 */
public static OwnerAndPermission resolveReplicatedOwnerAndPermission(FileSystem fs, Path path,
    CopyConfiguration copyConfiguration) throws IOException {
  PreserveAttributes preserve = copyConfiguration.getPreserve();

  Optional<FileStatus> originFileStatus = copyConfiguration.getCopyContext().getFileStatus(fs, path);
  if (!originFileStatus.isPresent()) {
    throw new IOException(String.format("Origin path %s does not exist.", path));
  }
  FileStatus originStatus = originFileStatus.get();

  // An explicitly configured target group takes precedence over the origin's group.
  String group;
  if (copyConfiguration.getTargetGroup().isPresent()) {
    group = copyConfiguration.getTargetGroup().get();
  } else if (preserve.preserve(Option.GROUP)) {
    group = originStatus.getGroup();
  } else {
    group = null;
  }

  String owner = null;
  if (preserve.preserve(Option.OWNER)) {
    owner = originStatus.getOwner();
  }

  return new OwnerAndPermission(owner, group,
      preserve.preserve(Option.PERMISSION) ? originStatus.getPermission() : null);
}
this.helper.getConfiguration().getCopyContext().getFileStatus(this.helper.getTargetFs(), targetPath); Optional<FileStatus> sourceFileStatus = this.helper.getConfiguration().getCopyContext() .getFileStatus(this.helper.getDataset().getFs(), input.getPartition().getDataLocation());