for (WorkUnitState workUnit : workUnits) { if (isSplitWorkUnit(workUnit)) { CopyableFile copyableFile = (CopyableFile) CopySource.deserializeCopyEntity(workUnit); splitWorkUnitsMap.put(copyableFile, workUnit); Path outputDir = FileAwareInputStreamDataWriter.getOutputDir(oldWorkUnit); CopyEntity.DatasetAndPartition datasetAndPartition = file.getDatasetAndPartition(CopySource.deserializeCopyableDataset(oldWorkUnit)); Path parentPath = FileAwareInputStreamDataWriter.getOutputFilePath(file, outputDir, datasetAndPartition) .getParent();
/**
 * Builds three fresh {@link WorkUnitState}s, each carrying the serialized dataset metadata
 * and the copy entity under test.
 *
 * @return a mutable list of three fully-serialized {@link WorkUnitState}s
 * @throws IOException if serialization of the dataset or copy entity fails
 */
List<WorkUnitState> getWorkUnitStates()
    throws IOException {
  List<WorkUnitState> states = Lists.newArrayList();
  for (int i = 0; i < 3; i++) {
    WorkUnitState workUnitState = new WorkUnitState();
    CopySource.serializeCopyableDataset(workUnitState, metadata);
    CopySource.serializeCopyEntity(workUnitState, this.copyEntity);
    states.add(workUnitState);
  }
  return states;
}
/**
 * Computes and stores a {@link Guid} on the given work unit.
 *
 * <p>The guid is seeded from the configured converter classes (empty string when the property
 * is absent) and then extended with the work unit's deserialized copy entity, so that two work
 * units differing in either input get distinct guids.
 *
 * @param workUnit work unit to stamp with a guid
 * @throws IOException if the copy entity cannot be deserialized
 */
private static void computeAndSetWorkUnitGuid(WorkUnit workUnit)
    throws IOException {
  String converterClasses = workUnit.contains(ConfigurationKeys.CONVERTER_CLASSES_KEY)
      ? workUnit.getProp(ConfigurationKeys.CONVERTER_CLASSES_KEY)
      : "";
  Guid seed = Guid.fromStrings(converterClasses);
  setWorkUnitGuid(workUnit, seed.append(deserializeCopyEntity(workUnit)));
}
/**
 * @param state a {@link org.apache.gobblin.configuration.WorkUnitState} carrying properties needed by the returned
 *        {@link Extractor}
 * @return a {@link FileAwareInputStreamExtractor} when the serialized entity is a {@link CopyableFile};
 *         otherwise an {@link EmptyExtractor} that yields no records
 * @throws IOException if the copy entity cannot be deserialized or the source file system is unreachable
 */
@Override
public Extractor<String, FileAwareInputStream> getExtractor(WorkUnitState state)
    throws IOException {
  Class<?> copyEntityClass = getCopyEntityClass(state);
  // Non-file entities (e.g. commit steps) carry no data to extract.
  if (!CopyableFile.class.isAssignableFrom(copyEntityClass)) {
    return new EmptyExtractor<>("empty");
  }
  CopyableFile copyableFile = (CopyableFile) deserializeCopyEntity(state);
  return extractorForCopyableFile(HadoopUtils.getSourceFileSystem(state), copyableFile, state);
}
/**
 * Extracts every {@link CommitStepCopyEntity} of the given type from the work units and returns
 * the contained {@link CommitStep}s ordered by ascending priority (lower priority runs first).
 *
 * @param workUnits work unit states whose serialized copy entities are inspected
 * @param baseClass only entities assignable to this class are included
 * @return the commit steps sorted by {@link CommitStepCopyEntity#getPriority()}
 * @throws IOException if a copy entity class or entity cannot be deserialized
 */
private static List<CommitStep> getCommitSequence(Collection<WorkUnitState> workUnits, Class<?> baseClass)
    throws IOException {
  List<CommitStepCopyEntity> steps = Lists.newArrayList();
  for (WorkUnitState wus : workUnits) {
    if (baseClass.isAssignableFrom(CopySource.getCopyEntityClass(wus))) {
      steps.add((CommitStepCopyEntity) CopySource.deserializeCopyEntity(wus));
    }
  }
  // Replace the verbose anonymous Comparator with the equivalent method-reference form;
  // Integer.compare semantics are preserved (no subtraction overflow).
  steps.sort(Comparator.comparingInt(CommitStepCopyEntity::getPriority));
  List<CommitStep> sequence = Lists.newArrayListWithCapacity(steps.size());
  for (CommitStepCopyEntity entity : steps) {
    sequence.add(entity.getStep());
  }
  return sequence;
}
/**
 * Builds a mocked {@link CopyableFile} plus a work unit configured for splitting, and runs
 * {@link DistcpFileSplitter#splitFile} on them.
 *
 * @param fs mocked file system whose URI is stubbed to a dummy HDFS endpoint
 * @param fileLen reported length of the mocked file
 * @param blockSize reported block size of the mocked file (also stubbed on the spy)
 * @param maxSplitSize value for {@link DistcpFileSplitter#MAX_SPLIT_SIZE_KEY}
 * @return the split work units produced by the splitter
 */
private Collection<WorkUnit> createMockSplitWorkUnits(FileSystem fs, long fileLen, long blockSize, long maxSplitSize)
    throws Exception {
  // Mock a file status with the requested length and block size.
  FileStatus fileStatus = mock(FileStatus.class);
  when(fileStatus.getLen()).thenReturn(fileLen);
  when(fileStatus.getBlockSize()).thenReturn(blockSize);

  URI fsUri = new URI("hdfs", "dummyhost", "/test", "test");
  Path destination = new Path(fsUri);
  when(fs.getUri()).thenReturn(fsUri);

  CopyableDatasetMetadata datasetMetadata = new CopyableDatasetMetadata(new TestCopyableDataset(destination));
  // Spy the test copyable file so its status, block size and destination can be stubbed.
  CopyableFile copyableFile = spy(CopyableFileUtils.getTestCopyableFile());
  doReturn(fileStatus).when(copyableFile).getFileStatus();
  doReturn(blockSize).when(copyableFile).getBlockSize(any(FileSystem.class));
  doReturn(destination).when(copyableFile).getDestination();

  WorkUnit workUnit = WorkUnit.createEmpty();
  workUnit.setProp(DistcpFileSplitter.MAX_SPLIT_SIZE_KEY, maxSplitSize);
  workUnit.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_DIR, 1, 0),
      destination.toString());
  CopySource.setWorkUnitGuid(workUnit, Guid.fromStrings(workUnit.toString()));
  CopySource.serializeCopyEntity(workUnit, copyableFile);
  CopySource.serializeCopyableDataset(workUnit, datasetMetadata);

  return DistcpFileSplitter.splitFile(copyableFile, workUnit, fs);
}
HashMultimap.<FileSet<CopyEntity>, WorkUnit>create()); RequestAllocator<FileSet<CopyEntity>> allocator = createRequestAllocator(copyConfiguration, maxThreads); Iterator<FileSet<CopyEntity>> prioritizedFileSets = allocator.allocateRequests(requestorIterator, copyConfiguration.getMaxToCopy()); submitUnfulfilledRequestEvents(allocator); for (WorkUnit workUnit : entry.getValue()) { try { CopyEntity copyEntity = deserializeCopyEntity(workUnit); log.info(copyEntity.explain()); } catch (Exception e) {
/**
 * Groups {@link WorkUnitState}s by the dataset-and-partition of their serialized {@link CopyEntity}.
 *
 * <p>This mapping is used to set the WorkingState of all {@link WorkUnitState}s to
 * {@link WorkUnitState.WorkingState#COMMITTED} after a {@link CopyableDataset} is successfully
 * published.
 *
 * @param states work unit states to group
 * @return multimap from dataset-and-partition to all states that belong to it
 */
private static Multimap<CopyEntity.DatasetAndPartition, WorkUnitState> groupByFileSet(
    Collection<? extends WorkUnitState> states) {
  Multimap<CopyEntity.DatasetAndPartition, WorkUnitState> grouped = ArrayListMultimap.create();
  for (WorkUnitState workUnitState : states) {
    CopyEntity copyEntity = CopySource.deserializeCopyEntity(workUnitState);
    CopyableDatasetMetadata datasetMetadata =
        CopyableDatasetMetadata.deserialize(workUnitState.getProp(CopySource.SERIALIZED_COPYABLE_DATASET));
    grouped.put(copyEntity.getDatasetAndPartition(datasetMetadata), workUnitState);
  }
  return grouped;
}
/**
 * End-to-end check of {@link CopySource#getWorkunits}: every flattened work unit must carry a
 * {@link CopyableFile} whose origin lies under the test prefix, the expected owner/permission,
 * and the same shared {@link Extract}.
 */
@Test
public void testCopySource()
    throws Exception {
  SourceState state = new SourceState();
  state.setProp(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, "file:///");
  state.setProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, "file:///");
  state.setProp(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR, "/target/dir");
  state.setProp(DatasetUtils.DATASET_PROFILE_CLASS_KEY, TestCopyableDatasetFinder.class.getName());

  List<WorkUnit> workUnits = JobLauncherUtils.flattenWorkUnits(new CopySource().getWorkunits(state));
  Assert.assertEquals(workUnits.size(), TestCopyableDataset.FILE_COUNT);

  // All work units of the dataset should share one Extract instance.
  Extract sharedExtract = workUnits.get(0).getExtract();
  for (WorkUnit workUnit : workUnits) {
    CopyableFile copyableFile = (CopyableFile) CopySource.deserializeCopyEntity(workUnit);
    Assert.assertTrue(copyableFile.getOrigin().getPath().toString().startsWith(TestCopyableDataset.ORIGIN_PREFIX));
    Assert.assertEquals(copyableFile.getDestinationOwnerAndPermission(), TestCopyableDataset.OWNER_AND_PERMISSION);
    Assert.assertEquals(workUnit.getExtract(), sharedExtract);
  }
}
Guid oldGuid = CopySource.getWorkUnitGuid(newWorkUnit).get(); Guid newGuid = oldGuid.append(Guid.fromStrings(serializedSplit)); CopySource.setWorkUnitGuid(workUnit, newGuid); newWorkUnits.add(newWorkUnit);
/**
 * Publishes the actually-processed copyable file (when one exists) as overwrite properties of a
 * {@link ConstructState}, so downstream consumers can read the serialized entity.
 *
 * @return a {@link ConstructState} carrying the serialized processed file, or an empty one
 */
@Override
public State getFinalState() {
  State fileState = new State();
  if (this.actualProcessedCopyableFile.isPresent()) {
    CopySource.serializeCopyEntity(fileState, this.actualProcessedCopyableFile.get());
  }
  ConstructState finalState = new ConstructState();
  finalState.addOverwriteProperties(fileState);
  return finalState;
}
CopyConfiguration.builder(fs, state.getProperties()).preserve(PreserveAttributes.fromMnemonicString("")).build()).build(); CopySource.setWorkUnitGuid(state, Guid.fromHasGuid(copyableFile));
/**
 * Checks whether any work unit carries a copy entity that is a {@link CopyableFile} (or subclass).
 *
 * @param workUnits work unit states to scan
 * @return true as soon as one matching entity class is found; false otherwise
 * @throws IOException if a copy entity class cannot be resolved
 */
private static boolean hasCopyableFiles(Collection<WorkUnitState> workUnits)
    throws IOException {
  for (WorkUnitState workUnitState : workUnits) {
    Class<?> entityClass = CopySource.getCopyEntityClass(workUnitState);
    if (CopyableFile.class.isAssignableFrom(entityClass)) {
      return true;
    }
  }
  return false;
}
/**
 * Combines the copy entity's own guid with the guid stored in the state.
 *
 * @param state state expected to carry a work unit guid
 * @param file copy entity contributing its own guid
 * @return the combined guid as a string
 * @throws IOException if the state carries no guid
 */
private static String computeGuid(State state, CopyEntity file)
    throws IOException {
  Optional<Guid> stateGuid = CopySource.getWorkUnitGuid(state);
  if (!stateGuid.isPresent()) {
    throw new IOException("State does not contain a guid.");
  }
  return Guid.combine(file.guid(), stateGuid.get()).toString();
}
}
/**
 * @param state a {@link org.apache.gobblin.configuration.WorkUnitState} carrying properties needed by the returned
 *        {@link Extractor}
 * @return a {@link FileAwareInputStreamExtractor} for {@link CopyableFile} entities, or an
 *         {@link EmptyExtractor} for any other entity type
 * @throws IOException if deserialization fails or the source file system cannot be obtained
 */
@Override
public Extractor<String, FileAwareInputStream> getExtractor(WorkUnitState state)
    throws IOException {
  Class<?> entityClass = getCopyEntityClass(state);
  if (CopyableFile.class.isAssignableFrom(entityClass)) {
    CopyableFile copyableFile = (CopyableFile) deserializeCopyEntity(state);
    FileSystem sourceFs = HadoopUtils.getSourceFileSystem(state);
    return extractorForCopyableFile(sourceFs, copyableFile, state);
  }
  // Entities without file data (e.g. commit steps) yield no records.
  return new EmptyExtractor<>("empty");
}
HashMultimap.<FileSet<CopyEntity>, WorkUnit>create()); RequestAllocator<FileSet<CopyEntity>> allocator = createRequestAllocator(copyConfiguration, maxThreads); Iterator<FileSet<CopyEntity>> prioritizedFileSets = allocator.allocateRequests(requestorIterator, copyConfiguration.getMaxToCopy()); submitUnfulfilledRequestEvents(allocator); for (WorkUnit workUnit : entry.getValue()) { try { CopyEntity copyEntity = deserializeCopyEntity(workUnit); log.info(copyEntity.explain()); } catch (Exception e) {
/**
 * Deletes the origin of a {@link CopyableFile} from the source file system, along with its
 * companion ".ready" marker file. Non-file copy entities are ignored.
 *
 * @param state work unit state carrying the serialized copy entity
 * @throws IOException if deletion fails
 */
private void deleteFilesOnSource(WorkUnitState state)
    throws IOException {
  CopyEntity copyEntity = CopySource.deserializeCopyEntity(state);
  if (copyEntity instanceof CopyableFile) {
    Path originPath = ((CopyableFile) copyEntity).getOrigin().getPath();
    HadoopUtils.deletePath(this.sourceFs, originPath, true);
    HadoopUtils.deletePath(this.sourceFs,
        PathUtils.addExtension(originPath, ReadyCopyableFileFilter.READY_EXTENSION), true);
  }
}
}
/**
 * Extracts every {@link CommitStepCopyEntity} of the given type from the work units and returns
 * the contained {@link CommitStep}s ordered by ascending priority (lower priority runs first).
 *
 * @param workUnits work unit states whose serialized copy entities are inspected
 * @param baseClass only entities assignable to this class are included
 * @return the commit steps sorted by {@link CommitStepCopyEntity#getPriority()}
 * @throws IOException if a copy entity class or entity cannot be deserialized
 */
private static List<CommitStep> getCommitSequence(Collection<WorkUnitState> workUnits, Class<?> baseClass)
    throws IOException {
  List<CommitStepCopyEntity> steps = Lists.newArrayList();
  for (WorkUnitState wus : workUnits) {
    if (baseClass.isAssignableFrom(CopySource.getCopyEntityClass(wus))) {
      steps.add((CommitStepCopyEntity) CopySource.deserializeCopyEntity(wus));
    }
  }
  // Replace the verbose anonymous Comparator with the equivalent method-reference form;
  // Integer.compare semantics are preserved (no subtraction overflow).
  steps.sort(Comparator.comparingInt(CommitStepCopyEntity::getPriority));
  List<CommitStep> sequence = Lists.newArrayListWithCapacity(steps.size());
  for (CommitStepCopyEntity entity : steps) {
    sequence.add(entity.getStep());
  }
  return sequence;
}
TestCopyablePartitionableDatasedFinder.class.getCanonicalName()); CopySource source = new CopySource(); List<WorkUnit> workunits = source.getWorkunits(state); workunits = JobLauncherUtils.flattenWorkUnits(workunits); CopyableFile copyableFile = (CopyableFile) CopySource.deserializeCopyEntity(workUnit); Assert.assertTrue(copyableFile.getOrigin().getPath().toString().startsWith(TestCopyableDataset.ORIGIN_PREFIX)); Assert.assertEquals(copyableFile.getDestinationOwnerAndPermission(), TestCopyableDataset.OWNER_AND_PERMISSION);
Guid oldGuid = CopySource.getWorkUnitGuid(newWorkUnit).get(); Guid newGuid = oldGuid.append(Guid.fromStrings(serializedSplit)); CopySource.setWorkUnitGuid(workUnit, newGuid); newWorkUnits.add(newWorkUnit);