/**
 * Generates a replicable guid to uniquely identify the origin of this {@link CopyableFile}.
 *
 * <p>The guid is derived from the origin file's modification time, length, and path,
 * concatenated in that order, so the same origin file always yields the same guid.
 *
 * @return a guid uniquely identifying the origin file.
 * @throws IOException if guid computation fails.
 */
@Override
public Guid guid() throws IOException {
  // Concatenate modification time, length, and path (in that exact order) to form a
  // deterministic identity string for the origin file.
  String originIdentity = "" + getFileStatus().getModificationTime()
      + getFileStatus().getLen()
      + getFileStatus().getPath();
  return Guid.fromStrings(originIdentity);
}
/**
 * Builds a watermark interval for the given {@link CopyableFile}, using the origin file's
 * path string as both the low and the high watermark.
 *
 * @param copyableFile file to generate a watermark interval for.
 * @return an interval whose low and high watermarks are both the origin path string.
 * @throws IOException if the file status cannot be read.
 */
@Override
public Optional<WatermarkInterval> generateWatermarkIntervalForCopyableFile(CopyableFile copyableFile)
    throws IOException {
  String originPath = copyableFile.getFileStatus().getPath().toString();
  StringWatermark pathWatermark = new StringWatermark(originPath);
  // Low and high watermark are intentionally the same object: the interval pins a single path.
  return Optional.of(new WatermarkInterval(pathWatermark, pathWatermark));
}
// NOTE(review): fragment of a larger split-computation method; enclosing definition not visible here.
// Total length of the source file, presumably used to decide how many splits to create — TODO confirm against caller.
long len = file.getFileStatus().getLen();
// Use the least common multiple of the source block size and the block size on the target
// filesystem, so split boundaries align with blocks on both sides.
long blockSize = ArithmeticUtils.lcm(file.getFileStatus().getBlockSize(), file.getBlockSize(targetFs));
// Maximum allowed split size, read from the work unit with a default fallback.
long maxSplitSize = workUnit.getPropAsLong(MAX_SPLIT_SIZE_KEY, DEFAULT_MAX_SPLIT_SIZE);
/**
 * Reads at most one record per extractor instance: a {@link FileAwareInputStream} wrapping the
 * origin file's data. Subsequent calls return {@code null} to signal end of data.
 *
 * <p>Directories yield an empty input stream. If the work unit represents a file split, the
 * stream is seeked to the split's low position before being returned.
 *
 * @param reuse ignored (deprecated by the API).
 * @return the file-aware input stream on the first call, {@code null} afterwards.
 * @throws DataRecordException declared by the interface.
 * @throws IOException if the origin filesystem or file cannot be opened.
 */
@Override
public FileAwareInputStream readRecord(@Deprecated FileAwareInputStream reuse)
    throws DataRecordException, IOException {
  if (!this.recordRead) {
    // Fall back to a fresh Hadoop configuration when no state is available.
    Configuration conf = this.state == null ? HadoopUtils.newConfiguration()
        : HadoopUtils.getConfFromState(this.state);
    // Resolve the filesystem from the origin path itself, not from any cached fs.
    FileSystem fsFromFile = this.file.getOrigin().getPath().getFileSystem(conf);
    // Mark the single record as consumed before any I/O that might throw,
    // so a retry of readRecord does not re-open the file.
    this.recordRead = true;
    FileAwareInputStream.FileAwareInputStreamBuilder builder =
        FileAwareInputStream.builder().file(this.file);
    // Directories have no byte content: return an empty stream immediately.
    if (this.file.getFileStatus().isDirectory()) {
      return builder.inputStream(EmptyInputStream.instance).build();
    }
    FSDataInputStream dataInputStream = fsFromFile.open(this.file.getFileStatus().getPath());
    // When this work unit is one split of a larger file, record the split on the builder
    // and position the stream at the split's starting offset.
    if (this.state != null && DistcpFileSplitter.isSplitWorkUnit(this.state)) {
      Optional<DistcpFileSplitter.Split> split = DistcpFileSplitter.getSplit(this.state);
      builder.split(split);
      if (split.isPresent()) {
        dataInputStream.seek(split.get().getLowPosition());
      }
    }
    // Wrap in a metered stream, presumably for throughput/progress accounting — TODO confirm.
    builder.inputStream(MeteredInputStream.builder().in(dataInputStream).build());
    return builder.build();
  }
  return null;
}
// NOTE(review): fragment with unbalanced braces — the enclosing method starts and ends outside
// this view; code tokens left untouched.
final long fileSize = copyableFile.getFileStatus().getLen();
} else {
  // Directories carry no bytes to copy: just create the directory at the destination and stop.
  if (copyableFile.getFileStatus().isDirectory()) {
    this.fs.mkdirs(writeAt);
    return;
/**
 * Builds mock split work units for a fictitious file of the given length and block size,
 * by wiring up a mocked {@link FileStatus} and a spied {@link CopyableFile}, then delegating
 * to {@link DistcpFileSplitter#splitFile}.
 *
 * @param fs mocked filesystem whose URI is stubbed by this method.
 * @param fileLen length the mocked file reports.
 * @param blockSize block size the mocked file (and target fs) report.
 * @param maxSplitSize maximum split size property set on the work unit.
 * @return the split work units produced by {@link DistcpFileSplitter#splitFile}.
 * @throws Exception on any setup or serialization failure.
 */
private Collection<WorkUnit> createMockSplitWorkUnits(FileSystem fs, long fileLen, long blockSize,
    long maxSplitSize) throws Exception {
  // Mock a file status reporting the requested length and block size.
  FileStatus mockStatus = mock(FileStatus.class);
  when(mockStatus.getLen()).thenReturn(fileLen);
  when(mockStatus.getBlockSize()).thenReturn(blockSize);

  URI fsUri = new URI("hdfs", "dummyhost", "/test", "test");
  Path destinationPath = new Path(fsUri);
  when(fs.getUri()).thenReturn(fsUri);

  CopyableDatasetMetadata datasetMetadata =
      new CopyableDatasetMetadata(new TestCopyableDataset(destinationPath));
  // Spy a real copyable file, then stub out its status, block size, and destination.
  CopyableFile spiedFile = spy(CopyableFileUtils.getTestCopyableFile());
  doReturn(mockStatus).when(spiedFile).getFileStatus();
  doReturn(blockSize).when(spiedFile).getBlockSize(any(FileSystem.class));
  doReturn(destinationPath).when(spiedFile).getDestination();

  // Assemble the work unit carrying the split-size limit, output dir, guid, and serialized entities.
  WorkUnit workUnit = WorkUnit.createEmpty();
  workUnit.setProp(DistcpFileSplitter.MAX_SPLIT_SIZE_KEY, maxSplitSize);
  workUnit.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_DIR, 1, 0),
      destinationPath.toString());
  CopySource.setWorkUnitGuid(workUnit, Guid.fromStrings(workUnit.toString()));
  CopySource.serializeCopyEntity(workUnit, spiedFile);
  CopySource.serializeCopyableDataset(workUnit, datasetMetadata);

  return DistcpFileSplitter.splitFile(spiedFile, workUnit, fs);
}
/**
 * Generates a replicable guid to uniquely identify the origin of this {@link CopyableFile}.
 *
 * <p>Deterministic: the guid is a function of the origin file's modification time, length,
 * and path, appended in that order.
 *
 * @return a guid uniquely identifying the origin file.
 * @throws IOException if guid computation fails.
 */
@Override
public Guid guid() throws IOException {
  // Build the identity string in the fixed order modTime -> length -> path.
  String identity = "" + getFileStatus().getModificationTime()
      + getFileStatus().getLen()
      + getFileStatus().getPath();
  return Guid.fromStrings(identity);
}
/**
 * Produces a watermark interval for the given {@link CopyableFile} whose low and high
 * watermarks are both the string form of the origin file's path.
 *
 * @param copyableFile file to generate a watermark interval for.
 * @return an interval pinning the origin path as both bounds.
 * @throws IOException if the file status cannot be read.
 */
@Override
public Optional<WatermarkInterval> generateWatermarkIntervalForCopyableFile(CopyableFile copyableFile)
    throws IOException {
  StringWatermark watermark =
      new StringWatermark(copyableFile.getFileStatus().getPath().toString());
  // Same watermark used for both ends of the interval, matching the single-path semantics.
  return Optional.of(new WatermarkInterval(watermark, watermark));
}
// NOTE(review): fragment of a larger split-computation method; enclosing definition not visible here.
// Total length of the source file, presumably used to decide how many splits to create — TODO confirm against caller.
long len = file.getFileStatus().getLen();
// LCM of the source block size and the target filesystem's block size, so that split
// boundaries align with blocks on both filesystems.
long blockSize = ArithmeticUtils.lcm(file.getFileStatus().getBlockSize(), file.getBlockSize(targetFs));
// Maximum allowed split size, read from the work unit with a default fallback.
long maxSplitSize = workUnit.getPropAsLong(MAX_SPLIT_SIZE_KEY, DEFAULT_MAX_SPLIT_SIZE);
/**
 * Reads at most one record per extractor instance: a {@link FileAwareInputStream} wrapping the
 * origin file's data. Subsequent calls return {@code null} to signal end of data.
 *
 * <p>Directories yield an empty input stream. If the work unit represents a file split, the
 * stream is seeked to the split's low position before being returned.
 *
 * @param reuse ignored (deprecated by the API).
 * @return the file-aware input stream on the first call, {@code null} afterwards.
 * @throws DataRecordException declared by the interface.
 * @throws IOException if the origin filesystem or file cannot be opened.
 */
@Override
public FileAwareInputStream readRecord(@Deprecated FileAwareInputStream reuse)
    throws DataRecordException, IOException {
  if (!this.recordRead) {
    // Fall back to a fresh Hadoop configuration when no state is available.
    Configuration conf = this.state == null ? HadoopUtils.newConfiguration()
        : HadoopUtils.getConfFromState(this.state);
    // Resolve the filesystem from the origin path itself, not from any cached fs.
    FileSystem fsFromFile = this.file.getOrigin().getPath().getFileSystem(conf);
    // Mark the single record as consumed before any I/O that might throw,
    // so a retry of readRecord does not re-open the file.
    this.recordRead = true;
    FileAwareInputStream.FileAwareInputStreamBuilder builder =
        FileAwareInputStream.builder().file(this.file);
    // Directories have no byte content: return an empty stream immediately.
    if (this.file.getFileStatus().isDirectory()) {
      return builder.inputStream(EmptyInputStream.instance).build();
    }
    FSDataInputStream dataInputStream = fsFromFile.open(this.file.getFileStatus().getPath());
    // When this work unit is one split of a larger file, record the split on the builder
    // and position the stream at the split's starting offset.
    if (this.state != null && DistcpFileSplitter.isSplitWorkUnit(this.state)) {
      Optional<DistcpFileSplitter.Split> split = DistcpFileSplitter.getSplit(this.state);
      builder.split(split);
      if (split.isPresent()) {
        dataInputStream.seek(split.get().getLowPosition());
      }
    }
    // Wrap in a metered stream, presumably for throughput/progress accounting — TODO confirm.
    builder.inputStream(MeteredInputStream.builder().in(dataInputStream).build());
    return builder.build();
  }
  return null;
}
// NOTE(review): fragment with unbalanced braces — the enclosing method starts and ends outside
// this view; code tokens left untouched.
final long fileSize = copyableFile.getFileStatus().getLen();
} else {
  // Directories carry no bytes to copy: just create the directory at the destination and stop.
  if (copyableFile.getFileStatus().isDirectory()) {
    this.fs.mkdirs(writeAt);
    return;