/**
 * Builds the {@code CompressedSource} that represents a subrange of a single file.
 *
 * <p>The superclass calls this when it needs a source for one file. The delegate source is asked
 * for the same subrange, and the result is wrapped together with this source's channel factory
 * and the delegate's minimum bundle size.
 *
 * @param metadata metadata of the file the subrange belongs to
 * @param start start offset handed to both the delegate and the new source
 * @param end end offset handed to both the delegate and the new source
 * @return a new {@code CompressedSource} wrapping the delegate's subrange source
 */
@Override
protected FileBasedSource<T> createForSubrangeOfFile(Metadata metadata, long start, long end) {
  FileBasedSource<T> delegateSubrange =
      sourceDelegate.createForSubrangeOfFile(metadata, start, end);
  long delegateMinBundleSize = sourceDelegate.getMinBundleSize();
  return new CompressedSource<>(
      delegateSubrange, channelFactory, metadata, delegateMinBundleSize, start, end);
}
@Override public final BoundedReader<T> createReader(PipelineOptions options) throws IOException { // Validate the current source prior to creating a reader for it. this.validate(); String fileOrPattern = fileOrPatternSpec.get(); if (mode == Mode.FILEPATTERN) { long startTime = System.currentTimeMillis(); List<Metadata> fileMetadata = FileSystems.match(fileOrPattern, emptyMatchTreatment).metadata(); LOG.info("Matched {} files for pattern {}", fileMetadata.size(), fileOrPattern); List<FileBasedReader<T>> fileReaders = new ArrayList<>(); for (Metadata metadata : fileMetadata) { long endOffset = metadata.sizeBytes(); fileReaders.add( createForSubrangeOfFile(metadata, 0, endOffset).createSingleFileReader(options)); } LOG.debug( "Creating a reader for file pattern {} took {} ms", fileOrPattern, System.currentTimeMillis() - startTime); if (fileReaders.size() == 1) { return fileReaders.get(0); } return new FilePatternReader(this, fileReaders); } else { return createSingleFileReader(options); } }
List<FileBasedSource<T>> splitResults = new ArrayList<>(expandedFiles.size()); for (Metadata metadata : expandedFiles) { FileBasedSource<T> split = createForSubrangeOfFile(metadata, 0, metadata.sizeBytes()); verify( split.getMode() == Mode.SINGLE_FILE_OR_SUBRANGE,
/**
 * Creates a source for the offsets {@code start} to {@code end} of this single-file source.
 *
 * <p>The request is rejected when this source is in {@code FILEPATTERN} mode, when {@code start}
 * lies before this source's start offset, or when {@code end} lies after this source's end
 * offset. The new source is produced by {@code createForSubrangeOfFile}; unless {@code start} is
 * not positive and {@code end} is {@code Long.MAX_VALUE}, the result must report {@code
 * SINGLE_FILE_OR_SUBRANGE} mode.
 *
 * @param start start offset of the requested subrange
 * @param end end offset of the requested subrange
 * @return the source created for the requested subrange
 * @throws IllegalArgumentException if one of the argument checks described above fails
 * @throws IllegalStateException if this source has no single-file metadata
 */
@Override
public final FileBasedSource<T> createSourceForSubrange(long start, long end) {
  checkArgument(
      mode != Mode.FILEPATTERN, "Cannot split a file pattern based source based on positions");

  long parentStart = getStartOffset();
  checkArgument(
      start >= parentStart,
      "Start offset value %s of the subrange cannot be smaller than the start offset value %s"
          + " of the parent source",
      start,
      parentStart);

  long parentEnd = getEndOffset();
  checkArgument(
      end <= parentEnd,
      "End offset value %s of the subrange cannot be larger than the end offset value %s",
      end,
      parentEnd);

  checkState(
      singleFileMetadata != null,
      "A single file source should not have null metadata: %s",
      this);

  FileBasedSource<T> subrangeSource = createForSubrangeOfFile(singleFileMetadata, start, end);

  // The mode of the result is only verified when the request is not the
  // (start <= 0, end == Long.MAX_VALUE) range.
  boolean isDefaultRange = start <= 0 && end == Long.MAX_VALUE;
  if (!isDefaultRange) {
    checkArgument(
        subrangeSource.getMode() == Mode.SINGLE_FILE_OR_SUBRANGE,
        "Source created for the range [%s,%s) must be a subrange source",
        start,
        end);
  }
  return subrangeSource;
}
/**
 * Reads one offset range of one file and emits every record found in it.
 *
 * <p>The element's key is the file and its value is the offset range. A source is built by
 * applying {@code createSource} to the file's resource id string, wrapping it with {@code
 * CompressedSource.from} and applying the file's compression. A reader over the range's
 * from/to offsets is then opened, drained into the output, and closed by the
 * try-with-resources statement.
 *
 * @param c the process context supplying the element, the pipeline options and the output
 * @throws IOException if it is thrown while creating or using the reader
 */
@ProcessElement
public void process(ProcessContext c) throws IOException {
  ReadableFile readableFile = c.element().getKey();
  OffsetRange offsets = c.element().getValue();

  String resourceName = readableFile.getMetadata().resourceId().toString();
  FileBasedSource<T> compressedSource =
      CompressedSource.from(createSource.apply(resourceName))
          .withCompression(readableFile.getCompression());
  FileBasedSource<T> rangeSource =
      compressedSource.createForSubrangeOfFile(
          readableFile.getMetadata(), offsets.getFrom(), offsets.getTo());

  try (BoundedSource.BoundedReader<T> reader =
      rangeSource.createReader(c.getPipelineOptions())) {
    boolean hasRecord = reader.start();
    while (hasRecord) {
      c.output(reader.getCurrent());
      hasRecord = reader.advance();
    }
  }
}
}