/** * Determines whether a file represented by this source is can be split into bundles. * * <p>By default, a source in mode {@link Mode#FILEPATTERN} is always splittable, because * splitting will involve expanding the file pattern and producing single-file/subrange sources, * which may or may not be splittable themselves. * * <p>By default, a source in {@link Mode#SINGLE_FILE_OR_SUBRANGE} is splittable if it is on a * file system that supports efficient read seeking. * * <p>Subclasses may override to provide different behavior. */ protected boolean isSplittable() throws Exception { if (mode == Mode.FILEPATTERN) { // split will expand file pattern and return single file or subrange sources that // may or may not be splittable. return true; } return getSingleFileMetadata().isReadSeekEfficient(); }
/**
 * Returns the size in bytes of the single file backing this source, which is used as the maximum
 * end offset.
 *
 * <p>Only valid for sources that are not in mode {@link Mode#FILEPATTERN}; the precondition check
 * rejects file patterns because they have no single well-defined end offset.
 *
 * @param options pipeline options; not consulted by this implementation
 * @return the file size in bytes, as reported by the file's metadata
 * @throws IOException declared by the overridden method signature
 */
@Override
public final long getMaxEndOffset(PipelineOptions options) throws IOException {
  checkArgument(
      mode != Mode.FILEPATTERN, "Cannot determine the exact end offset of a file pattern");
  return getSingleFileMetadata().sizeBytes();
}
@Override protected final boolean startImpl() throws IOException { FileBasedSource<T> source = getCurrentSource(); this.channel = FileSystems.open(source.getSingleFileMetadata().resourceId()); if (channel instanceof SeekableByteChannel) { SeekableByteChannel seekChannel = (SeekableByteChannel) channel; seekChannel.position(source.getStartOffset()); } else { // Channel is not seekable. Must not be a subrange. checkArgument( source.mode != Mode.SINGLE_FILE_OR_SUBRANGE, "Subrange-based sources must only be defined for file types that support seekable " + " read channels"); checkArgument( source.getStartOffset() == 0, "Start offset %s is not zero but channel for reading the file is not seekable.", source.getStartOffset()); } startReading(channel); // Advance once to load the first record. return advanceImpl(); }