assertEquals("Hello world".length(), input.getMetadata().sizeBytes()); assertEquals(Compression.UNCOMPRESSED, input.getCompression()); assertTrue(input.getMetadata().isReadSeekEfficient()); try { assertEquals("Hello world", input.readFullyAsUTF8String()); assertFalse(input.getMetadata().sizeBytes() == "Hello world".length()); assertEquals(Compression.GZIP, input.getCompression()); assertFalse(input.getMetadata().isReadSeekEfficient()); try { assertEquals("Hello world", input.readFullyAsUTF8String());
/**
 * Converts one matched file's metadata into a {@code ReadableFile}.
 *
 * <p>Directories are handled according to {@code spec.getDirectoryTreatment()}: they are either
 * silently dropped or rejected with an exception. For regular files the compression is resolved
 * (auto-detected from the filename when the spec asks for {@code AUTO}) and a fresh metadata
 * object is emitted alongside it.
 *
 * @param c the context supplying the input metadata and receiving the output file
 * @throws IllegalArgumentException if the element is a directory and directories are prohibited
 * @throws UnsupportedOperationException if the element is a directory and the configured
 *     treatment is not one this method knows about
 */
@ProcessElement
public void process(ProcessContext c) {
  MatchResult.Metadata matched = c.element();

  if (matched.resourceId().isDirectory()) {
    switch (spec.getDirectoryTreatment()) {
      case SKIP:
        // Directories are dropped without producing any output.
        return;
      case PROHIBIT:
        throw new IllegalArgumentException(
            "Trying to read " + matched.resourceId() + " which is a directory");
      default:
        throw new UnsupportedOperationException(
            "Unknown DirectoryTreatment: " + spec.getDirectoryTreatment());
    }
  }

  // AUTO means "infer the compression from the file name"; anything else is used as given.
  Compression resolvedCompression;
  if (spec.getCompression() == Compression.AUTO) {
    resolvedCompression = Compression.detect(matched.resourceId().getFilename());
  } else {
    resolvedCompression = spec.getCompression();
  }

  // The emitted file is only marked seek-efficient when the match said so AND it is uncompressed.
  boolean seekEfficient =
      matched.isReadSeekEfficient() && resolvedCompression == Compression.UNCOMPRESSED;

  MatchResult.Metadata emittedMetadata =
      MatchResult.Metadata.builder()
          .setResourceId(matched.resourceId())
          .setSizeBytes(matched.sizeBytes())
          .setIsReadSeekEfficient(seekEfficient)
          .build();

  c.output(new ReadableFile(emittedMetadata, resolvedCompression));
}
}
/** * Determines whether a file represented by this source is can be split into bundles. * * <p>By default, a source in mode {@link Mode#FILEPATTERN} is always splittable, because * splitting will involve expanding the file pattern and producing single-file/subrange sources, * which may or may not be splittable themselves. * * <p>By default, a source in {@link Mode#SINGLE_FILE_OR_SUBRANGE} is splittable if it is on a * file system that supports efficient read seeking. * * <p>Subclasses may override to provide different behavior. */ protected boolean isSplittable() throws Exception { if (mode == Mode.FILEPATTERN) { // split will expand file pattern and return single file or subrange sources that // may or may not be splittable. return true; } return getSingleFileMetadata().isReadSeekEfficient(); }
/**
 * Writes {@code value} to {@code os} as three consecutive fields: the resource id, the
 * read-seek-efficiency flag (as the integer 1 or 0), and the size in bytes.
 *
 * @param value the metadata to encode
 * @param os the stream the encoded fields are written to
 * @throws IOException if writing any of the fields fails
 */
@Override
public void encode(Metadata value, OutputStream os) throws IOException {
  // Field order is significant; presumably the matching decoder reads in this same order.
  RESOURCE_ID_CODER.encode(value.resourceId(), os);

  // The boolean is carried as an int so it can go through the int coder.
  int seekEfficientFlag;
  if (value.isReadSeekEfficient()) {
    seekEfficientFlag = 1;
  } else {
    seekEfficientFlag = 0;
  }
  INT_CODER.encode(seekEfficientFlag, os);

  long sizeInBytes = value.sizeBytes();
  LONG_CODER.encode(sizeInBytes, os);
}
/**
 * Opens this file like {@link #open} does, but hands the channel back as a {@link
 * SeekableByteChannel}.
 *
 * <p>The call is rejected by a state check when the file is not {@link
 * MatchResult.Metadata#isReadSeekEfficient seekable}.
 *
 * @return the channel returned by {@link #open}, cast to {@link SeekableByteChannel}
 * @throws IOException if opening the file fails
 */
public SeekableByteChannel openSeekable() throws IOException {
  boolean seekable = getMetadata().isReadSeekEfficient();
  checkState(seekable, "The file %s is not seekable", metadata.resourceId());

  // The cast relies on the seekability check above having passed.
  SeekableByteChannel channel = (SeekableByteChannel) open();
  return channel;
}
/**
 * Pairs the input file with the offset range(s) that should be read from it.
 *
 * <p>A file whose metadata does not report efficient read seeking is emitted once, paired with a
 * single range covering {@code [0, sizeBytes)}. Otherwise that same whole-file range is split
 * using {@code desiredBundleSizeBytes} and the file is emitted once per resulting sub-range.
 *
 * @param c the context supplying the input file and receiving the (file, range) pairs
 */
@ProcessElement
public void process(ProcessContext c) {
  Metadata fileMetadata = c.element().getMetadata();
  OffsetRange wholeFile = new OffsetRange(0, fileMetadata.sizeBytes());

  if (fileMetadata.isReadSeekEfficient()) {
    for (OffsetRange chunk : wholeFile.split(desiredBundleSizeBytes, 0)) {
      c.output(KV.of(c.element(), chunk));
    }
  } else {
    // Not seek-efficient: keep the file in one piece.
    c.output(KV.of(c.element(), wholeFile));
  }
}
}