/**
 * Returns true if the first part file is block-compressed gzip (BGZF).
 *
 * <p>Only the first part is probed; the remaining parts are assumed to share its
 * compression format.
 *
 * @param parts ordered part files; may be empty
 * @return true if the first part is BGZF; false if {@code parts} is empty
 * @throws IOException if the first part cannot be opened or read
 */
private static boolean isBlockCompressed(List<Path> parts) throws IOException {
    if (parts.isEmpty()) {
        // Nothing to probe; the original threw IndexOutOfBoundsException here.
        return false;
    }
    // isValidFile requires mark/reset support, which BufferedInputStream provides.
    try (InputStream in = new BufferedInputStream(Files.newInputStream(parts.get(0)))) {
        return BlockCompressedInputStream.isValidFile(in);
    }
}
/**
 * Probes the first part file and reports whether it is BGZF block-compressed.
 *
 * @param parts ordered part files; the first one is opened and probed
 * @return true if the first part looks like a valid BGZF file
 * @throws IOException if the file cannot be opened or read
 */
private static boolean isBlockCompressed(List<Path> parts) throws IOException {
    final Path firstPart = parts.get(0);
    // BufferedInputStream supplies the mark/reset support isValidFile needs.
    try (InputStream probe = new BufferedInputStream(Files.newInputStream(firstPart))) {
        return BlockCompressedInputStream.isValidFile(probe);
    }
}
/**
 * Reports whether the data set is block-compressed (BGZF), judged by its first part.
 *
 * @param parts part files; only the leading part is examined
 * @return true when the leading part is valid BGZF
 * @throws IOException on any I/O failure while opening or probing
 */
private static boolean isBlockCompressed(List<Path> parts) throws IOException {
    try (InputStream bufferedFirst =
             new BufferedInputStream(Files.newInputStream(parts.iterator().next()))) {
        // isValidFile peeks at the BGZF magic and resets the buffered stream.
        return BlockCompressedInputStream.isValidFile(bufferedFirst);
    }
}
/**
 * Return true if the specified input stream is a block compressed gzip (BGZF) stream.
 *
 * @param inputStream input stream, must not be null
 * @return true if the specified input stream is a block compressed gzip (BGZF) stream;
 *     false if it is not, or if probing it fails with an I/O error
 */
public static boolean isBgzfInputStream(final InputStream inputStream) {
    checkNotNull(inputStream);
    // Reuse the caller's buffer when possible; otherwise add the mark/reset
    // support that isValidFile requires.
    final BufferedInputStream bufferedInputStream;
    if (inputStream instanceof BufferedInputStream) {
        bufferedInputStream = (BufferedInputStream) inputStream;
    } else {
        bufferedInputStream = new BufferedInputStream(inputStream);
    }
    try {
        return BlockCompressedInputStream.isValidFile(bufferedInputStream);
    } catch (IOException e) {
        // A probe failure is treated as "not BGZF" rather than propagated.
        return false;
    }
}
/**
 * Checks if the provided path is block-compressed.
 *
 * <p>Note that using {@code checkExtension=true} would avoid the cost of opening the file, but
 * if {@link #hasBlockCompressedExtension(String)} returns {@code false} this would not detect
 * block-compressed files such BAM.
 *
 * @param path file to check if it is block-compressed.
 * @param checkExtension if {@code true}, checks the extension before opening the file.
 * @return {@code true} if the file is block-compressed; {@code false} otherwise.
 * @throws IOException if there is an I/O error.
 */
public static boolean isBlockCompressed(final Path path, final boolean checkExtension)
        throws IOException {
    if (checkExtension && !hasBlockCompressedExtension(path)) {
        return false;
    }
    // The buffer must be large enough to hold a whole compressed block for the probe.
    final int probeBufferSize =
        Math.max(Defaults.BUFFER_SIZE, BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE);
    try (final InputStream probe =
             new BufferedInputStream(Files.newInputStream(path), probeBufferSize)) {
        return BlockCompressedInputStream.isValidFile(probe);
    }
}
/**
 * Checks if the provided path is block-compressed.
 *
 * <p>Note that using {@code checkExtension=true} would avoid the cost of opening the file, but
 * if {@link #hasBlockCompressedExtension(String)} returns {@code false} this would not detect
 * block-compressed files such BAM.
 *
 * @param path file to check if it is block-compressed.
 * @param checkExtension if {@code true}, checks the extension before opening the file.
 * @return {@code true} if the file is block-compressed; {@code false} otherwise.
 * @throws IOException if there is an I/O error.
 */
public static boolean isBlockCompressed(final Path path, final boolean checkExtension)
        throws IOException {
    final boolean extensionAcceptable = !checkExtension || hasBlockCompressedExtension(path);
    if (!extensionAcceptable) {
        return false; // extension screen was requested and failed
    }
    try (final InputStream buffered = new BufferedInputStream(
            Files.newInputStream(path),
            Math.max(Defaults.BUFFER_SIZE,
                     BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE))) {
        return BlockCompressedInputStream.isValidFile(buffered);
    }
}
/**
 * @param stream stream.markSupported() must be true
 * @return true if this looks like a BAM file.
 * @throws IOException if the stream cannot be read, marked, or reset
 */
private boolean isBAMFile(final InputStream stream) throws IOException {
    if (!BlockCompressedInputStream.isValidFile(stream)) {
        return false;
    }
    // Copy up to one full compressed block so we can decompress without
    // disturbing the caller's stream position.
    final int buffSize = BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE;
    stream.mark(buffSize);
    final byte[] buffer = new byte[buffSize];
    readBytes(stream, buffer, 0, buffSize);
    stream.reset();
    // try-with-resources: the original leaked the BlockCompressedInputStream.
    try (final BlockCompressedInputStream bcis =
             new BlockCompressedInputStream(new ByteArrayInputStream(buffer))) {
        final byte[] magicBuf = new byte[4];
        final int magicLength = readBytes(bcis, magicBuf, 0, 4);
        return magicLength == BAMFileConstants.BAM_MAGIC.length
            && Arrays.equals(BAMFileConstants.BAM_MAGIC, magicBuf);
    }
}
/**
 * @param stream stream.markSupported() must be true
 * @return true if this looks like a BAM file.
 * @throws IOException if the stream cannot be read, marked, or reset
 */
public static boolean isBAMFile(final InputStream stream) throws IOException {
    if (!BlockCompressedInputStream.isValidFile(stream)) {
        return false;
    }
    // Snapshot up to one full compressed block, then rewind the caller's stream.
    final int buffSize = BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE;
    stream.mark(buffSize);
    final byte[] buffer = new byte[buffSize];
    readBytes(stream, buffer, 0, buffSize);
    stream.reset();
    // try-with-resources: the original leaked the BlockCompressedInputStream.
    try (final BlockCompressedInputStream bcis =
             new BlockCompressedInputStream(new ByteArrayInputStream(buffer))) {
        final byte[] magicBuf = new byte[4];
        final int magicLength = readBytes(bcis, magicBuf, 0, 4);
        return magicLength == BAMFileConstants.BAM_MAGIC.length
            && Arrays.equals(BAMFileConstants.BAM_MAGIC, magicBuf);
    }
}
@Override protected boolean isSplitable(JobContext context, Path filename) { Configuration conf = context.getConfiguration(); final CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(filename); if (codec == null) { return true; } if (codec instanceof BGZFCodec || codec instanceof BGZFEnhancedGzipCodec) { boolean splittable; try { try (FSDataInputStream in = filename.getFileSystem(conf).open(filename)) { splittable = BlockCompressedInputStream.isValidFile(new BufferedInputStream(in)); } } catch (IOException e) { // can't determine if BGZF or GZIP, conservatively assume latter splittable = false; } if (!splittable) { logger.warn("{} is not splittable, consider using block-compressed gzip (BGZF)", filename); } return splittable; } else if (codec instanceof GzipCodec) { logger.warn("Using GzipCodec, which is not splittable, consider using block compressed gzip (BGZF) and BGZFCodec/BGZFEnhancedGzipCodec."); } return codec instanceof SplittableCompressionCodec; }
@Override protected boolean isSplitable(JobContext context, Path filename) { Configuration conf = context.getConfiguration(); final CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(filename); if (codec == null) { return true; } if (codec instanceof BGZFCodec || codec instanceof BGZFEnhancedGzipCodec) { boolean splittable; try { try (FSDataInputStream in = filename.getFileSystem(conf).open(filename)) { splittable = BlockCompressedInputStream.isValidFile(new BufferedInputStream(in)); } } catch (IOException e) { // can't determine if BGZF or GZIP, conservatively assume latter splittable = false; } if (!splittable) { logger.warn("{} is not splittable, consider using block-compressed gzip (BGZF)", filename); } return splittable; } else if (codec instanceof GzipCodec) { logger.warn("Using GzipCodec, which is not splittable, consider using block compressed gzip (BGZF) and BGZFCodec/BGZFEnhancedGzipCodec."); } return codec instanceof SplittableCompressionCodec; }
/**
 * Returns true if {@code file} looks like a BAM file: a valid BGZF stream whose
 * decompressed content starts with the "BAM\1" magic bytes.
 *
 * <p>All I/O failures (including truncated files) are reported as "cannot read".
 */
@Override
public boolean canRead(File file) {
    final byte[] BAM_MAGIC = "BAM\1".getBytes();
    final byte[] buffer = new byte[BAM_MAGIC.length];
    // try-with-resources: the original leaked fstream on the early-return path
    // and leaked BCIS whenever an exception was thrown.
    try (InputStream fstream = new BufferedInputStream(new FileInputStream(file))) {
        if (!BlockCompressedInputStream.isValidFile(fstream)) {
            return false;
        }
        try (BlockCompressedInputStream bcis = new BlockCompressedInputStream(fstream)) {
            bcis.read(buffer, 0, BAM_MAGIC.length);
            return Arrays.equals(buffer, BAM_MAGIC);
        }
    } catch (IOException e) {
        return false;
    } catch (htsjdk.samtools.FileTruncatedException e) {
        return false;
    }
}
}
/**
 * @param stream stream.markSupported() must be true
 * @return true if this looks like a BAM file.
 * @throws IOException if the stream cannot be read, marked, or reset
 */
public static boolean isBAMFile(final InputStream stream) throws IOException {
    if (!BlockCompressedInputStream.isValidFile(stream)) {
        return false;
    }
    // Copy up to one compressed block aside, then restore the caller's position.
    final int probeSize = BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE;
    stream.mark(probeSize);
    final byte[] compressedBlock = new byte[probeSize];
    readBytes(stream, compressedBlock, 0, probeSize);
    stream.reset();
    final byte[] magic = new byte[4];
    try (final BlockCompressedInputStream decompressor =
             new BlockCompressedInputStream(new ByteArrayInputStream(compressedBlock))) {
        final int bytesRead = readBytes(decompressor, magic, 0, 4);
        return bytesRead == BAMFileConstants.BAM_MAGIC.length
            && Arrays.equals(BAMFileConstants.BAM_MAGIC, magic);
    }
}
/**
 * @param stream stream.markSupported() must be true
 * @return true if this looks like a BAM file.
 * @throws IOException if the stream cannot be read, marked, or reset
 */
public static boolean isBAMFile(final InputStream stream) throws IOException {
    if (!BlockCompressedInputStream.isValidFile(stream)) {
        return false; // not even BGZF, so it cannot be BAM
    }
    final int blockSize = BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE;
    stream.mark(blockSize);
    final byte[] firstBlock = new byte[blockSize];
    readBytes(stream, firstBlock, 0, blockSize);
    stream.reset();
    try (final BlockCompressedInputStream inflated =
             new BlockCompressedInputStream(new ByteArrayInputStream(firstBlock))) {
        final byte[] header = new byte[4];
        final int headerLength = readBytes(inflated, header, 0, 4);
        if (headerLength != BAMFileConstants.BAM_MAGIC.length) {
            return false;
        }
        return Arrays.equals(BAMFileConstants.BAM_MAGIC, header);
    }
}
/**
 * Reads the BCF header from {@code ss} and records the sizes (contig dictionary
 * length, genotype sample count) used later when guessing split boundaries.
 *
 * <p>NOTE(review): the {@code headerStream} parameter is never read in this
 * constructor — the header is parsed from {@code ss} instead; confirm with
 * callers whether it is vestigial.
 *
 * @param ss seekable input whose header is parsed; retained as {@code inFile}
 * @param headerStream unused here (see note above)
 * @throws IOException if probing or header parsing fails
 */
public BCFSplitGuesser(SeekableStream ss, InputStream headerStream) throws IOException {
    inFile = ss;
    InputStream bInFile = new BufferedInputStream(inFile);
    // isValidFile needs mark/reset, hence the BufferedInputStream wrapper.
    bgzf = BlockCompressedInputStream.isValidFile(bInFile);
    if (bgzf) bInFile = new BlockCompressedInputStream(bInFile);
    // Excess buffering here but it can't be helped that BCF2Codec only takes
    // PositionalBufferedStream.
    final VCFHeader header = (VCFHeader)bcfCodec.readHeader(
        new PositionalBufferedStream(bInFile)).getHeaderValue();
    contigDictionaryLength = header.getContigLines().size();
    genotypeSampleCount = header.getNGenotypeSamples();
}
/**
 * Parses the BCF header from {@code ss} and caches the contig-dictionary length
 * and genotype sample count for later split guessing.
 *
 * @param ss seekable input; kept as {@code inFile} and used for header parsing
 * @param headerStream not read by this constructor
 * @throws IOException if the header cannot be read
 */
public BCFSplitGuesser(SeekableStream ss, InputStream headerStream) throws IOException {
    inFile = ss;

    InputStream headerSource = new BufferedInputStream(inFile);
    bgzf = BlockCompressedInputStream.isValidFile(headerSource);
    if (bgzf) {
        headerSource = new BlockCompressedInputStream(headerSource);
    }

    // Excess buffering, but BCF2Codec accepts only a PositionalBufferedStream.
    final PositionalBufferedStream positional = new PositionalBufferedStream(headerSource);
    final VCFHeader header = (VCFHeader) bcfCodec.readHeader(positional).getHeaderValue();

    contigDictionaryLength = header.getContigLines().size();
    genotypeSampleCount = header.getNGenotypeSamples();
}
/**
 * Constructs a split guesser by reading the BCF header out of {@code ss}, keeping
 * the counts that later split-boundary guesses depend on.
 *
 * @param ss input stream (seekable); stored in {@code inFile}
 * @param headerStream not consumed by this constructor
 * @throws IOException on header read failure
 */
public BCFSplitGuesser(SeekableStream ss, InputStream headerStream) throws IOException {
    inFile = ss;
    InputStream wrapped = new BufferedInputStream(inFile);
    final boolean blockCompressed = BlockCompressedInputStream.isValidFile(wrapped);
    bgzf = blockCompressed;
    if (blockCompressed) {
        wrapped = new BlockCompressedInputStream(wrapped);
    }
    // Excess buffering here but it can't be helped that BCF2Codec only takes
    // PositionalBufferedStream.
    final VCFHeader vcfHeader =
        (VCFHeader) bcfCodec.readHeader(new PositionalBufferedStream(wrapped)).getHeaderValue();
    contigDictionaryLength = vcfHeader.getContigLines().size();
    genotypeSampleCount = vcfHeader.getNGenotypeSamples();
}
// Wrap in a BGZF decompressor only when the probe confirms the stream really is BGZF.
if (BlockCompressedInputStream.isValidFile(bin)) bin = new BlockCompressedInputStream(bin);
/**
 * Reads a VCF header from {@code in}, first assuming textual (possibly gzipped)
 * VCF, and falling back to BCF parsing if that fails.
 *
 * @param in seekable stream positioned at the start of the header
 * @return the parsed {@link VCFHeader}
 * @throws IOException if no VCF header could be found
 */
public static VCFHeader readHeaderFrom(final SeekableStream in) throws IOException {
    final long initialPos = in.position();
    Object headerCodec = null;
    try {
        // First attempt: plain (possibly gzip-compressed) VCF text.
        final BufferedInputStream buffered = new BufferedInputStream(in);
        final InputStream textStream =
            VCFFormat.isGzip(buffered) ? new GZIPInputStream(buffered) : buffered;
        headerCodec = new VCFCodec().readHeader(
            new AsciiLineReaderIterator(new AsciiLineReader(textStream)));
    } catch (TribbleException e) {
        // Fall back to BCF, rewinding to where we started.
        logger.warn("Exception while trying to read VCF header from file:", e);
        in.seek(initialPos);
        InputStream bin = new BufferedInputStream(in);
        if (BlockCompressedInputStream.isValidFile(bin)) {
            bin = new BlockCompressedInputStream(bin);
        }
        headerCodec = new BCF2Codec().readHeader(new PositionalBufferedStream(bin));
    }
    if (!(headerCodec instanceof FeatureCodecHeader)) {
        throw new IOException("No VCF header found");
    }
    return (VCFHeader) ((FeatureCodecHeader) headerCodec).getHeaderValue();
}
}
/**
 * Extracts a VCF header from {@code in}: tries textual VCF (optionally gzipped)
 * first, then retries as BCF after seeking back to the starting position.
 *
 * @param in seekable stream positioned at the header
 * @return the parsed {@link VCFHeader}
 * @throws IOException when neither attempt yields a header
 */
public static VCFHeader readHeaderFrom(final SeekableStream in) throws IOException {
    final long startPos = in.position();
    Object codecHeader = null;
    try {
        final BufferedInputStream bufIn = new BufferedInputStream(in);
        final InputStream maybeUnzipped;
        if (VCFFormat.isGzip(bufIn)) {
            maybeUnzipped = new GZIPInputStream(bufIn);
        } else {
            maybeUnzipped = bufIn;
        }
        codecHeader = new VCFCodec().readHeader(
            new AsciiLineReaderIterator(new AsciiLineReader(maybeUnzipped)));
    } catch (TribbleException e) {
        logger.warn("Exception while trying to read VCF header from file:", e);
        in.seek(startPos);
        InputStream rewound = new BufferedInputStream(in);
        if (BlockCompressedInputStream.isValidFile(rewound)) {
            rewound = new BlockCompressedInputStream(rewound);
        }
        codecHeader = new BCF2Codec().readHeader(new PositionalBufferedStream(rewound));
    }
    if (!(codecHeader instanceof FeatureCodecHeader)) {
        throw new IOException("No VCF header found");
    }
    final Object headerValue = ((FeatureCodecHeader) codecHeader).getHeaderValue();
    return (VCFHeader) headerValue;
}
}
/**
 * Checks that a BGZF/BAM file ends with a proper terminator block, reporting a
 * validation error for defective or legacy (terminator-less) endings.
 *
 * <p>Files that are not BGZF at all are skipped silently, since termination
 * checks do not apply to them.
 *
 * @param inputFile BAM file to check
 * @throws SAMException wrapping any IOException raised while probing the file
 */
public void validateBamFileTermination(final File inputFile) {
    // try-with-resources replaces the original manual null-check/CloserUtil cleanup.
    try (BufferedInputStream inputStream =
             IOUtil.toBufferedStream(new FileInputStream(inputFile))) {
        if (!BlockCompressedInputStream.isValidFile(inputStream)) {
            return;
        }
        final BlockCompressedInputStream.FileTermination terminationState =
            BlockCompressedInputStream.checkTermination(inputFile);
        // Enum constants are singletons, so identity comparison is idiomatic.
        if (terminationState == BlockCompressedInputStream.FileTermination.DEFECTIVE) {
            addError(new SAMValidationError(Type.TRUNCATED_FILE,
                "BAM file has defective last gzip block",
                inputFile.getPath()));
        } else if (terminationState == BlockCompressedInputStream.FileTermination.HAS_HEALTHY_LAST_BLOCK) {
            addError(new SAMValidationError(Type.BAM_FILE_MISSING_TERMINATOR_BLOCK,
                "Older BAM file -- does not have terminator block",
                inputFile.getPath()));
        }
    } catch (IOException e) {
        throw new SAMException("IOException", e);
    }
}