/** Factory hook: supplies a fresh {@link BCF2Codec} instance for each caller. */
@Override
public BCF2Codec makeCodec() {
    return new BCF2Codec();
}
/**
 * Decode the location of the next record. Delegates to {@link #decode}, so the
 * full record is parsed — presumably BCF2 offers no cheaper location-only
 * parse here (NOTE(review): confirm; this makes decodeLoc as expensive as decode).
 */
@Override public Feature decodeLoc( final PositionalBufferedStream inputStream ) { return decode(inputStream); }
/**
 * Populate the contig-name -> index dictionary from the header read off {@code in}.
 * Indices are assigned in the order the ##contig lines appear, starting at 0.
 */
private void initContigDict() {
    final VCFHeader header = (VCFHeader) codec.readHeader(in).getHeaderValue();
    contigDict.clear();
    int index = 0;
    for (final VCFContigHeaderLine contigLine : header.getContigLines()) {
        contigDict.put(contigLine.getID(), index);
        index++;
    }
}
BCFInputStreamIterator(final InputStream inputStream) { this.inputStream = this.codec.makeSourceFromStream(inputStream); this.vcfHeader = (VCFHeader) this.codec.readHeader(this.inputStream).getHeaderValue(); }
/**
 * Decode the site-level data of the current BCF2 record from this class's
 * decoder: QUAL, the packed allele/INFO and FORMAT/sample counts, ID, ALT
 * alleles, FILTER and INFO.
 *
 * @param builder builder receiving the decoded site attributes
 * @return the counts and alleles needed later to lazily decode the genotype block
 * @throws IOException if the underlying decoder fails to read
 */
private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) throws IOException {
    final Object qual = decoder.decodeSingleValue(BCF2Type.FLOAT);
    if ( qual != null ) {
        // BCF2 stores phred-scaled QUAL; the builder wants log10 error
        builder.log10PError(((Double)qual) / -10.0);
    }

    final int nAlleleInfo = decoder.decodeInt(BCF2Type.INT32);
    final int nFormatSamples = decoder.decodeInt(BCF2Type.INT32);
    // n_allele is the high 16 bits, n_info the low 16 bits; unsigned shift so a
    // set sign bit cannot yield a negative allele count
    final int nAlleles = nAlleleInfo >>> 16;
    final int nInfo = nAlleleInfo & 0x0000FFFF;
    // n_fmt is the high 8 bits, n_sample the low 24 bits. The mask must be six
    // F's (24 bits); the previous 0x00FFFFF (20 bits) silently truncated
    // sample counts above 1,048,575.
    final int nFormatFields = nFormatSamples >>> 24;
    final int nSamples = nFormatSamples & 0x00FFFFFF;

    if ( header.getNGenotypeSamples() != nSamples )
        error("Reading BCF2 files with different numbers of samples per record " + "is not currently supported. Saw " + header.getNGenotypeSamples() + " samples in header but have a record with " + nSamples + " samples");

    decodeID(builder);
    final List<Allele> alleles = decodeAlleles(builder, pos, nAlleles);
    decodeFilter(builder);
    decodeInfo(builder, nInfo);

    final SitesInfoForDecoding info = new SitesInfoForDecoding(nFormatFields, nSamples, alleles);
    if ( ! info.isValid() )
        error("Sites info is malformed: " + info);

    return info;
}
@Ignore //@Test public void rawTestFile() throws Exception { String path = "/path/to/myfile.bcf"; PositionalBufferedStream ps = new PositionalBufferedStream(new FileInputStream(path)); BCF2Codec codec = new BCF2Codec(); codec.readHeader(ps); }
// NOTE(review): fragment — the opening of this try-with-resources is outside this view.
final PositionalBufferedStream bodyPbs = new PositionalBufferedStream(new FileInputStream(bcfOutputHeaderlessFile))) {
    BCF2Codec codec = new BCF2Codec();
    codec.readHeader(headerPbs); // consume the header from the separate header stream
    VariantContext vc = codec.decode(bodyPbs); // then decode a record from the headerless body stream
    counter++;
/**
 * Decode the next full variant record: read the two length-prefixed block
 * sizes, eagerly decode the sites block, and attach a lazy decoder for the
 * genotype block.
 *
 * @param inputStream positioned at the start of the next BCF2 record
 * @return the decoded variant context (genotypes decoded lazily)
 * @throws TribbleException if the underlying stream cannot be read
 */
@Override
public VariantContext decode( final PositionalBufferedStream inputStream ) {
    try {
        recordNo++;
        final VariantContextBuilder vcBuilder = new VariantContextBuilder();

        // each record is two length-prefixed blocks: sites, then genotypes
        final int sitesSize = decoder.readBlockSize(inputStream);
        final int genotypesSize = decoder.readBlockSize(inputStream);

        decoder.readNextBlock(sitesSize, inputStream);
        decodeSiteLoc(vcBuilder);
        final SitesInfoForDecoding siteInfo = decodeSitesExtendedInfo(vcBuilder);

        decoder.readNextBlock(genotypesSize, inputStream);
        createLazyGenotypesDecoder(siteInfo, vcBuilder);
        return vcBuilder.fullyDecoded(true).make();
    } catch ( IOException e ) {
        throw new TribbleException("Failed to read BCF file", e);
    }
}
// Parse only the BCF2 header off the (already buffered/decompressed) stream
new BCF2Codec().readHeader( new PositionalBufferedStream(bin));
vcfWriter.writeHeader(vcfFile.getFileHeader());
BCF2Codec codec = new BCF2Codec();
codec.readHeader(headerPbs);
// NOTE(review): the record is decoded from headerPbs, the same stream the
// header was read from — confirm the record deliberately follows the header here
VariantContext vc = codec.decode(headerPbs);
// expect exactly 2 phased genotypes in the round-tripped record
Assert.assertEquals(vc.getGenotypes().stream().filter(Genotype::isPhased).count(), 2);
vcfWriter.add(vc);
/**
 * Decode the site-level data of the current BCF2 record from this class's
 * decoder: QUAL, the packed allele/INFO and FORMAT/sample counts, ID, ALT
 * alleles, FILTER and INFO.
 *
 * @param builder builder receiving the decoded site attributes
 * @return the counts and alleles needed later to lazily decode the genotype block
 * @throws IOException if the underlying decoder fails to read
 */
private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) throws IOException {
    final Object qual = decoder.decodeSingleValue(BCF2Type.FLOAT);
    if ( qual != null ) {
        // BCF2 stores phred-scaled QUAL; the builder wants log10 error
        builder.log10PError(((Double)qual) / -10.0);
    }

    final int nAlleleInfo = decoder.decodeInt(BCF2Type.INT32);
    final int nFormatSamples = decoder.decodeInt(BCF2Type.INT32);
    // n_allele is the high 16 bits, n_info the low 16 bits; unsigned shift so a
    // set sign bit cannot yield a negative allele count
    final int nAlleles = nAlleleInfo >>> 16;
    final int nInfo = nAlleleInfo & 0x0000FFFF;
    // n_fmt is the high 8 bits, n_sample the low 24 bits. The mask must be six
    // F's (24 bits); the previous 0x00FFFFF (20 bits) silently truncated
    // sample counts above 1,048,575.
    final int nFormatFields = nFormatSamples >>> 24;
    final int nSamples = nFormatSamples & 0x00FFFFFF;

    if ( header.getNGenotypeSamples() != nSamples )
        error("Reading BCF2 files with different numbers of samples per record " + "is not currently supported. Saw " + header.getNGenotypeSamples() + " samples in header but have a record with " + nSamples + " samples");

    decodeID(builder);
    final List<Allele> alleles = decodeAlleles(builder, pos, nAlleles);
    decodeFilter(builder);
    decodeInfo(builder, nInfo);

    final SitesInfoForDecoding info = new SitesInfoForDecoding(nFormatFields, nSamples, alleles);
    if ( ! info.isValid() )
        error("Sites info is malformed: " + info);

    return info;
}
/**
 * Decode the next full variant record: read the two length-prefixed block
 * sizes, eagerly decode the sites block, and attach a lazy decoder for the
 * genotype block.
 *
 * @param inputStream positioned at the start of the next BCF2 record
 * @return the decoded variant context (genotypes decoded lazily)
 * @throws TribbleException if the underlying stream cannot be read
 */
@Override
public VariantContext decode( final PositionalBufferedStream inputStream ) {
    try {
        recordNo++;
        final VariantContextBuilder vcBuilder = new VariantContextBuilder();

        // each record is two length-prefixed blocks: sites, then genotypes
        final int sitesSize = decoder.readBlockSize(inputStream);
        final int genotypesSize = decoder.readBlockSize(inputStream);

        decoder.readNextBlock(sitesSize, inputStream);
        decodeSiteLoc(vcBuilder);
        final SitesInfoForDecoding siteInfo = decodeSitesExtendedInfo(vcBuilder);

        decoder.readNextBlock(genotypesSize, inputStream);
        createLazyGenotypesDecoder(siteInfo, vcBuilder);
        return vcBuilder.fullyDecoded(true).make();
    } catch ( IOException e ) {
        throw new TribbleException("Failed to read BCF file", e);
    }
}
BCFInputStreamIterator(final InputStream inputStream) { this.inputStream = this.codec.makeSourceFromStream(inputStream); this.vcfHeader = (VCFHeader) this.codec.readHeader(this.inputStream).getHeaderValue(); }
public RecordReader<String, VCONTEXT> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException { String loaderJson; String queryJson; GenomicsDBFeatureReader<VCONTEXT, SOURCE> featureReader; GenomicsDBRecordReader<VCONTEXT, SOURCE> recordReader; if (taskAttemptContext != null) { Configuration configuration = taskAttemptContext.getConfiguration(); loaderJson = configuration.get(GenomicsDBConfiguration.LOADERJSON); queryJson = configuration.get(GenomicsDBConfiguration.QUERYJSON); } else { // If control comes here, means this method is called from // GenomicsDBRDD. Hence, the configuration object must be // set by setConf method, else this will lead to // NullPointerException assert(configuration!=null); loaderJson = configuration.get(GenomicsDBConfiguration.LOADERJSON); queryJson = configuration.get(GenomicsDBConfiguration.QUERYJSON); } featureReader = new GenomicsDBFeatureReader<VCONTEXT, SOURCE>( loaderJson, queryJson, (FeatureCodec<VCONTEXT, SOURCE>) new BCF2Codec()); recordReader = new GenomicsDBRecordReader<VCONTEXT, SOURCE>(featureReader); return recordReader; }
/**
 * Read a VCF header from a stream that may hold either VCF text (optionally
 * gzip-compressed) or binary BCF2 (optionally block-compressed).
 *
 * First attempts to parse as (possibly gzipped) VCF text; if that throws a
 * TribbleException, seeks back to the starting position and retries as BCF2.
 *
 * @param in seekable stream positioned at the start of the header
 * @return the parsed VCF header
 * @throws IOException if neither attempt yields a VCF header
 */
public static VCFHeader readHeaderFrom(final SeekableStream in) throws IOException {
    Object headerCodec = null;
    Object header = null;
    final long initialPos = in.position(); // remembered so we can rewind after a failed text attempt
    try {
        BufferedInputStream bis = new BufferedInputStream(in);
        // sniff for a gzip magic number before handing to the text codec
        InputStream is = VCFFormat.isGzip(bis) ? new GZIPInputStream(bis) : bis;
        headerCodec = new VCFCodec().readHeader(new AsciiLineReaderIterator(new AsciiLineReader(is)));
    } catch (TribbleException e) {
        // not VCF text — rewind and try binary BCF2 instead
        logger.warn("Exception while trying to read VCF header from file:", e);
        in.seek(initialPos);
        InputStream bin = new BufferedInputStream(in);
        if (BlockCompressedInputStream.isValidFile(bin))
            bin = new BlockCompressedInputStream(bin);
        headerCodec = new BCF2Codec().readHeader( new PositionalBufferedStream(bin));
    }
    if (!(headerCodec instanceof FeatureCodecHeader))
        throw new IOException("No VCF header found");
    header = ((FeatureCodecHeader)headerCodec).getHeaderValue();
    return (VCFHeader)header;
}
}
// NOTE(review): fragment — the surrounding method and control flow are outside this view.
error("Reading BCF2 files with different numbers of samples per record " + "is not currently supported. Saw " + header.getNGenotypeSamples() + " samples in header but have a record with " + nSamples + " samples");
decodeID(builder);
final List<Allele> alleles = decodeAlleles(builder, pos, nAlleles);
decodeFilter(builder);
decodeInfo(builder, nInfo);
// validity of the assembled sites info is reported as a hard error
error("Sites info is malformed: " + info);
return info;
/**
 * Decode the location of the next record. Delegates to {@link #decode}, so the
 * full record is parsed — presumably BCF2 offers no cheaper location-only
 * parse here (NOTE(review): confirm; this makes decodeLoc as expensive as decode).
 */
@Override public Feature decodeLoc( final PositionalBufferedStream inputStream ) { return decode(inputStream); }
/**
 * Populate the contig-name -> index dictionary from the header read off {@code in}.
 * Indices are assigned in the order the ##contig lines appear, starting at 0.
 */
private void initContigDict() {
    final VCFHeader header = (VCFHeader) codec.readHeader(in).getHeaderValue();
    contigDict.clear();
    int index = 0;
    for (final VCFContigHeaderLine contigLine : header.getContigLines()) {
        contigDict.put(contigLine.getID(), index);
        index++;
    }
}