/**
 * Creates the site writer used for a single INFO header line.
 *
 * The result is always a {@code GenericSiteWriter} built around the field encoder that
 * {@code createFieldEncoder} returns when invoked with its last argument set to {@code false}.
 *
 * @param header  the VCF header passed through to the new writer
 * @param line    the INFO header line the writer is created for
 * @param encoder the BCF2 encoder forwarded to {@code createFieldEncoder}
 * @param dict    the string-to-offset dictionary forwarded to {@code createFieldEncoder}
 * @return a newly constructed site writer for {@code line}
 */
private BCF2FieldWriter.SiteWriter createInfoWriter(
        final VCFHeader header,
        final VCFInfoHeaderLine line,
        final BCF2Encoder encoder,
        final Map<String, Integer> dict) {
    return new BCF2FieldWriter.GenericSiteWriter(
            header,
            createFieldEncoder(line, encoder, dict, false));
}
/**
 * Looks up the genotypes writer registered for a FORMAT field.
 *
 * The lookup is delegated to {@code getWriter} against the {@code genotypesWriters} map.
 *
 * @param field key found in the VCF header FORMAT records
 * @return the writer for {@code field}, or null if none exists for that field
 */
public BCF2FieldWriter.GenotypesWriter getGenotypeFieldWriter(final String field) {
    final BCF2FieldWriter.GenotypesWriter registered = getWriter(field, genotypesWriters);
    return registered;
}
/**
 * Registers one field writer per INFO line and one per FORMAT line of the given header.
 *
 * INFO lines are processed first and stored in {@code siteWriters}; FORMAT lines follow and are
 * stored in {@code genotypesWriters}. Each writer is keyed by the ID of its header line.
 *
 * This must run before any of the getter methods can return a writer.
 *
 * @param header a VCFHeader containing a description for every INFO and FORMAT field we'll attempt to write out to BCF
 * @param encoder the encoder we are going to use to write out the BCF2 data
 * @param stringDictionary a map from VCFHeader strings to their offsets for encoding
 */
public void setup(final VCFHeader header, final BCF2Encoder encoder, final Map<String, Integer> stringDictionary) {
    // site (INFO) writers
    for (final VCFInfoHeaderLine infoLine : header.getInfoHeaderLines()) {
        add(siteWriters, infoLine.getID(), createInfoWriter(header, infoLine, encoder, stringDictionary));
    }

    // genotype (FORMAT) writers
    for (final VCFFormatHeaderLine formatLine : header.getFormatHeaderLines()) {
        add(genotypesWriters, formatLine.getID(), createGenotypesWriter(header, formatLine, encoder, stringDictionary));
    }
}
/**
 * Encodes every attribute of the variant context using the site writer registered for its key.
 *
 * For each attribute key the writer's {@code start}, {@code site} and {@code done} methods are
 * invoked in that order against the shared encoder.
 *
 * @param vc the variant context whose attributes are written
 * @throws IOException declared by this method; presumably raised by the writer calls
 */
private void buildInfo( VariantContext vc ) throws IOException {
    // only the attribute keys are needed; the writers read the values from vc themselves
    for ( final String key : vc.getAttributes().keySet() ) {
        final BCF2FieldWriter.SiteWriter siteWriter = fieldManager.getSiteFieldWriter(key);
        if ( siteWriter == null ) {
            // NOTE(review): presumably this throws; if it returns, the next line dereferences null - confirm
            errorUnexpectedFieldToWrite(vc, key, "INFO");
        }

        siteWriter.start(encoder, vc);
        siteWriter.site(encoder, vc);
        siteWriter.done(encoder, vc);
    }
}
private byte[] buildSamplesData(final VariantContext vc) throws IOException { final BCF2Codec.LazyData lazyData = getLazyData(vc); // has critical side effects if ( lazyData != null ) { // we never decoded any data from this BCF file, so just pass it back return lazyData.bytes; } // we have to do work to convert the VC into a BCF2 byte stream final List<String> genotypeFields = vc.calcVCFGenotypeKeys(header); for ( final String field : genotypeFields ) { final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field); if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT"); assert writer != null; writer.start(encoder, vc); for ( final String name : sampleNames ) { Genotype g = vc.getGenotype(name); if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype); writer.addGenotype(encoder, vc, g); } writer.done(encoder, vc); } return encoder.getRecordBytes(); }
@Override public void setHeader(final VCFHeader header) { if (outputHasBeenWritten) { throw new IllegalStateException("The header cannot be modified after the header or variants have been written to the output stream."); } // make sure the header is sorted correctly this.header = doNotWriteGenotypes ? new VCFHeader(header.getMetaDataInSortedOrder()) : new VCFHeader( header.getMetaDataInSortedOrder(), header.getGenotypeSamples()); // create the config offsets map if ( this.header.getContigLines().isEmpty() ) { if ( ALLOW_MISSING_CONTIG_LINES ) { if ( GeneralUtils.DEBUG_MODE_ENABLED ) { System.err.println("No contig dictionary found in header, falling back to reference sequence dictionary"); } createContigDictionary(VCFUtils.makeContigHeaderLines(getRefDict(), null)); } else { throw new IllegalStateException("Cannot write BCF2 file with missing contig lines"); } } else { createContigDictionary(this.header.getContigLines()); } // set up the map from dictionary string values -> offset final ArrayList<String> dict = BCF2Utils.makeDictionary(this.header); for ( int i = 0; i < dict.size(); i++ ) { stringDictionaryMap.put(dict.get(i), i); } sampleNames = this.header.getGenotypeSamples().toArray(new String[this.header.getNGenotypeSamples()]); // setup the field encodings fieldManager.setup(this.header, encoder, stringDictionaryMap); }
/**
 * Registers one field writer per INFO line and one per FORMAT line of the given header.
 *
 * INFO lines are processed first and stored in {@code siteWriters}; FORMAT lines follow and are
 * stored in {@code genotypesWriters}. Each writer is keyed by the ID of its header line.
 *
 * This must run before any of the getter methods can return a writer.
 *
 * @param header a VCFHeader containing a description for every INFO and FORMAT field we'll attempt to write out to BCF
 * @param encoder the encoder we are going to use to write out the BCF2 data
 * @param stringDictionary a map from VCFHeader strings to their offsets for encoding
 */
public void setup(final VCFHeader header, final BCF2Encoder encoder, final Map<String, Integer> stringDictionary) {
    // site (INFO) writers
    for (final VCFInfoHeaderLine infoLine : header.getInfoHeaderLines()) {
        add(siteWriters, infoLine.getID(), createInfoWriter(header, infoLine, encoder, stringDictionary));
    }

    // genotype (FORMAT) writers
    for (final VCFFormatHeaderLine formatLine : header.getFormatHeaderLines()) {
        add(genotypesWriters, formatLine.getID(), createGenotypesWriter(header, formatLine, encoder, stringDictionary));
    }
}
/**
 * Encodes every attribute of the variant context using the site writer registered for its key.
 *
 * For each attribute key the writer's {@code start}, {@code site} and {@code done} methods are
 * invoked in that order against the shared encoder.
 *
 * @param vc the variant context whose attributes are written
 * @throws IOException declared by this method; presumably raised by the writer calls
 */
private void buildInfo( VariantContext vc ) throws IOException {
    // only the attribute keys are needed; the writers read the values from vc themselves
    for ( final String key : vc.getAttributes().keySet() ) {
        final BCF2FieldWriter.SiteWriter siteWriter = fieldManager.getSiteFieldWriter(key);
        if ( siteWriter == null ) {
            // NOTE(review): presumably this throws; if it returns, the next line dereferences null - confirm
            errorUnexpectedFieldToWrite(vc, key, "INFO");
        }

        siteWriter.start(encoder, vc);
        siteWriter.site(encoder, vc);
        siteWriter.done(encoder, vc);
    }
}
private byte[] buildSamplesData(final VariantContext vc) throws IOException { final BCF2Codec.LazyData lazyData = getLazyData(vc); // has critical side effects if ( lazyData != null ) { // we never decoded any data from this BCF file, so just pass it back return lazyData.bytes; } // we have to do work to convert the VC into a BCF2 byte stream final List<String> genotypeFields = vc.calcVCFGenotypeKeys(header); for ( final String field : genotypeFields ) { final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field); if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT"); assert writer != null; writer.start(encoder, vc); for ( final String name : sampleNames ) { Genotype g = vc.getGenotype(name); if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype); writer.addGenotype(encoder, vc, g); } writer.done(encoder, vc); } return encoder.getRecordBytes(); }
@Override public void setHeader(final VCFHeader header) { if (outputHasBeenWritten) { throw new IllegalStateException("The header cannot be modified after the header or variants have been written to the output stream."); } // make sure the header is sorted correctly this.header = doNotWriteGenotypes ? new VCFHeader(header.getMetaDataInSortedOrder()) : new VCFHeader( header.getMetaDataInSortedOrder(), header.getGenotypeSamples()); // create the config offsets map if ( this.header.getContigLines().isEmpty() ) { if ( ALLOW_MISSING_CONTIG_LINES ) { if ( GeneralUtils.DEBUG_MODE_ENABLED ) { System.err.println("No contig dictionary found in header, falling back to reference sequence dictionary"); } createContigDictionary(VCFUtils.makeContigHeaderLines(getRefDict(), null)); } else { throw new IllegalStateException("Cannot write BCF2 file with missing contig lines"); } } else { createContigDictionary(this.header.getContigLines()); } // set up the map from dictionary string values -> offset final ArrayList<String> dict = BCF2Utils.makeDictionary(this.header); for ( int i = 0; i < dict.size(); i++ ) { stringDictionaryMap.put(dict.get(i), i); } sampleNames = this.header.getGenotypeSamples().toArray(new String[this.header.getNGenotypeSamples()]); // setup the field encodings fieldManager.setup(this.header, encoder, stringDictionaryMap); }
/**
 * Registers one field writer per INFO line and one per FORMAT line of the given header.
 *
 * INFO lines are processed first and stored in {@code siteWriters}; FORMAT lines follow and are
 * stored in {@code genotypesWriters}. Each writer is keyed by the ID of its header line.
 *
 * This must run before any of the getter methods can return a writer.
 *
 * @param header a VCFHeader containing a description for every INFO and FORMAT field we'll attempt to write out to BCF
 * @param encoder the encoder we are going to use to write out the BCF2 data
 * @param stringDictionary a map from VCFHeader strings to their offsets for encoding
 */
public void setup(final VCFHeader header, final BCF2Encoder encoder, final Map<String, Integer> stringDictionary) {
    // site (INFO) writers
    for (final VCFInfoHeaderLine infoLine : header.getInfoHeaderLines()) {
        add(siteWriters, infoLine.getID(), createInfoWriter(header, infoLine, encoder, stringDictionary));
    }

    // genotype (FORMAT) writers
    for (final VCFFormatHeaderLine formatLine : header.getFormatHeaderLines()) {
        add(genotypesWriters, formatLine.getID(), createGenotypesWriter(header, formatLine, encoder, stringDictionary));
    }
}
/**
 * Looks up the site writer registered for an INFO field.
 *
 * The lookup is delegated to {@code getWriter} against the {@code siteWriters} map.
 *
 * @param field key found in the VCF header INFO records
 * @return the writer for {@code field}, or null if none exists for that field
 */
public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String field) {
    final BCF2FieldWriter.SiteWriter registered = getWriter(field, siteWriters);
    return registered;
}
/**
 * Creates the site writer used for a single INFO header line.
 *
 * The result is always a {@code GenericSiteWriter} built around the field encoder that
 * {@code createFieldEncoder} returns when invoked with its last argument set to {@code false}.
 *
 * @param header  the VCF header passed through to the new writer
 * @param line    the INFO header line the writer is created for
 * @param encoder the BCF2 encoder forwarded to {@code createFieldEncoder}
 * @param dict    the string-to-offset dictionary forwarded to {@code createFieldEncoder}
 * @return a newly constructed site writer for {@code line}
 */
private BCF2FieldWriter.SiteWriter createInfoWriter(
        final VCFHeader header,
        final VCFInfoHeaderLine line,
        final BCF2Encoder encoder,
        final Map<String, Integer> dict) {
    return new BCF2FieldWriter.GenericSiteWriter(
            header,
            createFieldEncoder(line, encoder, dict, false));
}
/**
 * Encodes every attribute of the variant context using the site writer registered for its key.
 *
 * For each attribute key the writer's {@code start}, {@code site} and {@code done} methods are
 * invoked in that order against the shared encoder.
 *
 * @param vc the variant context whose attributes are written
 * @throws IOException declared by this method; presumably raised by the writer calls
 */
private void buildInfo( VariantContext vc ) throws IOException {
    // only the attribute keys are needed; the writers read the values from vc themselves
    for ( final String key : vc.getAttributes().keySet() ) {
        final BCF2FieldWriter.SiteWriter siteWriter = fieldManager.getSiteFieldWriter(key);
        if ( siteWriter == null ) {
            // NOTE(review): presumably this throws; if it returns, the next line dereferences null - confirm
            errorUnexpectedFieldToWrite(vc, key, "INFO");
        }

        siteWriter.start(encoder, vc);
        siteWriter.site(encoder, vc);
        siteWriter.done(encoder, vc);
    }
}
private byte[] buildSamplesData(final VariantContext vc) throws IOException { final BCF2Codec.LazyData lazyData = getLazyData(vc); // has critical side effects if ( lazyData != null ) { // we never decoded any data from this BCF file, so just pass it back return lazyData.bytes; } // we have to do work to convert the VC into a BCF2 byte stream final List<String> genotypeFields = vc.calcVCFGenotypeKeys(header); for ( final String field : genotypeFields ) { final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field); if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT"); assert writer != null; writer.start(encoder, vc); for ( final String name : sampleNames ) { Genotype g = vc.getGenotype(name); if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype); writer.addGenotype(encoder, vc, g); } writer.done(encoder, vc); } return encoder.getRecordBytes(); }
// hand the header, encoder and string dictionary to the field manager so it can set up
// the writers for the header's INFO and FORMAT fields
fieldManager.setup(header, encoder, stringDictionaryMap);
/**
 * Looks up the site writer registered for an INFO field.
 *
 * The lookup is delegated to {@code getWriter} against the {@code siteWriters} map.
 *
 * @param field key found in the VCF header INFO records
 * @return the writer for {@code field}, or null if none exists for that field
 */
public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String field) {
    final BCF2FieldWriter.SiteWriter registered = getWriter(field, siteWriters);
    return registered;
}
/**
 * Creates the site writer used for a single INFO header line.
 *
 * The result is always a {@code GenericSiteWriter} built around the field encoder that
 * {@code createFieldEncoder} returns when invoked with its last argument set to {@code false}.
 *
 * @param header  the VCF header passed through to the new writer
 * @param line    the INFO header line the writer is created for
 * @param encoder the BCF2 encoder forwarded to {@code createFieldEncoder}
 * @param dict    the string-to-offset dictionary forwarded to {@code createFieldEncoder}
 * @return a newly constructed site writer for {@code line}
 */
private BCF2FieldWriter.SiteWriter createInfoWriter(
        final VCFHeader header,
        final VCFInfoHeaderLine line,
        final BCF2Encoder encoder,
        final Map<String, Integer> dict) {
    return new BCF2FieldWriter.GenericSiteWriter(
            header,
            createFieldEncoder(line, encoder, dict, false));
}
/**
 * Looks up the site writer registered for an INFO field.
 *
 * The lookup is delegated to {@code getWriter} against the {@code siteWriters} map.
 *
 * @param field key found in the VCF header INFO records
 * @return the writer for {@code field}, or null if none exists for that field
 */
public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String field) {
    final BCF2FieldWriter.SiteWriter registered = getWriter(field, siteWriters);
    return registered;
}
/**
 * Chooses and constructs the genotypes writer for a single FORMAT header line.
 *
 * The field encoder is created first, with the last argument of {@code createFieldEncoder} set
 * to {@code true}. The writer type is then picked by the first rule that matches:
 * <ol>
 *   <li>ID equals {@code VCFConstants.GENOTYPE_KEY}: {@code GTWriter}</li>
 *   <li>ID equals {@code VCFConstants.GENOTYPE_FILTER_KEY}: {@code FTGenotypesWriter}</li>
 *   <li>{@code intGenotypeFieldAccessors} has an accessor for the ID: {@code IGFGenotypesWriter}</li>
 *   <li>the line's type is {@code VCFHeaderLineType.Integer}: {@code IntegerTypeGenotypesWriter}</li>
 *   <li>anything else: {@code StaticallyTypeGenotypesWriter}</li>
 * </ol>
 *
 * @param header  the VCF header passed through to the new writer
 * @param line    the FORMAT header line the writer is created for
 * @param encoder the BCF2 encoder forwarded to {@code createFieldEncoder}
 * @param dict    the string-to-offset dictionary forwarded to {@code createFieldEncoder}
 * @return a newly constructed genotypes writer for {@code line}
 */
private BCF2FieldWriter.GenotypesWriter createGenotypesWriter(
        final VCFHeader header,
        final VCFFormatHeaderLine line,
        final BCF2Encoder encoder,
        final Map<String, Integer> dict) {
    final String id = line.getID();
    final BCF2FieldEncoder genotypeEncoder = createFieldEncoder(line, encoder, dict, true);

    if ( id.equals(VCFConstants.GENOTYPE_KEY) ) {
        return new BCF2FieldWriter.GTWriter(header, genotypeEncoder);
    }

    if ( id.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
        return new BCF2FieldWriter.FTGenotypesWriter(header, genotypeEncoder);
    }

    if ( intGenotypeFieldAccessors.getAccessor(id) != null ) {
        return new BCF2FieldWriter.IGFGenotypesWriter(header, genotypeEncoder, intGenotypeFieldAccessors.getAccessor(id));
    }

    if ( line.getType() == VCFHeaderLineType.Integer ) {
        return new BCF2FieldWriter.IntegerTypeGenotypesWriter(header, genotypeEncoder);
    }

    return new BCF2FieldWriter.StaticallyTypeGenotypesWriter(header, genotypeEncoder);
}