/**
 * Walks every genotype of {@code vc} and reports the largest element count
 * that {@code numElements(vc, g)} yields for this field.
 *
 * @param vc the variant context whose genotypes are inspected
 * @return the largest per-genotype element count; the running maximum starts
 *         at -1, so -1 is returned when {@code vc} yields no genotypes
 */
private final int computeMaxSizeOfGenotypeFieldFromValues(final VariantContext vc) {
    int largest = -1;
    for ( final Genotype genotype : vc.getGenotypes() ) {
        final int count = numElements(vc, genotype);
        if ( count > largest )
            largest = count;
    }
    return largest;
}
} // closes the enclosing class, whose header lies outside this excerpt
@Override public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException { // the only value that is dynamic are integers final List<Integer> values = new ArrayList<Integer>(vc.getNSamples()); for ( final Genotype g : vc.getGenotypes() ) { for ( final Integer i : BCF2Utils.toList(Integer.class, g.getExtendedAttribute(getField(), null)) ) { if ( i != null ) values.add(i); } } encodingType = BCF2Utils.determineIntegerType(values); super.start(encoder, vc); } }
private byte[] buildSamplesData(final VariantContext vc) throws IOException { final BCF2Codec.LazyData lazyData = getLazyData(vc); // has critical side effects if ( lazyData != null ) { // we never decoded any data from this BCF file, so just pass it back return lazyData.bytes; } // we have to do work to convert the VC into a BCF2 byte stream final List<String> genotypeFields = vc.calcVCFGenotypeKeys(header); for ( final String field : genotypeFields ) { final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field); if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT"); assert writer != null; writer.start(encoder, vc); for ( final String name : sampleNames ) { Genotype g = vc.getGenotype(name); if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype); writer.addGenotype(encoder, vc, g); } writer.done(encoder, vc); } return encoder.getRecordBytes(); }
private byte[] buildSamplesData(final VariantContext vc) throws IOException { final BCF2Codec.LazyData lazyData = getLazyData(vc); // has critical side effects if ( lazyData != null ) { // we never decoded any data from this BCF file, so just pass it back return lazyData.bytes; } // we have to do work to convert the VC into a BCF2 byte stream final List<String> genotypeFields = vc.calcVCFGenotypeKeys(header); for ( final String field : genotypeFields ) { final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field); if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT"); assert writer != null; writer.start(encoder, vc); for ( final String name : sampleNames ) { Genotype g = vc.getGenotype(name); if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype); writer.addGenotype(encoder, vc, g); } writer.done(encoder, vc); } return encoder.getRecordBytes(); }
@Override @Requires({"encodingType != null", "nValuesPerGenotype >= 0 || ! getFieldEncoder().hasConstantNumElements()"}) @Ensures("nValuesPerGenotype >= 0") public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException { // writes the key information super.start(encoder, vc); // only update if we need to if ( ! getFieldEncoder().hasConstantNumElements() ) { if ( getFieldEncoder().hasContextDeterminedNumElements() ) // we are cheap -- just depends on genotype of allele counts nValuesPerGenotype = getFieldEncoder().numElements(vc); else // we have to go fishing through the values themselves (expensive) nValuesPerGenotype = computeMaxSizeOfGenotypeFieldFromValues(vc); } encoder.encodeType(nValuesPerGenotype, encodingType); }
private byte[] buildSamplesData(final VariantContext vc) throws IOException { final BCF2Codec.LazyData lazyData = getLazyData(vc); // has critical side effects if ( lazyData != null ) { // we never decoded any data from this BCF file, so just pass it back return lazyData.bytes; } // we have to do work to convert the VC into a BCF2 byte stream final List<String> genotypeFields = vc.calcVCFGenotypeKeys(header); for ( final String field : genotypeFields ) { final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field); if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT"); assert writer != null; writer.start(encoder, vc); for ( final String name : sampleNames ) { Genotype g = vc.getGenotype(name); if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype); writer.addGenotype(encoder, vc, g); } writer.done(encoder, vc); } return encoder.getRecordBytes(); }
@Override public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException { // TODO // TODO this piece of code consumes like 10% of the runtime alone because fo the vc.getGenotypes() iteration // TODO encodingType = BCF2Type.INT8; for ( final Genotype g : vc.getGenotypes() ) { final int[] pls = ige.getValues(g); final BCF2Type plsType = getFieldEncoder().getType(pls); encodingType = BCF2Utils.maxIntegerType(encodingType, plsType); if ( encodingType == BCF2Type.INT32 ) break; // stop early } super.start(encoder, vc); }
@Override public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException { // TODO // TODO this piece of code consumes like 10% of the runtime alone because fo the vc.getGenotypes() iteration // TODO encodingType = BCF2Type.INT8; for ( final Genotype g : vc.getGenotypes() ) { final int[] pls = ige.getValues(g); final BCF2Type plsType = getFieldEncoder().getType(pls); encodingType = BCF2Utils.maxIntegerType(encodingType, plsType); if ( encodingType == BCF2Type.INT32 ) break; // stop early } super.start(encoder, vc); }
@Override public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException { // writes the key information super.start(encoder, vc); // only update if we need to if ( ! getFieldEncoder().hasConstantNumElements() ) { if ( getFieldEncoder().hasContextDeterminedNumElements() ) // we are cheap -- just depends on genotype of allele counts nValuesPerGenotype = getFieldEncoder().numElements(vc); else // we have to go fishing through the values themselves (expensive) nValuesPerGenotype = computeMaxSizeOfGenotypeFieldFromValues(vc); } encoder.encodeType(nValuesPerGenotype, encodingType); }
@Override public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException { // writes the key information super.start(encoder, vc); // only update if we need to if ( ! getFieldEncoder().hasConstantNumElements() ) { if ( getFieldEncoder().hasContextDeterminedNumElements() ) // we are cheap -- just depends on genotype of allele counts nValuesPerGenotype = getFieldEncoder().numElements(vc); else // we have to go fishing through the values themselves (expensive) nValuesPerGenotype = computeMaxSizeOfGenotypeFieldFromValues(vc); } encoder.encodeType(nValuesPerGenotype, encodingType); }
@Override public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException { // TODO // TODO this piece of code consumes like 10% of the runtime alone because fo the vc.getGenotypes() iteration // TODO encodingType = BCF2Type.INT8; for ( final Genotype g : vc.getGenotypes() ) { final int[] pls = ige.getValues(g); final BCF2Type plsType = getFieldEncoder().getType(pls); encodingType = BCF2Utils.maxIntegerType(encodingType, plsType); if ( encodingType == BCF2Type.INT32 ) break; // stop early } super.start(encoder, vc); }
/**
 * Encodes this writer's field for a single genotype.
 *
 * The value is looked up as an extended attribute of {@code g} under
 * {@code getField()}, with {@code null} as the fallback, and handed to the
 * field encoder together with the current {@code encodingType} and
 * {@code nValuesPerGenotype}.
 *
 * @param encoder the encoder receiving the value
 * @param vc      the enclosing variant context (not used by this implementation)
 * @param g       the genotype supplying the attribute
 * @throws IOException if encoding fails
 */
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
    final Object attributeValue = g.getExtendedAttribute(getField(), null);
    getFieldEncoder().encodeValue(
            encoder,
            attributeValue,
            encodingType,
            nValuesPerGenotype);
}
/**
 * Asks the field encoder how many elements this field's value occupies for
 * genotype {@code g} in the context of {@code vc}.
 *
 * @param vc the enclosing variant context
 * @param g  the genotype whose extended attribute {@code getField()} is measured
 * @return the element count reported by the field encoder
 */
protected int numElements(final VariantContext vc, final Genotype g) {
    final BCF2FieldEncoder fieldEncoder = getFieldEncoder();
    final Object attributeValue = g.getExtendedAttribute(getField());
    return fieldEncoder.numElements(vc, attributeValue);
}
/**
 * Asks the field encoder how many elements this field's value occupies for
 * genotype {@code g} in the context of {@code vc}.
 *
 * @param vc the enclosing variant context
 * @param g  the genotype whose extended attribute {@code getField()} is measured
 * @return the element count reported by the field encoder
 */
protected int numElements(final VariantContext vc, final Genotype g) {
    final BCF2FieldEncoder fieldEncoder = getFieldEncoder();
    final Object attributeValue = g.getExtendedAttribute(getField());
    return fieldEncoder.numElements(vc, attributeValue);
}
/**
 * Creates a genotype field writer on top of the given header and field encoder.
 *
 * When the encoder reports a constant number of elements, that count is
 * stored in {@code nValuesPerGenotype} right away; otherwise the field is
 * left untouched by this constructor.
 *
 * @param header       the VCF header passed to the superclass
 * @param fieldEncoder the encoder for this field, passed to the superclass
 */
protected GenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
    super(header, fieldEncoder);

    if ( ! fieldEncoder.hasConstantNumElements() )
        return; // nothing to cache for a variable-sized field

    nValuesPerGenotype = getFieldEncoder().numElements();
}
/**
 * Asks the field encoder how many elements this field's value occupies for
 * genotype {@code g} in the context of {@code vc}.
 *
 * @param vc the enclosing variant context
 * @param g  the genotype whose extended attribute {@code getField()} is measured
 * @return the element count reported by the field encoder; the contract
 *         annotation declares it non-negative
 */
@Ensures({"result >= 0"})
protected int numElements(final VariantContext vc, final Genotype g) {
    final BCF2FieldEncoder fieldEncoder = getFieldEncoder();
    final Object attributeValue = g.getExtendedAttribute(getField());
    return fieldEncoder.numElements(vc, attributeValue);
}
/**
 * Prepares this writer for the genotypes of {@code vc}: rejects sites with
 * more alleles than {@code BCF2Utils.MAX_ALLELES_IN_GENOTYPES}, fixes the
 * encoding type to INT8, builds the allele map, takes the per-genotype value
 * count from {@code vc.getMaxPloidy(2)} and then calls the superclass
 * {@code start}.
 *
 * @param encoder the encoder handed on to the superclass
 * @param vc      the variant context being written
 * @throws IOException           declared by the overridden method
 * @throws IllegalStateException if {@code vc} has more alleles than the
 *                               encoder can handle
 */
@Override
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
    final boolean tooManyAlleles = vc.getNAlleles() > BCF2Utils.MAX_ALLELES_IN_GENOTYPES;
    if ( tooManyAlleles ) {
        throw new IllegalStateException("Current BCF2 encoder cannot handle sites "
                + "with > " + BCF2Utils.MAX_ALLELES_IN_GENOTYPES + " alleles, but you have "
                + vc.getNAlleles() + " at " + vc.getContig() + ":" + vc.getStart());
    }

    encodingType = BCF2Type.INT8;
    buildAlleleMap(vc);
    // NOTE(review): the argument 2 is presumably a default ploidy -- confirm against getMaxPloidy
    nValuesPerGenotype = vc.getMaxPloidy(2);

    super.start(encoder, vc);
}
/**
 * Encodes this writer's field for a single genotype.
 *
 * The value is looked up as an extended attribute of {@code g} under
 * {@code getField()}, with {@code null} as the fallback, and handed to the
 * field encoder together with the current {@code encodingType} and
 * {@code nValuesPerGenotype}.
 *
 * @param encoder the encoder receiving the value
 * @param vc      the enclosing variant context (not used by this implementation)
 * @param g       the genotype supplying the attribute
 * @throws IOException if encoding fails
 */
@Requires({"encodingType != null", "nValuesPerGenotype >= 0"})
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
    final Object attributeValue = g.getExtendedAttribute(getField(), null);
    getFieldEncoder().encodeValue(
            encoder,
            attributeValue,
            encodingType,
            nValuesPerGenotype);
}
/**
 * Encodes this writer's field for a single genotype.
 *
 * The value is looked up as an extended attribute of {@code g} under
 * {@code getField()}, with {@code null} as the fallback, and handed to the
 * field encoder together with the current {@code encodingType} and
 * {@code nValuesPerGenotype}.
 *
 * @param encoder the encoder receiving the value
 * @param vc      the enclosing variant context (not used by this implementation)
 * @param g       the genotype supplying the attribute
 * @throws IOException if encoding fails
 */
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
    final Object attributeValue = g.getExtendedAttribute(getField(), null);
    getFieldEncoder().encodeValue(
            encoder,
            attributeValue,
            encodingType,
            nValuesPerGenotype);
}
@Override public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException { // the only value that is dynamic are integers final List<Integer> values = new ArrayList<Integer>(vc.getNSamples()); for ( final Genotype g : vc.getGenotypes() ) { for ( final Integer i : BCF2Utils.toList(Integer.class, g.getExtendedAttribute(getField(), null)) ) { if ( i != null ) values.add(i); } } encodingType = BCF2Utils.determineIntegerType(values); super.start(encoder, vc); } }