/**
 * Writes each element of {@code v}, in the collection's iteration order, by delegating
 * to {@code encodeRawValue} with the same BCF2 type for every element.
 *
 * @param v    the values to write
 * @param type the BCF2 type handed to every {@code encodeRawValue} call
 * @throws IOException if thrown by {@code encodeRawValue}
 */
public final <T extends Object> void encodeRawValues(final Collection<T> v, final BCF2Type type) throws IOException {
    for ( final T element : v ) {
        encodeRawValue(element, type);
    }
}
/**
 * Totally generic encoder that examines o, determines the best way to encode it, and encodes it.
 *
 * If o is a List, the BCF2 type is determined from its first element only and that type is
 * used to encode the whole list; any other object is typed and encoded as a single value.
 *
 * This method is incredibly slow, but it's only used for UnitTests so it doesn't matter
 *
 * @param o the value, or List of values, to encode; must not be null and, if a List, must not be empty
 * @return the BCF2 type returned by determineBCFType and used for the encoding
 * @throws IllegalArgumentException if o is null or is an empty List
 * @throws IOException if thrown by encodeTyped
 */
public final BCF2Type encode(final Object o) throws IOException {
    if ( o == null )
        throw new IllegalArgumentException("Generic encode cannot deal with null values");

    if ( o instanceof List ) {
        // the raw List type is kept so that the encodeTyped overload chosen is the same as before
        final List values = (List) o;

        // an empty list has no first element to infer the type from; fail with a clear message
        // instead of the IndexOutOfBoundsException that get(0) would raise
        if ( values.isEmpty() )
            throw new IllegalArgumentException("Generic encode cannot deal with empty lists");

        final BCF2Type type = determineBCFType(values.get(0));
        encodeTyped(values, type);
        return type;
    } else {
        final BCF2Type type = determineBCFType(o);
        encodeTyped(o, type);
        return type;
    }
}
/**
 * Encodes a single value together with its type information.
 *
 * A null value is written with encodeTypedMissing. Otherwise the value is cast according to
 * {@code type} (Integer for INT8/INT16/INT32, Double for FLOAT, String for CHAR) and passed
 * to the matching encodeTyped* method.
 *
 * @param value the value to encode, or null for a missing value
 * @param type  the BCF2 type to encode the value as
 * @throws IllegalArgumentException if value is non-null and type is not one of the handled types
 * @throws IOException if thrown by the delegated encoder method
 */
public final void encodeTyped(final Object value, final BCF2Type type) throws IOException {
    if ( value == null ) {
        encodeTypedMissing(type);
        return;
    }

    switch ( type ) {
        case INT8:
        case INT16:
        case INT32:
            encodeTypedInt((Integer) value, type);
            return;
        case FLOAT:
            encodeTypedFloat((Double) value);
            return;
        case CHAR:
            encodeTypedString((String) value);
            return;
        default:
            throw new IllegalArgumentException("Illegal type encountered " + type);
    }
}
/**
 * Encodes a byte array as a typed CHAR value: first encodeType is called with the number of
 * bytes and BCF2Type.CHAR, then every byte is written with encodeRawChar.
 *
 * A null array results in a single encodeType call with size 0 and no bytes.
 *
 * @param s the bytes to write, or null
 * @throws IOException if thrown by encodeType or encodeRawChar
 */
public final void encodeTypedString(final byte[] s) throws IOException {
    if ( s == null ) {
        encodeType(0, BCF2Type.CHAR);
        return;
    }

    encodeType(s.length, BCF2Type.CHAR);
    for ( final byte b : s ) {
        encodeRawChar(b);
    }
}
/**
 * Writes exactly {@code sizeToWrite} CHAR slots for the string {@code s}, without type information.
 *
 * The bytes of s (from {@code String.getBytes()}) are written first with encodeRawChar; if the
 * string has fewer than sizeToWrite bytes the remaining slots are filled with
 * encodeRawMissingValue(BCF2Type.CHAR). Bytes beyond sizeToWrite are not written.
 *
 * @param s           the string to write
 * @param sizeToWrite the total number of CHAR slots to emit
 * @throws IOException if thrown by encodeRawChar or encodeRawMissingValue
 */
public void encodeRawString(final String s, final int sizeToWrite) throws IOException {
    final byte[] raw = s.getBytes();
    final int copied = Math.min(raw.length, sizeToWrite);

    // real content first ...
    for ( int pos = 0; pos < copied; pos++ ) {
        encodeRawChar(raw[pos]);
    }

    // ... then pad up to the requested width with missing markers
    for ( int pos = copied; pos < sizeToWrite; pos++ ) {
        encodeRawMissingValue(BCF2Type.CHAR);
    }
}
private byte[] buildSitesData( VariantContext vc ) throws IOException { final int contigIndex = contigDictionary.get(vc.getContig()); if ( contigIndex == -1 ) throw new IllegalStateException(String.format("Contig %s not found in sequence dictionary from reference", vc.getContig())); // note use of encodeRawValue to not insert the typing byte encoder.encodeRawValue(contigIndex, BCF2Type.INT32); // pos. GATK is 1 based, BCF2 is 0 based encoder.encodeRawValue(vc.getStart() - 1, BCF2Type.INT32); // ref length. GATK is closed, but BCF2 is open so the ref length is GATK end - GATK start + 1 // for example, a SNP is in GATK at 1:10-10, which has ref length 10 - 10 + 1 = 1 encoder.encodeRawValue(vc.getEnd() - vc.getStart() + 1, BCF2Type.INT32); // qual if ( vc.hasLog10PError() ) encoder.encodeRawFloat((float) vc.getPhredScaledQual()); else encoder.encodeRawMissingValue(BCF2Type.FLOAT); // info fields final int nAlleles = vc.getNAlleles(); final int nInfo = vc.getAttributes().size(); final int nGenotypeFormatFields = getNGenotypeFormatFields(vc); final int nSamples = header.getNGenotypeSamples(); encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x0000FFFF), BCF2Type.INT32); encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x00FFFFF), BCF2Type.INT32); buildID(vc); buildAlleles(vc); buildFilter(vc); buildInfo(vc); return encoder.getRecordBytes(); }
/**
 * Writes a single value without type information.
 *
 * If {@code value} is the very same reference as {@code type.getMissingJavaValue()} the
 * missing marker for the type is written. Otherwise the value is cast according to
 * {@code type} (Integer for INT8/INT16/INT32, Double for FLOAT, Byte for CHAR) and written
 * with the matching raw encoder.
 *
 * NOTE(review): the missing check is an identity comparison; presumably the missing Java
 * value is null -- confirm against BCF2Type.
 *
 * @param value the value to write, or the type's missing Java value
 * @param type  the BCF2 type to write the value as
 * @throws IllegalArgumentException if type is not one of the handled types
 * @throws ClassCastException if value is not of the Java class expected for type; the
 *                            original exception is attached as the cause
 * @throws IOException if thrown by the delegated encoder method
 */
public final <T extends Object> void encodeRawValue(final T value, final BCF2Type type) throws IOException {
    try {
        if ( value == type.getMissingJavaValue() )
            encodeRawMissingValue(type);
        else {
            switch (type) {
                case INT8:
                case INT16:
                case INT32: encodeRawBytes((Integer) value, type); break;
                case FLOAT: encodeRawFloat((Double) value); break;
                case CHAR:  encodeRawChar((Byte) value); break;
                default:    throw new IllegalArgumentException("Illegal type encountered " + type);
            }
        }
    } catch ( ClassCastException e ) {
        // ClassCastException has no (message, cause) constructor, so attach the original
        // failure explicitly to keep its stack trace available to whoever debugs this
        final ClassCastException wrapped = new ClassCastException("BUG: invalid type cast to " + type + " from " + value);
        wrapped.initCause(e);
        throw wrapped;
    }
}
// Encodes tv through the type-specific typed encoder methods: encodeTypedInt for
// INT8/INT16/INT32, encodeTypedFloat for FLOAT and encodeTypedString for CHAR.
// The switch has no default branch, so any other tv.type writes nothing.
@Override public void encode(final BCF2Encoder encoder, final BCF2TypedValue tv) throws IOException { switch ( tv.type ) { case INT8: case INT16: case INT32: encoder.encodeTypedInt((Integer)tv.value, tv.type); break; case FLOAT: encoder.encodeTypedFloat((Double)tv.value); break; case CHAR: encoder.encodeTypedString((String)tv.value); break; } } });
private final byte[] encodeRecord(final List<BCF2TypedValue> toEncode) throws IOException { BCF2Encoder encoder = new BCF2Encoder(); for ( final BCF2TypedValue tv : toEncode ) { if ( tv.isMissing() ) encoder.encodeTypedMissing(tv.type); else { final BCF2Type encodedType = encoder.encode(tv.value); if ( tv.type != null ) // only if we have an expectation Assert.assertEquals(encodedType, tv.type); } } // check output final byte[] record = encoder.getRecordBytes(); Assert.assertNotNull(record); Assert.assertTrue(record.length > 0); return record; }
/**
 * For every provided value and each of several vector lengths, encodes a list holding that
 * many copies of the value, decodes it again, and checks that the result is a List of the
 * same size whose every element matches the original value.
 */
@Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
public void testBCF2EncodingVectors(final List<BCF2TypedValue> toEncode) throws IOException {
    final int[] vectorLengths = {2, 5, 10, 15, 20, 25};

    for ( final BCF2TypedValue tv : toEncode ) {
        for ( final int length : vectorLengths ) {
            final BCF2Encoder vectorEncoder = new BCF2Encoder();
            final List<Object> expected = Collections.nCopies(length, tv.value);
            vectorEncoder.encodeTyped(expected, tv.type);

            final BCF2Decoder vectorDecoder = new BCF2Decoder(vectorEncoder.getRecordBytes());
            final Object decoded = vectorDecoder.decodeTypedValue();

            Assert.assertTrue(decoded instanceof List);
            final List<Object> decodedList = (List<Object>) decoded;
            Assert.assertEquals(decodedList.size(), expected.size());

            for ( final Object element : decodedList ) {
                myAssertEquals(tv, element);
            }
        }
    }
}
/**
 * Encodes a String as a typed CHAR value by converting it to bytes with
 * {@code String.getBytes()} and delegating to the byte[] overload.
 *
 * @param s the string to encode
 * @throws IOException if thrown by the byte[] overload
 */
public final void encodeTypedString(final String s) throws IOException {
    final byte[] asBytes = s.getBytes();
    encodeTypedString(asBytes);
}
/**
 * Write the field key (dictionary offset and type) into the BCF2Encoder stream.
 *
 * The key is emitted with a single call to {@code encoder.encodeTypedInt}, passing
 * {@code dictionaryOffset} as the value and {@code dictionaryOffsetType} as its type.
 *
 * @param encoder where we write our dictionary offset
 * @throws IOException if thrown by the encoder
 */
public final void writeFieldKey(final BCF2Encoder encoder) throws IOException { encoder.encodeTypedInt(dictionaryOffset, dictionaryOffsetType); }
/**
 * For every provided non-CHAR value and each of several lengths, writes a raw sequence that
 * alternates between null (even positions) and the value (odd positions), then decodes the
 * sequence element by element with a size-1 type descriptor and checks each decoded element
 * against the expected missing or real value.
 */
@Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
public void testBCF2EncodingVectorsWithMissing(final List<BCF2TypedValue> toEncode) throws IOException {
    final int[] vectorLengths = {2, 5, 10, 15, 20, 25};

    for ( final BCF2TypedValue tv : toEncode ) {
        // CHAR values are excluded from this test
        if ( tv.type == BCF2Type.CHAR ) {
            continue;
        }

        for ( final int length : vectorLengths ) {
            final byte td = BCF2Utils.encodeTypeDescriptor(1, tv.type);

            final BCF2Encoder rawEncoder = new BCF2Encoder();
            for ( int pos = 0; pos < length; pos++ ) {
                rawEncoder.encodeRawValue(pos % 2 == 0 ? null : tv.value, tv.type);
            }

            final BCF2Decoder rawDecoder = new BCF2Decoder(rawEncoder.getRecordBytes());
            for ( int pos = 0; pos < length; pos++ ) {
                final Object decoded = rawDecoder.decodeTypedValue(td);
                final boolean wasMissing = pos % 2 == 0;
                final BCF2TypedValue expected = wasMissing ? new BCF2TypedValue(null, tv.type) : tv;
                myAssertEquals(expected, decoded);
            }
        }
    }
}
/**
 * Writes a floating point value without type information: the double is narrowed to a
 * float, converted to its int bit pattern with {@code Float.floatToIntBits}, and handed to
 * encodeRawBytes as a BCF2Type.FLOAT.
 *
 * @param value the value to write
 * @throws IOException if thrown by encodeRawBytes
 */
public final void encodeRawFloat(final double value) throws IOException {
    final float narrowed = (float) value;
    final int bits = Float.floatToIntBits(narrowed);
    encodeRawBytes(bits, BCF2Type.FLOAT);
}
// Encodes tv by passing its value and type straight to the encoder's generic
// encodeTyped(value, type) entry point.
@Override public void encode(final BCF2Encoder encoder, final BCF2TypedValue tv) throws IOException { encoder.encodeTyped(tv.value, tv.type); } });
/**
 * Encodes the filter section for {@code vc}.
 *
 * If vc is filtered, its filters are written with encodeStringsByRef. Otherwise, if filters
 * were applied, the single VCFConstants.PASSES_FILTERS_v4 entry is written the same way.
 * If neither holds, a typed missing INT8 is written.
 *
 * @param vc the variant context whose filter state is encoded
 * @throws IOException if thrown by the encoder
 */
private void buildFilter( VariantContext vc ) throws IOException {
    if ( vc.isFiltered() ) {
        encodeStringsByRef(vc.getFilters());
        return;
    }

    if ( vc.filtersWereApplied() ) {
        encodeStringsByRef(Collections.singleton(VCFConstants.PASSES_FILTERS_v4));
        return;
    }

    encoder.encodeTypedMissing(BCF2Type.INT8);
}
private byte[] buildSamplesData(final VariantContext vc) throws IOException { final BCF2Codec.LazyData lazyData = getLazyData(vc); // has critical side effects if ( lazyData != null ) { // we never decoded any data from this BCF file, so just pass it back return lazyData.bytes; } // we have to do work to convert the VC into a BCF2 byte stream final List<String> genotypeFields = vc.calcVCFGenotypeKeys(header); for ( final String field : genotypeFields ) { final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field); if ( writer == null ) errorUnexpectedFieldToWrite(vc, field, "FORMAT"); assert writer != null; writer.start(encoder, vc); for ( final String name : sampleNames ) { Genotype g = vc.getGenotype(name); if ( g == null ) g = GenotypeBuilder.createMissing(name, writer.nValuesPerGenotype); writer.addGenotype(encoder, vc, g); } writer.done(encoder, vc); } return encoder.getRecordBytes(); }
/**
 * Round-trips each value in {@code toEncode}: the value is encoded into a fresh encoder using
 * {@code func}, decoded again from the record bytes, and the decoded object is asserted to be
 * non-null, not a List, and equal to the original according to myAssertEquals.
 *
 * @param toEncode the typed values to round-trip
 * @param func     the encoding strategy under test
 * @throws IOException if thrown while encoding or decoding
 */
public void testBCF2BasicTypesWithEncodeMe(final List<BCF2TypedValue> toEncode, final EncodeMe func) throws IOException {
    for ( final BCF2TypedValue original : toEncode ) {
        final BCF2Encoder singleEncoder = new BCF2Encoder();
        func.encode(singleEncoder, original);

        final byte[] recordBytes = singleEncoder.getRecordBytes();
        final BCF2Decoder singleDecoder = new BCF2Decoder(recordBytes);
        final Object roundTripped = singleDecoder.decodeTypedValue();

        Assert.assertNotNull(roundTripped);
        Assert.assertFalse(roundTripped instanceof List);
        myAssertEquals(original, roundTripped);
    }
}
private byte[] buildSitesData( VariantContext vc ) throws IOException { final int contigIndex = contigDictionary.get(vc.getContig()); if ( contigIndex == -1 ) throw new IllegalStateException(String.format("Contig %s not found in sequence dictionary from reference", vc.getContig())); // note use of encodeRawValue to not insert the typing byte encoder.encodeRawValue(contigIndex, BCF2Type.INT32); // pos. GATK is 1 based, BCF2 is 0 based encoder.encodeRawValue(vc.getStart() - 1, BCF2Type.INT32); // ref length. GATK is closed, but BCF2 is open so the ref length is GATK end - GATK start + 1 // for example, a SNP is in GATK at 1:10-10, which has ref length 10 - 10 + 1 = 1 encoder.encodeRawValue(vc.getEnd() - vc.getStart() + 1, BCF2Type.INT32); // qual if ( vc.hasLog10PError() ) encoder.encodeRawFloat((float) vc.getPhredScaledQual()); else encoder.encodeRawMissingValue(BCF2Type.FLOAT); // info fields final int nAlleles = vc.getNAlleles(); final int nInfo = vc.getAttributes().size(); final int nGenotypeFormatFields = getNGenotypeFormatFields(vc); final int nSamples = header.getNGenotypeSamples(); encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x0000FFFF), BCF2Type.INT32); encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x00FFFFF), BCF2Type.INT32); buildID(vc); buildAlleles(vc); buildFilter(vc); buildInfo(vc); return encoder.getRecordBytes(); }
/**
 * Writes a single value without type information.
 *
 * If {@code value} is the very same reference as {@code type.getMissingJavaValue()} the
 * missing marker for the type is written. Otherwise the value is cast according to
 * {@code type} (Integer for INT8/INT16/INT32, Double for FLOAT, Byte for CHAR) and written
 * with the matching raw encoder.
 *
 * NOTE(review): the missing check is an identity comparison; presumably the missing Java
 * value is null -- confirm against BCF2Type.
 *
 * @param value the value to write, or the type's missing Java value
 * @param type  the BCF2 type to write the value as
 * @throws IllegalArgumentException if type is not one of the handled types
 * @throws ClassCastException if value is not of the Java class expected for type; the
 *                            original exception is attached as the cause
 * @throws IOException if thrown by the delegated encoder method
 */
public final <T extends Object> void encodeRawValue(final T value, final BCF2Type type) throws IOException {
    try {
        if ( value == type.getMissingJavaValue() )
            encodeRawMissingValue(type);
        else {
            switch (type) {
                case INT8:
                case INT16:
                case INT32: encodeRawBytes((Integer) value, type); break;
                case FLOAT: encodeRawFloat((Double) value); break;
                case CHAR:  encodeRawChar((Byte) value); break;
                default:    throw new IllegalArgumentException("Illegal type encountered " + type);
            }
        }
    } catch ( ClassCastException e ) {
        // ClassCastException has no (message, cause) constructor, so attach the original
        // failure explicitly to keep its stack trace available to whoever debugs this
        final ClassCastException wrapped = new ClassCastException("BUG: invalid type cast to " + type + " from " + value);
        wrapped.initCause(e);
        throw wrapped;
    }
}