/**
 * Creates an iterator over the supplied stream: the stream is stored, handed to the
 * codec to obtain a line source, and the header is read from that source immediately.
 *
 * @param inputStream the stream to read from; kept in the {@code inputStream} field
 */
VCFReaderIterator(final InputStream inputStream) {
    this.inputStream = inputStream;
    // The parameter and the field now refer to the same stream, so either may be passed on.
    lineIterator = codec.makeSourceFromStream(inputStream);
    vcfHeader = (VCFHeader) codec.readActualHeader(lineIterator);
}
/**
 * Creates an iterator over the supplied stream: the stream is stored, handed to the
 * codec to obtain a line source, and the header is read from that source immediately.
 *
 * @param inputStream the stream to read from; kept in the {@code inputStream} field
 */
VCFReaderIterator(final InputStream inputStream) {
    this.inputStream = inputStream;
    // The parameter and the field now refer to the same stream, so either may be passed on.
    lineIterator = codec.makeSourceFromStream(inputStream);
    vcfHeader = (VCFHeader) codec.readActualHeader(lineIterator);
}
/**
 * Parses VCF header text held in a string into a {@link VCFHeader}.
 *
 * @param variantFileHeader the header text to parse; must not be null
 * @return the header value produced by the codec, cast to {@link VCFHeader}
 * @throws IOException propagated from the codec's header read
 */
public static VCFHeader parseVcfHeader(String variantFileHeader) throws IOException {
    final VCFCodec vcfCodec = new VCFCodec();
    // Encode with an explicit charset: the no-argument getBytes() uses the platform
    // default, which makes the bytes handed to the codec machine-dependent.
    final byte[] headerBytes = variantFileHeader.getBytes(java.nio.charset.StandardCharsets.UTF_8);
    final LineIterator source = vcfCodec.makeSourceFromStream(new ByteArrayInputStream(headerBytes));
    final FeatureCodecHeader featureCodecHeader = vcfCodec.readHeader(source);
    return (VCFHeader) featureCodecHeader.getHeaderValue();
}
/** * Read all of the VCF records from source into memory, returning the header and the VariantContexts * * SHOULD ONLY BE USED FOR UNIT/INTEGRATION TESTING PURPOSES! * * @param source the file to read, must be in VCF4 format * @return * @throws java.io.IOException */ public static Pair<VCFHeader, List<VariantContext>> readVCF(final File source) throws IOException { // read in the features final List<VariantContext> vcs = new ArrayList<VariantContext>(); final VCFCodec codec = new VCFCodec(); PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source)); final LineIterator vcfSource = codec.makeSourceFromStream(pbs); try { final VCFHeader vcfHeader = (VCFHeader) codec.readActualHeader(vcfSource); while (vcfSource.hasNext()) { final VariantContext vc = codec.decode(vcfSource); if ( vc != null ) vcs.add(vc); } return new Pair<VCFHeader, List<VariantContext>>(vcfHeader, vcs); } finally { codec.close(vcfSource); } }
@Test public void testAskingGCContent() throws IOException{ WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000 -A GCContent", 1, Arrays.asList("fcc42d7e0351efa7c3db0a114cbc17ed")); final File outputVCF = executeTest("test file has annotations, adding GCContent annotation", spec).getFirst().get(0); final VCFCodec codec = new VCFCodec(); final VCFHeader header = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(new FileInputStream(outputVCF))); final VCFHeaderLine infoLineGC = header.getInfoHeaderLine(GATKVCFConstants.GC_CONTENT_KEY); // GC content must be a Float type Assert.assertTrue(infoLineGC.toString().contains("Type=Float")); }
@Test(enabled = true) public void testGATKVersionInVCF() throws Exception { WalkerTestSpec spec = new WalkerTestSpec("-T TestPrintVariantsWalker -R " + b37KGReference + " -V " + privateTestDir + "NA12878.WGS.b37.chr20.firstMB.vcf" + " -o %s -L 20:61098", 1, Arrays.asList("")); spec.disableShadowBCF(); final File vcf = executeTest("testGATKVersionInVCF", spec).first.get(0); final VCFCodec codec = new VCFCodec(); final VCFHeader header = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(new FileInputStream(vcf))); // go through the metadata headers and look for ones that start with the GATK_COMMAND_LINE_KEY VCFHeaderLine versionLine = null; for ( final VCFHeaderLine headerLine : header.getMetaDataInInputOrder()) { if(headerLine.getKey().startsWith(GATKVCFUtils.GATK_COMMAND_LINE_KEY)) { versionLine = headerLine; break; } } Assert.assertNotNull(versionLine); Assert.assertTrue(versionLine.toString().contains("TestPrintVariantsWalker")); }
/**
 * Runs TestPrintVariantsWalker on gatkCommandLineInHeader.vcf and checks that, among
 * the header lines whose key starts with GATKVCFUtils.GATK_COMMAND_LINE_KEY, exactly
 * one mentions HaplotypeCaller and exactly one mentions TestPrintVariantsWalker.
 *
 * @throws Exception if running the test spec or reading the output VCF fails
 */
@Test(enabled = true)
public void testMultipleGATKVersionsInVCF() throws Exception {
    final WalkerTestSpec spec = new WalkerTestSpec("-T TestPrintVariantsWalker -R " + b37KGReference +
            " -V " + privateTestDir + "gatkCommandLineInHeader.vcf"
            + " -o %s",
            1, Arrays.asList(""));
    spec.disableShadowBCF();
    final File vcf = executeTest("testMultipleGATKVersionsInVCF", spec).first.get(0);

    final VCFCodec codec = new VCFCodec();
    final VCFHeader header;
    // Close the file handle once the header has been read instead of leaking it.
    try (FileInputStream vcfStream = new FileInputStream(vcf)) {
        header = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(vcfStream));
    }

    boolean foundHC = false;
    boolean foundPV = false;
    for (final VCFHeaderLine line : header.getMetaDataInInputOrder()) {
        if (line.getKey().startsWith(GATKVCFUtils.GATK_COMMAND_LINE_KEY)) {
            // each assertFalse guards against the same tool appearing on two lines
            if (line.toString().contains("HaplotypeCaller")) {
                Assert.assertFalse(foundHC);
                foundHC = true;
            }
            if (line.toString().contains("TestPrintVariantsWalker")) {
                Assert.assertFalse(foundPV);
                foundPV = true;
            }
        }
    }

    Assert.assertTrue(foundHC, "Didn't find HaplotypeCaller command line header field");
    Assert.assertTrue(foundPV, "Didn't find TestPrintVariantsWalker command line header field");
}
/**
 * Returns a list of VariantContext records from a VCF file
 *
 * @param vcfFile VCF file
 *
 * @throws IOException if the file does not exist or can not be opened
 *
 * @return list of VariantContext records, one per line following the header, in file order
 */
private static List<VariantContext> getVariantContexts(final File vcfFile) throws IOException {
    final VCFCodec codec = new VCFCodec();
    final List<VariantContext> VCs = new ArrayList<>();
    // try-with-resources closes the file even when an assertion or decode fails mid-way
    try (FileInputStream s = new FileInputStream(vcfFile)) {
        final LineIterator lineIteratorVCF = codec.makeSourceFromStream(new PositionalBufferedStream(s));
        // consume the header so the iterator is positioned at the first record
        codec.readHeader(lineIteratorVCF);

        while (lineIteratorVCF.hasNext()) {
            final String line = lineIteratorVCF.next();
            Assert.assertNotNull(line);
            VCs.add(codec.decode(line));
        }
    }
    return VCs;
}
// Executes the test spec under the name "testMultipleGATKVersionsSameWalkerInVCF", takes element 0 of the
// result's `first` component as the VCF to inspect, and reads that file's header with a fresh VCFCodec.
// NOTE(review): the FileInputStream created here is not closed by these statements - confirm whether that is intended.
final File vcf = executeTest("testMultipleGATKVersionsSameWalkerInVCF", spec).first.get(0); final VCFCodec codec = new VCFCodec(); final VCFHeader header = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(new FileInputStream(vcf)));
@Test public void HCTestDanglingTailMergingForDeletions() throws IOException { final String base = String.format("-T HaplotypeCaller --disableDithering --pcr_indel_model NONE -R %s -I %s", REF, NA12878_BAM) + " --no_cmdline_in_header -o %s -L 20:10130740-10130800 --allowNonUniqueKmersInRef"; final WalkerTestSpec spec = new WalkerTestSpec(base, 1, Arrays.asList("")); final File outputVCF = executeTest("HCTestDanglingTailMergingForDeletions", spec).getFirst().get(0); // confirm that the call is the correct one final VCFCodec codec = new VCFCodec(); final FileInputStream s = new FileInputStream(outputVCF); final LineIterator lineIterator = codec.makeSourceFromStream(new PositionalBufferedStream(s)); codec.readHeader(lineIterator); final String line = lineIterator.next(); Assert.assertFalse(line == null); final VariantContext vc = codec.decode(line); Assert.assertTrue(vc.isBiallelic()); Assert.assertTrue(vc.getReference().basesMatch("ATGTATG")); Assert.assertTrue(vc.getAlternateAllele(0).basesMatch("A")); }
/**
 * Parses VCF header text into a {@link VCFHeader} using a fresh VCFCodec.
 *
 * @param headerStr the header text to parse; must not be null
 * @return the object returned by the codec's readActualHeader, cast to {@link VCFHeader}
 */
private VCFHeader createHeader(String headerStr) {
    final VCFCodec codec = new VCFCodec();
    // StringBufferInputStream is deprecated because it keeps only the low 8 bits of each
    // char. ISO-8859-1 yields the same bytes for every char up to U+00FF, so realistic
    // (ASCII) header text is fed to the codec unchanged.
    final byte[] headerBytes = headerStr.getBytes(java.nio.charset.StandardCharsets.ISO_8859_1);
    return (VCFHeader) codec.readActualHeader(
            codec.makeSourceFromStream(new PositionalBufferedStream(new java.io.ByteArrayInputStream(headerBytes))));
}
@Test(enabled=true) public void testRobustness() { VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_1)))); VCFHeader disjointCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_2)))); VCFHeader overlapCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_3)))); ConcordanceMetrics disjointMetrics = new ConcordanceMetrics(evalHeader,disjointCompHeader,null); ConcordanceMetrics overlapMetrics = new ConcordanceMetrics(evalHeader,overlapCompHeader,null); // test what happens if you put in disjoint sets and start making requests Assert.assertEquals(0,disjointMetrics.getPerSampleGenotypeConcordance().size()); String msg = "No Exception Thrown"; try { disjointMetrics.getGenotypeConcordance("test3_sample4"); } catch ( Exception e) { msg = e.getMessage(); } Assert.assertEquals("Attempted to request the concordance table for sample test3_sample4 on which it was not calculated",msg); // test that the overlapping sample is in the overlapping table (basically do this without throwing an exception) overlapMetrics.getGenotypeConcordance("test3_sample3"); String msg2 = "No Exception Thrown"; try { disjointMetrics.getGenotypeConcordance("test3_sample4"); } catch ( Exception e) { msg2 = e.getMessage(); } Assert.assertEquals("Attempted to request the concordance table for sample test3_sample4 on which it was not calculated",msg2); // test what happens if you try to calculate NRS and NRD on an empty table Assert.assertEquals(disjointMetrics.getOverallNRD(), 1.0, 1e-16); Assert.assertEquals(disjointMetrics.getOverallNRS(), 0.0, 1e-16); }
/**
 * Returns a list of attribute values from a VCF file
 *
 * @param vcfFile VCF file
 * @param attributeName attribute name
 *
 * @throws IOException if the file does not exist or can not be opened
 *
 * @return list of attribute values, one entry per genotype that carries the extended attribute
 */
private List<String> getAttributeValues(final File vcfFile, final String attributeName) throws IOException {
    final VCFCodec codec = new VCFCodec();
    final List<String> attributeValues = new ArrayList<String>();
    final FileInputStream s = new FileInputStream(vcfFile);
    try {
        final LineIterator lineIteratorVCF = codec.makeSourceFromStream(new PositionalBufferedStream(s));
        // consume the header so the iterator is positioned at the first record
        codec.readHeader(lineIteratorVCF);

        while (lineIteratorVCF.hasNext()) {
            final String line = lineIteratorVCF.next();
            Assert.assertNotNull(line);
            final VariantContext vc = codec.decode(line);

            for (final Genotype g : vc.getGenotypes()) {
                if (g.hasExtendedAttribute(attributeName)) {
                    attributeValues.add((String) g.getExtendedAttribute(attributeName));
                }
            }
        }
    } finally {
        // always release the file handle, including when an assertion or decode fails
        s.close();
    }

    return attributeValues;
}
// Opens vcfFile, wraps it in a PositionalBufferedStream to obtain a line iterator from a fresh VCFCodec,
// and reads the header from that iterator; the header object returned by readHeader is discarded here.
// NOTE(review): stream `s` is not closed by these statements - confirm the surrounding code closes it.
final VCFCodec codec = new VCFCodec(); final FileInputStream s = new FileInputStream(vcfFile); final LineIterator lineIteratorVCF = codec.makeSourceFromStream(new PositionalBufferedStream(s)); codec.readHeader(lineIteratorVCF);
/**
 * Feeds the eval/truth pair from getMonoallelicData() into ConcordanceMetrics built
 * from two parses of TEST_2_HEADER and checks the mismatching-alt counts and selected
 * table cells for test2_sample1 and test2_sample2.
 */
@Test
public void testMonoallelicSite() {
    final Pair<VariantContext,VariantContext> evalAndTruth = getMonoallelicData();
    final VariantContext evalVC = evalAndTruth.getFirst();
    final VariantContext truthVC = evalAndTruth.getSecond();

    final VCFCodec vcfCodec = new VCFCodec();
    final PositionalBufferedStream evalStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER));
    final VCFHeader evalHeader = (VCFHeader) vcfCodec.readActualHeader(vcfCodec.makeSourceFromStream(evalStream));
    final PositionalBufferedStream compStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER));
    final VCFHeader compHeader = (VCFHeader) vcfCodec.readActualHeader(vcfCodec.makeSourceFromStream(compStream));

    final ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader, compHeader, null);
    metrics.update(evalVC, truthVC);

    final String sample1 = "test2_sample1";
    final String sample2 = "test2_sample2";
    Assert.assertEquals(metrics.getGenotypeConcordance(sample1).getnMismatchingAlt(), 0);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample2).getnMismatchingAlt(), 0);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample1).getTable()[3][1], 1);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample2).getTable()[0][1], 1);
}
// closes the enclosing type, whose opening lies outside this excerpt
}
/**
 * Feeds the eval/truth pair from getData4() into ConcordanceMetrics built from two
 * parses of TEST_1_HEADER and checks selected table cells for test1_sample2 and
 * test1_sample3.
 */
@Test(enabled=true)
public void testNoCalls() {
    final Pair<VariantContext,VariantContext> evalAndTruth = getData4();
    final VariantContext evalVC = evalAndTruth.getFirst();
    final VariantContext truthVC = evalAndTruth.getSecond();

    final VCFCodec vcfCodec = new VCFCodec();
    final PositionalBufferedStream evalStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER));
    final VCFHeader evalHeader = (VCFHeader) vcfCodec.readActualHeader(vcfCodec.makeSourceFromStream(evalStream));
    final PositionalBufferedStream compStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER));
    final VCFHeader compHeader = (VCFHeader) vcfCodec.readActualHeader(vcfCodec.makeSourceFromStream(compStream));

    final ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader, compHeader, null);
    metrics.update(evalVC, truthVC);

    final String sample2 = "test1_sample2";
    final String sample3 = "test1_sample3";
    Assert.assertEquals(metrics.getGenotypeConcordance(sample2).getnMismatchingAlt(), 0);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample2).getTable()[2][1], 0);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample2).getTable()[0][2], 1);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample3).getTable()[2][1], 0);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample3).getTable()[2][3], 0);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample3).getTable()[2][0], 1);
}
/**
 * Updates ConcordanceMetrics (built from two parses of TEST_2_HEADER) with each pair
 * from getData7() in turn, checking the running ALLELES_DO_NOT_MATCH count after every
 * update and the final count of each site-concordance type afterwards.
 */
@Test(enabled = true)
public void testSites() {
    final VCFCodec vcfCodec = new VCFCodec();
    final PositionalBufferedStream evalStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER));
    final VCFHeader evalHeader = (VCFHeader) vcfCodec.readActualHeader(vcfCodec.makeSourceFromStream(evalStream));
    final PositionalBufferedStream compStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER));
    final VCFHeader compHeader = (VCFHeader) vcfCodec.readActualHeader(vcfCodec.makeSourceFromStream(compStream));
    final ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader, compHeader, null);

    final List<Pair<VariantContext,VariantContext>> sitePairs = getData7();

    // expected cumulative ALLELES_DO_NOT_MATCH count after each successive update
    final int[] expecNotMatch = new int[]{0,0,0,0,0,1,1};
    for (int i = 0; i < sitePairs.size(); i++) {
        final Pair<VariantContext,VariantContext> sitePair = sitePairs.get(i);
        metrics.update(sitePair.getFirst(), sitePair.getSecond());
        Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.ALLELES_DO_NOT_MATCH), expecNotMatch[i]);
        logger.info(i);
    }

    Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.ALLELES_DO_NOT_MATCH), 1);
    Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.ALLELES_MATCH), 2);
    Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.EVAL_ONLY), 1);
    Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.TRUTH_ONLY), 1);
    Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.EVAL_SUBSET_TRUTH), 1);
    Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.EVAL_SUPERSET_TRUTH), 1);
}
/**
 * Feeds the eval/truth pair from getData1() into ConcordanceMetrics built from two
 * parses of TEST_1_HEADER, then checks the genotype-type ordinals of test1_sample2 and
 * selected cells of the per-sample and overall genotype tables.
 */
@Test(enabled=true)
public void testSimpleComparison() {
    final Pair<VariantContext,VariantContext> evalAndTruth = getData1();
    final VariantContext evalVC = evalAndTruth.getFirst();
    final VariantContext truthVC = evalAndTruth.getSecond();

    final VCFCodec vcfCodec = new VCFCodec();
    final PositionalBufferedStream evalStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER));
    final VCFHeader evalHeader = (VCFHeader) vcfCodec.readActualHeader(vcfCodec.makeSourceFromStream(evalStream));
    final PositionalBufferedStream compStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER));
    final VCFHeader compHeader = (VCFHeader) vcfCodec.readActualHeader(vcfCodec.makeSourceFromStream(compStream));

    final ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader, compHeader, null);
    metrics.update(evalVC, truthVC);

    final String sample2 = "test1_sample2";
    final String sample3 = "test1_sample3";
    Assert.assertEquals(evalVC.getGenotype(sample2).getType().ordinal(), 2);
    Assert.assertEquals(truthVC.getGenotype(sample2).getType().ordinal(), 1);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample2).getnMismatchingAlt(), 0);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample2).getTable()[2][1], 1);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample3).getTable()[2][1], 0);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample3).getTable()[2][3], 1);
    Assert.assertEquals(metrics.getOverallGenotypeConcordance().getTable()[1][1], 1);
}
/**
 * Feeds the eval/truth pair from getData5() into ConcordanceMetrics built from two
 * parses of TEST_1_HEADER, checks that the eval genotype of test1_sample2 has type
 * GenotypeType.UNAVAILABLE, and checks selected table cells for test1_sample2 and
 * test1_sample3.
 */
@Test(enabled=true)
public void testMissing() {
    final Pair<VariantContext,VariantContext> evalAndTruth = getData5();
    final VariantContext evalVC = evalAndTruth.getFirst();
    final VariantContext truthVC = evalAndTruth.getSecond();

    final VCFCodec vcfCodec = new VCFCodec();
    final PositionalBufferedStream evalStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER));
    final VCFHeader evalHeader = (VCFHeader) vcfCodec.readActualHeader(vcfCodec.makeSourceFromStream(evalStream));
    final PositionalBufferedStream compStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER));
    final VCFHeader compHeader = (VCFHeader) vcfCodec.readActualHeader(vcfCodec.makeSourceFromStream(compStream));

    final ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader, compHeader, null);
    metrics.update(evalVC, truthVC);

    final String sample2 = "test1_sample2";
    final String sample3 = "test1_sample3";
    Assert.assertTrue(evalVC.getGenotype(sample2).getType().equals(GenotypeType.UNAVAILABLE));
    Assert.assertEquals(metrics.getGenotypeConcordance(sample2).getnMismatchingAlt(), 0);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample2).getTable()[2][1], 0);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample2).getTable()[0][2], 0);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample2).getTable()[4][2], 1);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample3).getTable()[2][1], 0);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample3).getTable()[2][3], 0);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample3).getTable()[2][0], 0);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample3).getTable()[2][4], 1);
}
/**
 * Feeds the eval/truth pair from getData3() into ConcordanceMetrics built from two
 * parses of TEST_1_HEADER and checks the mismatching-alt count, selected per-sample
 * and overall genotype table cells, and three entries of the overall site-concordance
 * array.
 */
@Test(enabled=true)
public void testComplex() {
    final Pair<VariantContext,VariantContext> evalAndTruth = getData3();
    final VariantContext evalVC = evalAndTruth.getFirst();
    final VariantContext truthVC = evalAndTruth.getSecond();

    final VCFCodec vcfCodec = new VCFCodec();
    final PositionalBufferedStream evalStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER));
    final VCFHeader evalHeader = (VCFHeader) vcfCodec.readActualHeader(vcfCodec.makeSourceFromStream(evalStream));
    final PositionalBufferedStream compStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER));
    final VCFHeader compHeader = (VCFHeader) vcfCodec.readActualHeader(vcfCodec.makeSourceFromStream(compStream));

    final ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader, compHeader, null);
    metrics.update(evalVC, truthVC);

    final String sample1 = "test1_sample1";
    final String sample2 = "test1_sample2";
    final String sample3 = "test1_sample3";
    Assert.assertEquals(metrics.getGenotypeConcordance(sample1).getnMismatchingAlt(), 1);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample2).getTable()[2][1], 0);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample2).getTable()[3][3], 1);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample2).getTable()[1][1], 0);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample3).getTable()[2][1], 0);
    Assert.assertEquals(metrics.getGenotypeConcordance(sample3).getTable()[2][2], 1);
    Assert.assertEquals(metrics.getOverallGenotypeConcordance().getTable()[3][3], 1);

    // site-level array, indexed by the ordinal of each SiteConcordanceType
    Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.EVAL_SUPERSET_TRUTH.ordinal()], 0);
    Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_DO_NOT_MATCH.ordinal()], 1);
    Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_MATCH.ordinal()], 0);
}