/**
 * Builds the iterator state from an input stream: keeps a reference to the stream, asks the
 * codec for a line source over it, and reads the header from that line source.
 *
 * @param inputStream the stream the codec reads from
 */
VCFReaderIterator(final InputStream inputStream) {
    this.inputStream = inputStream;
    // The header is read eagerly, so the line source is positioned after it once construction ends.
    lineIterator = codec.makeSourceFromStream(inputStream);
    vcfHeader = (VCFHeader) codec.readActualHeader(lineIterator);
}
/**
 * Builds the iterator state from an input stream: keeps a reference to the stream, asks the
 * codec for a line source over it, and reads the header from that line source.
 *
 * @param inputStream the stream the codec reads from
 */
VCFReaderIterator(final InputStream inputStream) {
    this.inputStream = inputStream;
    // The header is read eagerly, so the line source is positioned after it once construction ends.
    lineIterator = codec.makeSourceFromStream(inputStream);
    vcfHeader = (VCFHeader) codec.readActualHeader(lineIterator);
}
/**
 * Opens {@code input}, reads its header with {@code codec}, and returns a container holding
 * that header together with an iterable backed by the same line iterator.
 *
 * The file stream is not closed here: the returned iterable keeps pulling from the line
 * iterator that wraps it.
 *
 * @param input the file to read
 * @param codec the codec used to read the header and handed to the iterable
 * @return a container with the header and an iterable over the remaining content
 * @throws FileNotFoundException if {@code input} cannot be opened
 */
public static VariantContextContainer readAllVCs(final File input, final VCFCodec codec) throws FileNotFoundException {
    final FileInputStream fileStream = new FileInputStream(input);
    final SynchronousLineReader lineReader = new SynchronousLineReader(new BufferedInputStream(fileStream));
    final LineIterator lineIterator = new LineIteratorImpl(lineReader);
    final VCFHeader vcfHeader = (VCFHeader) codec.readActualHeader(lineIterator);

    // Both callbacks delegate to the single shared line iterator created above.
    final VariantContextTestProvider.VCIterable<LineIterator> remaining =
            new VariantContextTestProvider.VCIterable<LineIterator>(codec, vcfHeader) {
                @Override
                public boolean hasNext() {
                    return lineIterator.hasNext();
                }

                @Override
                public LineIterator nextSource() {
                    return lineIterator;
                }
            };

    return new VariantContextTestProvider.VariantContextContainer(vcfHeader, remaining);
}
/** * Read all of the VCF records from source into memory, returning the header and the VariantContexts * * SHOULD ONLY BE USED FOR UNIT/INTEGRATION TESTING PURPOSES! * * @param source the file to read, must be in VCF4 format * @return * @throws java.io.IOException */ public static Pair<VCFHeader, List<VariantContext>> readVCF(final File source) throws IOException { // read in the features final List<VariantContext> vcs = new ArrayList<VariantContext>(); final VCFCodec codec = new VCFCodec(); PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source)); final LineIterator vcfSource = codec.makeSourceFromStream(pbs); try { final VCFHeader vcfHeader = (VCFHeader) codec.readActualHeader(vcfSource); while (vcfSource.hasNext()) { final VariantContext vc = codec.decode(vcfSource); if ( vc != null ) vcs.add(vc); } return new Pair<VCFHeader, List<VariantContext>>(vcfHeader, vcs); } finally { codec.close(vcfSource); } }
/**
 * Parses {@code headerStr} into a VCFHeader and asserts that the number of metadata lines
 * equals {@code VCF4headerStringCount}.
 *
 * @param headerStr the header text to parse
 * @return the parsed header
 */
private VCFHeader createHeader(String headerStr) {
    final StringReader headerText = new StringReader(headerStr);
    final LineIteratorImpl headerLines = new LineIteratorImpl(new SynchronousLineReader(headerText));
    final VCFHeader parsed = (VCFHeader) new VCFCodec().readActualHeader(headerLines);
    Assert.assertEquals(parsed.getMetaDataInInputOrder().size(), VCF4headerStringCount);
    return parsed;
}
@Test public void testAskingGCContent() throws IOException{ WalkerTestSpec spec = new WalkerTestSpec( baseTestString() + " --variant " + privateTestDir + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000 -A GCContent", 1, Arrays.asList("fcc42d7e0351efa7c3db0a114cbc17ed")); final File outputVCF = executeTest("test file has annotations, adding GCContent annotation", spec).getFirst().get(0); final VCFCodec codec = new VCFCodec(); final VCFHeader header = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(new FileInputStream(outputVCF))); final VCFHeaderLine infoLineGC = header.getInfoHeaderLine(GATKVCFConstants.GC_CONTENT_KEY); // GC content must be a Float type Assert.assertTrue(infoLineGC.toString().contains("Type=Float")); }
@Test(enabled = true) public void testGATKVersionInVCF() throws Exception { WalkerTestSpec spec = new WalkerTestSpec("-T TestPrintVariantsWalker -R " + b37KGReference + " -V " + privateTestDir + "NA12878.WGS.b37.chr20.firstMB.vcf" + " -o %s -L 20:61098", 1, Arrays.asList("")); spec.disableShadowBCF(); final File vcf = executeTest("testGATKVersionInVCF", spec).first.get(0); final VCFCodec codec = new VCFCodec(); final VCFHeader header = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(new FileInputStream(vcf))); // go through the metadata headers and look for ones that start with the GATK_COMMAND_LINE_KEY VCFHeaderLine versionLine = null; for ( final VCFHeaderLine headerLine : header.getMetaDataInInputOrder()) { if(headerLine.getKey().startsWith(GATKVCFUtils.GATK_COMMAND_LINE_KEY)) { versionLine = headerLine; break; } } Assert.assertNotNull(versionLine); Assert.assertTrue(versionLine.toString().contains("TestPrintVariantsWalker")); }
/**
 * Runs TestPrintVariantsWalker on {@code gatkCommandLineInHeader.vcf}, reads the header of the
 * produced VCF, and checks that among the metadata lines whose key starts with
 * {@code GATKVCFUtils.GATK_COMMAND_LINE_KEY} exactly one mentions "HaplotypeCaller" and exactly
 * one mentions "TestPrintVariantsWalker".
 *
 * @throws Exception if the walker run or reading the output VCF fails
 */
@Test(enabled = true)
public void testMultipleGATKVersionsInVCF() throws Exception {
    WalkerTestSpec spec = new WalkerTestSpec("-T TestPrintVariantsWalker -R " + b37KGReference +
            " -V " + privateTestDir + "gatkCommandLineInHeader.vcf"
            + " -o %s",
            1, Arrays.asList(""));
    spec.disableShadowBCF();
    final File vcf = executeTest("testMultipleGATKVersionsInVCF", spec).first.get(0);

    final VCFCodec codec = new VCFCodec();
    final FileInputStream vcfStream = new FileInputStream(vcf);
    final VCFHeader header;
    try {
        header = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(vcfStream));
    } finally {
        // Only the header is needed; release the file handle as soon as it has been parsed.
        vcfStream.close();
    }

    boolean foundHC = false;
    boolean foundPV = false;
    for ( final VCFHeaderLine line : header.getMetaDataInInputOrder() ) {
        if ( line.getKey().startsWith(GATKVCFUtils.GATK_COMMAND_LINE_KEY) ) {
            // The assertFalse calls reject a second command-line entry for the same tool.
            if ( line.toString().contains("HaplotypeCaller") ) {
                Assert.assertFalse(foundHC);
                foundHC = true;
            }
            if ( line.toString().contains("TestPrintVariantsWalker") ) {
                Assert.assertFalse(foundPV);
                foundPV = true;
            }
        }
    }

    Assert.assertTrue(foundHC, "Didn't find HaplotypeCaller command line header field");
    Assert.assertTrue(foundPV, "Didn't find TestPrintVariantsWalker command line header field");
}
final File vcf = executeTest("testMultipleGATKVersionsSameWalkerInVCF", spec).first.get(0); final VCFCodec codec = new VCFCodec(); final VCFHeader header = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(new FileInputStream(vcf)));
/**
 * Parses {@code headerStr} into a VCFHeader using a fresh VCFCodec.
 *
 * @param headerStr the header text to parse
 * @return the parsed header
 */
private VCFHeader createHeader(String headerStr) {
    final VCFCodec codec = new VCFCodec();
    final PositionalBufferedStream headerBytes =
            new PositionalBufferedStream(new StringBufferInputStream(headerStr));
    return (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(headerBytes));
}
@Test(enabled=true) public void testRobustness() { VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_1)))); VCFHeader disjointCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_2)))); VCFHeader overlapCompHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_3_HEADER_3)))); ConcordanceMetrics disjointMetrics = new ConcordanceMetrics(evalHeader,disjointCompHeader,null); ConcordanceMetrics overlapMetrics = new ConcordanceMetrics(evalHeader,overlapCompHeader,null); // test what happens if you put in disjoint sets and start making requests Assert.assertEquals(0,disjointMetrics.getPerSampleGenotypeConcordance().size()); String msg = "No Exception Thrown"; try { disjointMetrics.getGenotypeConcordance("test3_sample4"); } catch ( Exception e) { msg = e.getMessage(); } Assert.assertEquals("Attempted to request the concordance table for sample test3_sample4 on which it was not calculated",msg); // test that the overlapping sample is in the overlapping table (basically do this without throwing an exception) overlapMetrics.getGenotypeConcordance("test3_sample3"); String msg2 = "No Exception Thrown"; try { disjointMetrics.getGenotypeConcordance("test3_sample4"); } catch ( Exception e) { msg2 = e.getMessage(); } Assert.assertEquals("Attempted to request the concordance table for sample test3_sample4 on which it was not calculated",msg2); // test what happens if you try to calculate NRS and NRD on an empty table Assert.assertEquals(disjointMetrics.getOverallNRD(), 1.0, 1e-16); Assert.assertEquals(disjointMetrics.getOverallNRS(), 0.0, 1e-16); }
/**
 * Updates a ConcordanceMetrics built from two TEST_2_HEADER headers with the eval/truth pair
 * from {@code getMonoallelicData()} and checks the mismatching-alt counts and one table cell
 * for each of test2_sample1 and test2_sample2.
 */
@Test
public void testMonoallelicSite() {
    final Pair<VariantContext,VariantContext> pair = getMonoallelicData();
    final VariantContext evalVC = pair.getFirst();
    final VariantContext truthVC = pair.getSecond();

    final VCFCodec codec = new VCFCodec();
    final PositionalBufferedStream evalStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER));
    final VCFHeader evalHeader = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(evalStream));
    final PositionalBufferedStream compStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER));
    final VCFHeader compHeader = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(compStream));

    final ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader, compHeader, null);
    metrics.update(evalVC, truthVC);

    Assert.assertEquals(metrics.getGenotypeConcordance("test2_sample1").getnMismatchingAlt(), 0);
    Assert.assertEquals(metrics.getGenotypeConcordance("test2_sample2").getnMismatchingAlt(), 0);
    Assert.assertEquals(metrics.getGenotypeConcordance("test2_sample1").getTable()[3][1], 1);
    Assert.assertEquals(metrics.getGenotypeConcordance("test2_sample2").getTable()[0][1], 1);
}
}
/**
 * Feeds the eval/truth pairs from {@code getData7()} one at a time into a ConcordanceMetrics
 * built from two TEST_2_HEADER headers. After each update it checks the running
 * ALLELES_DO_NOT_MATCH count against the expected sequence, and at the end it checks the
 * final count for every site concordance type asserted below.
 */
@Test(enabled = true)
public void testSites() {
    final VCFCodec codec = new VCFCodec();
    final PositionalBufferedStream evalStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER));
    final VCFHeader evalHeader = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(evalStream));
    final PositionalBufferedStream compStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER));
    final VCFHeader compHeader = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(compStream));
    final ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader, compHeader, null);

    final List<Pair<VariantContext,VariantContext>> data = getData7();
    // Expected ALLELES_DO_NOT_MATCH count after each successive update.
    final int[] expecNotMatch = {0,0,0,0,0,1,1};

    for ( int idx = 0; idx < data.size(); idx++ ) {
        final Pair<VariantContext,VariantContext> varPair = data.get(idx);
        metrics.update(varPair.getFirst(), varPair.getSecond());
        Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.ALLELES_DO_NOT_MATCH), expecNotMatch[idx]);
        logger.info(idx);
    }

    Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.ALLELES_DO_NOT_MATCH), 1);
    Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.ALLELES_MATCH), 2);
    Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.EVAL_ONLY), 1);
    Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.TRUTH_ONLY), 1);
    Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.EVAL_SUBSET_TRUTH), 1);
    Assert.assertEquals(metrics.getOverallSiteConcordance().get(ConcordanceMetrics.SiteConcordanceType.EVAL_SUPERSET_TRUTH), 1);
}
/**
 * Updates a ConcordanceMetrics built from two TEST_1_HEADER headers with the eval/truth pair
 * from {@code getData4()} and checks the mismatching-alt count for test1_sample2 plus selected
 * cells of the test1_sample2 and test1_sample3 concordance tables.
 */
@Test(enabled=true)
public void testNoCalls() {
    final Pair<VariantContext,VariantContext> pair = getData4();
    final VariantContext evalVC = pair.getFirst();
    final VariantContext truthVC = pair.getSecond();

    final VCFCodec codec = new VCFCodec();
    final PositionalBufferedStream evalStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER));
    final VCFHeader evalHeader = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(evalStream));
    final PositionalBufferedStream compStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER));
    final VCFHeader compHeader = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(compStream));

    final ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader, compHeader, null);
    metrics.update(evalVC, truthVC);

    Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(), 0);

    // Each table is fetched right before its first check so lookups happen in the same order as the assertions.
    final int[][] sample2Table = metrics.getGenotypeConcordance("test1_sample2").getTable();
    Assert.assertEquals(sample2Table[2][1], 0);
    Assert.assertEquals(sample2Table[0][2], 1);

    final int[][] sample3Table = metrics.getGenotypeConcordance("test1_sample3").getTable();
    Assert.assertEquals(sample3Table[2][1], 0);
    Assert.assertEquals(sample3Table[2][3], 0);
    Assert.assertEquals(sample3Table[2][0], 1);
}
/**
 * Updates a ConcordanceMetrics built from two TEST_1_HEADER headers with the eval/truth pair
 * from {@code getData5()}. Checks that the eval genotype for test1_sample2 has type
 * UNAVAILABLE, then checks the mismatching-alt count and selected cells of the test1_sample2
 * and test1_sample3 concordance tables.
 */
@Test(enabled=true)
public void testMissing() {
    final Pair<VariantContext,VariantContext> pair = getData5();
    final VariantContext evalVC = pair.getFirst();
    final VariantContext truthVC = pair.getSecond();

    final VCFCodec codec = new VCFCodec();
    final PositionalBufferedStream evalStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER));
    final VCFHeader evalHeader = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(evalStream));
    final PositionalBufferedStream compStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER));
    final VCFHeader compHeader = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(compStream));

    final ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader, compHeader, null);
    metrics.update(evalVC, truthVC);

    Assert.assertTrue(evalVC.getGenotype("test1_sample2").getType().equals(GenotypeType.UNAVAILABLE));
    Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(), 0);

    // Each table is fetched right before its first check so lookups happen in the same order as the assertions.
    final int[][] sample2Table = metrics.getGenotypeConcordance("test1_sample2").getTable();
    Assert.assertEquals(sample2Table[2][1], 0);
    Assert.assertEquals(sample2Table[0][2], 0);
    Assert.assertEquals(sample2Table[4][2], 1);

    final int[][] sample3Table = metrics.getGenotypeConcordance("test1_sample3").getTable();
    Assert.assertEquals(sample3Table[2][1], 0);
    Assert.assertEquals(sample3Table[2][3], 0);
    Assert.assertEquals(sample3Table[2][0], 0);
    Assert.assertEquals(sample3Table[2][4], 1);
}
/**
 * Updates a ConcordanceMetrics built from two TEST_1_HEADER headers with the eval/truth pair
 * from {@code getData1()}. Checks the genotype-type ordinals of test1_sample2 in eval and
 * truth, then selected cells of the per-sample and overall concordance tables.
 */
@Test(enabled=true)
public void testSimpleComparison() {
    final Pair<VariantContext,VariantContext> pair = getData1();
    final VariantContext evalVC = pair.getFirst();
    final VariantContext truthVC = pair.getSecond();

    final VCFCodec codec = new VCFCodec();
    final PositionalBufferedStream evalStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER));
    final VCFHeader evalHeader = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(evalStream));
    final PositionalBufferedStream compStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER));
    final VCFHeader compHeader = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(compStream));

    final ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader, compHeader, null);
    metrics.update(evalVC, truthVC);

    // The table checks below index [2][1] for the same sample, matching these two ordinals.
    Assert.assertEquals(evalVC.getGenotype("test1_sample2").getType().ordinal(), 2);
    Assert.assertEquals(truthVC.getGenotype("test1_sample2").getType().ordinal(), 1);

    Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(), 0);
    Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1], 1);
    Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1], 0);
    Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][3], 1);
    Assert.assertEquals(metrics.getOverallGenotypeConcordance().getTable()[1][1], 1);
}
/**
 * Updates a ConcordanceMetrics built from two TEST_1_HEADER headers with the eval/truth pair
 * from {@code getData3()}. Checks the mismatching-alt count for test1_sample1, selected cells
 * of the test1_sample2, test1_sample3 and overall genotype tables, and three entries of the
 * overall site concordance array.
 */
@Test(enabled=true)
public void testComplex() {
    final Pair<VariantContext,VariantContext> pair = getData3();
    final VariantContext evalVC = pair.getFirst();
    final VariantContext truthVC = pair.getSecond();

    final VCFCodec codec = new VCFCodec();
    final PositionalBufferedStream evalStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER));
    final VCFHeader evalHeader = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(evalStream));
    final PositionalBufferedStream compStream = new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER));
    final VCFHeader compHeader = (VCFHeader) codec.readActualHeader(codec.makeSourceFromStream(compStream));

    final ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader, compHeader, null);
    metrics.update(evalVC, truthVC);

    Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample1").getnMismatchingAlt(), 1);

    // Each table is fetched right before its first check so lookups happen in the same order as the assertions.
    final int[][] sample2Table = metrics.getGenotypeConcordance("test1_sample2").getTable();
    Assert.assertEquals(sample2Table[2][1], 0);
    Assert.assertEquals(sample2Table[3][3], 1);
    Assert.assertEquals(sample2Table[1][1], 0);

    final int[][] sample3Table = metrics.getGenotypeConcordance("test1_sample3").getTable();
    Assert.assertEquals(sample3Table[2][1], 0);
    Assert.assertEquals(sample3Table[2][2], 1);

    Assert.assertEquals(metrics.getOverallGenotypeConcordance().getTable()[3][3], 1);

    // The site concordance array is indexed by the ordinal of the concordance type.
    Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.EVAL_SUPERSET_TRUTH.ordinal()], 0);
    Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_DO_NOT_MATCH.ordinal()], 1);
    Assert.assertEquals(metrics.getOverallSiteConcordance().getSiteConcordance()[ConcordanceMetrics.SiteConcordanceType.ALLELES_MATCH.ordinal()], 0);
}
VariantContext truth = data.getSecond(); VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null); int[][] table = metrics.getOverallGenotypeConcordance().getTable();
VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_2_HEADER)))); ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null);
VariantContext truth = data.getSecond(); VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null); metrics.update(eval,truth); truth = data.getFirst(); codec = new VCFCodec(); evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); metrics = new ConcordanceMetrics(evalHeader,compHeader,null); metrics.update(eval,truth);