private static double calculateNRS(int[][] concordanceCounts) { long confirmedVariant = 0; long unconfirmedVariant = 0; for ( GenotypeType truthState : Arrays.asList(GenotypeType.HET,GenotypeType.HOM_VAR) ) { for ( GenotypeType evalState : GenotypeType.values() ) { if ( evalState == GenotypeType.MIXED ) continue; if ( evalState.equals(GenotypeType.HET) || evalState.equals(GenotypeType.HOM_VAR) ) confirmedVariant += concordanceCounts[evalState.ordinal()][truthState.ordinal()]; else unconfirmedVariant += concordanceCounts[evalState.ordinal()][truthState.ordinal()]; } } long total = confirmedVariant + unconfirmedVariant; // note: if there are no observations (so the ratio is NaN) set this to 0% return total == 0l ? 0.0 : ( (double) confirmedVariant ) / ( (double) ( total ) ); }
GATKReportTable concordanceCounts = new GATKReportTable("GenotypeConcordance_Counts","Per-sample concordance tables: comparison counts",2+GenotypeType.values().length*GenotypeType.values().length); GATKReportTable concordanceEvalProportions = new GATKReportTable("GenotypeConcordance_EvalProportions", "Per-sample concordance tables: proportions of genotypes called in eval",2+GenotypeType.values().length*GenotypeType.values().length); GATKReportTable concordanceCompProportions = new GATKReportTable("GenotypeConcordance_CompProportions", "Per-sample concordance tables: proportions of genotypes called in comp",2+GenotypeType.values().length*GenotypeType.values().length); GATKReportTable concordanceSummary = new GATKReportTable("GenotypeConcordance_Summary","Per-sample summary statistics: NRS, NRD, and OGC",2); GATKReportTable siteConcordance = new GATKReportTable("SiteConcordance_Summary","Site-level summary statistics",ConcordanceMetrics.SiteConcordanceType.values().length); for ( GenotypeType evalType : GenotypeType.values() ) { for ( GenotypeType compType : GenotypeType.values() ) { String rowKey = String.format("%s_%s_%s",entry.getKey(),evalType.toString(),compType.toString()); concordanceCounts.set(rowKey,"Sample",entry.getKey()); concordanceCounts.set(rowKey,"Eval_Genotype",evalType.toString()); concordanceCounts.set(rowKey,"Comp_Genotype",compType.toString()); int count = table.get(evalType, compType); concordanceCounts.set(rowKey,"Count",count); if ( evalType == GenotypeType.HET || evalType == GenotypeType.HOM_REF || evalType == GenotypeType.HOM_VAR) { concordanceEvalProportions.set(rowKey,"Sample",entry.getKey()); concordanceEvalProportions.set(rowKey,"Eval_Genotype",evalType.toString()); concordanceEvalProportions.set(rowKey,"Comp_Genotype",compType.toString()); concordanceEvalProportions.set(rowKey,"Proportion",repairNaN(( (double) count)/table.getnEvalGenotypes(evalType))); concordanceCompProportions.set(rowKey,"Eval_Genotype",evalType.toString()); concordanceCompProportions.set(rowKey,"Comp_Genotype",compType.toString()); concordanceCompProportions.set(rowKey,"Proportion",repairNaN(( (double) count)/table.getnCompGenotypes(compType))); for ( GenotypeType evalType : GenotypeType.values() ) { for ( GenotypeType compType : GenotypeType.values() ) { String rowKey = String.format("%s_%s_%s",sampleKey,evalType.toString(),compType.toString()); concordanceCounts.set(rowKey,"Sample",sampleKey); concordanceCounts.set(rowKey,"Eval_Genotype",evalType.toString());
public String getTypeString() { return vcfGenotype.getType().toString(); }
@Test(enabled=true) public void testMissing() { Pair<VariantContext,VariantContext> data = getData5(); VariantContext eval = data.getFirst(); VariantContext truth = data.getSecond(); VCFCodec codec = new VCFCodec(); VCFHeader evalHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); VCFHeader compHeader = (VCFHeader)codec.readActualHeader(codec.makeSourceFromStream(new PositionalBufferedStream(new StringBufferInputStream(TEST_1_HEADER)))); ConcordanceMetrics metrics = new ConcordanceMetrics(evalHeader,compHeader,null); metrics.update(eval,truth); Assert.assertTrue(eval.getGenotype("test1_sample2").getType().equals(GenotypeType.UNAVAILABLE)); Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getnMismatchingAlt(),0); Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[2][1],0); Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[0][2],0); Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample2").getTable()[4][2],1); Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][1],0); Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][3],0); Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][0],0); Assert.assertEquals(metrics.getGenotypeConcordance("test1_sample3").getTable()[2][4],1); }
private static double calculateNRD(int[][] concordanceCounts) { int correct = 0; int total = 0; correct += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HET.ordinal()]; correct += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_VAR.ordinal()]; total += correct; total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HET.ordinal()]; total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_VAR.ordinal()]; total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_REF.ordinal()]; total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_VAR.ordinal()]; total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_REF.ordinal()]; total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HET.ordinal()]; // NRD is by definition incorrec/total = 1.0-correct/total // note: if there are no observations (so the ratio is NaN), set this to 100% return total == 0 ? 1.0 : 1.0 - ( (double) correct)/( (double) total); }
private static double calculateOGC(int[][] concordanceCounts) { int correct = 0; int total = 0; correct += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_REF.ordinal()]; correct += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HET.ordinal()]; correct += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_VAR.ordinal()]; total += correct; total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HET.ordinal()]; total += concordanceCounts[GenotypeType.HOM_REF.ordinal()][GenotypeType.HOM_VAR.ordinal()]; total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_REF.ordinal()]; total += concordanceCounts[GenotypeType.HET.ordinal()][GenotypeType.HOM_VAR.ordinal()]; total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HOM_REF.ordinal()]; total += concordanceCounts[GenotypeType.HOM_VAR.ordinal()][GenotypeType.HET.ordinal()]; // OGC is by definition correct/total // note: if there are no observations (so the ratio is NaN), set this to 100% return total == 0 ? 1.0 : ( (double) correct)/( (double) total); }
private void createInheritanceMap(){ inheritance = new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>>(GenotypeType.class); for(GenotypeType mType : GenotypeType.values()){ inheritance.put(mType, new EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>(GenotypeType.class)); for(GenotypeType dType : GenotypeType.values()){ inheritance.get(mType).put(dType, new EnumMap<GenotypeType,Integer>(GenotypeType.class)); for(GenotypeType cType : GenotypeType.values()){ inheritance.get(mType).get(dType).put(cType, 0); } } } }
/** * Genotype-specific functions -- how many hom ref calls are there in the genotypes? * * @return number of hom ref calls */ public int getHomRefCount() { calculateGenotypeCounts(); return genotypeCounts[GenotypeType.HOM_REF.ordinal()]; }
private void buildMatrices(){ for(final GenotypeType mother : GenotypeType.values()){ mvCountMatrix.put(mother,new EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>(GenotypeType.class)); for(final GenotypeType father : GenotypeType.values()){ mvCountMatrix.get(mother).put(father,new EnumMap<GenotypeType, Integer>(GenotypeType.class)); for(final GenotypeType child : GenotypeType.values()){ mvCountMatrix.get(mother).get(father).put(child, getCombinationMVCount(mother, father, child)); } } } }
/** * Genotype-specific functions -- how many het calls are there in the genotypes? * * @return number of het calls */ public int getHetCount() { calculateGenotypeCounts(); return genotypeCounts[GenotypeType.HET.ordinal()]; }
private void buildMatrices(){ mvCountMatrix = new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>>(GenotypeType.class); transmissionMatrix = new EnumMap<GenotypeType,EnumMap<GenotypeType,EnumMap<GenotypeType,TrioPhase>>>(GenotypeType.class); for(GenotypeType mother : GenotypeType.values()){ mvCountMatrix.put(mother,new EnumMap<GenotypeType,EnumMap<GenotypeType,Integer>>(GenotypeType.class)); transmissionMatrix.put(mother,new EnumMap<GenotypeType,EnumMap<GenotypeType,TrioPhase>>(GenotypeType.class)); for(GenotypeType father : GenotypeType.values()){ mvCountMatrix.get(mother).put(father,new EnumMap<GenotypeType, Integer>(GenotypeType.class)); transmissionMatrix.get(mother).put(father,new EnumMap<GenotypeType,TrioPhase>(GenotypeType.class)); for(GenotypeType child : GenotypeType.values()){ mvCountMatrix.get(mother).get(father).put(child, getCombinationMVCount(mother, father, child)); transmissionMatrix.get(mother).get(father).put(child,new TrioPhase(mother,father,child)); } } } }
/** * Genotype-specific functions -- how many het calls are there in the genotypes? * * @return number of het calls */ public int getHetCount() { calculateGenotypeCounts(); return genotypeCounts[GenotypeType.HET.ordinal()]; }