public VariantContext createEmptyContext(VariantContext other, List<String> samples) { VariantContextBuilder builder = new VariantContextBuilder(); // set the alleles to be the same builder.alleles(other.getAlleles()); builder.loc(other.getChr(),other.getStart(),other.getEnd()); // set all genotypes to empty List<Genotype> genotypes = new ArrayList<Genotype>(samples.size()); for ( String sample : samples ) genotypes.add(GenotypeBuilder.create(sample, new ArrayList<Allele>(0))); builder.genotypes(genotypes); return builder.make(); }
public enum GenotypeAssignmentMethod { /** * set all of the genotype GT values to NO_CALL */ SET_TO_NO_CALL, /** * set all of the genotype GT values to NO_CALL and remove annotations */ SET_TO_NO_CALL_NO_ANNOTATIONS, /** * Use the subsetted PLs to greedily assigned genotypes */ USE_PLS_TO_ASSIGN, /** * Try to match the original GT calls, if at all possible * * Suppose I have 3 alleles: A/B/C and the following samples: * * original_GT best_match to A/B best_match to A/C * S1 => A/A A/A A/A * S2 => A/B A/B A/A * S3 => B/B B/B A/A * S4 => B/C A/B A/C * S5 => C/C A/A C/C * * Basically, all alleles not in the subset map to ref. It means that het-alt genotypes
@DataProvider(name = "variantsProvider") public Object[][] variantsProvider() { final VariantContextBuilder vc_builder = new VariantContextBuilder("testCode", "chr1", 1, 1, Arrays.asList(refA, G)); final GenotypeBuilder gt_builder = new GenotypeBuilder("test"); final List<VariantContext> vcs = new ArrayList<VariantContext>(10); //hets: vcs.add(vc_builder.genotypes(gt_builder.alleles(Arrays.asList(refA, G)).make()).make()); vcs.add(vc_builder.loc("chr1", 10, 10).genotypes(gt_builder.alleles(Arrays.asList(refA, G)).make()).make()); //non-variant: vcs.add(vc_builder.loc("chr1", 20, 20).genotypes(gt_builder.alleles(Collections.singletonList(refA)).make()).make()); vcs.add(vc_builder.loc("chr1", 30, 30).genotypes(gt_builder.alleles(Collections.singletonList(refA)).make()).make()); return new Object[][]{new Object[]{vcs.iterator(), new int[]{1, 10}}}; }
@Test public void testInsertionDeletionBecomingNullAllele() { final byte[] ref = "CAAA".getBytes(); final GenomeLoc refLoc = genomeLocParser.createGenomeLoc("2", 1700, 1700 + ref.length); // insertion + deletion results in a null allele, should return false final VariantContext thisVC = new VariantContextBuilder().loc("2", 1700, 1701).alleles("CA","C").make(); final VariantContext nextVC = new VariantContextBuilder().loc("2", 1703, 1703).alleles("A","AA").make(); final VariantContext mergedVC = merger.createMergedVariantContext(thisVC, nextVC, ref, refLoc); Assert.assertNull(mergedVC, "Insertion deletion becoming a null allele should return a null variant context"); }
/**
 * Builds a placeholder variant context that records a failure instead of a real variant.
 *
 * The context sits at 1:1-1 with reference allele "N" and alternate allele "&lt;ERROR&gt;",
 * carries the filter "PARSE_ERROR", and stores both arguments (each passed through
 * {@code urlEncode}) as attributes.
 *
 * @param origString the original input text; stored under the attribute "ORIG_VAR"
 * @param message    the error description; stored under the attribute "ERROR_MESSAGE"
 * @return Variant context indicating error
 */
private VariantContext buildErrorVariantContext(String origString, String message) {
    VariantContextBuilder errorBuilder = new VariantContextBuilder();

    // fixed dummy site and alleles
    errorBuilder.loc("1", 1, 1);
    errorBuilder.alleles(Lists.newArrayList(Allele.create("N", true), Allele.create("<ERROR>", false)));

    // mark the record as failed and keep the details alongside it
    errorBuilder.filter("PARSE_ERROR");
    errorBuilder.attribute("ORIG_VAR", urlEncode(origString));
    errorBuilder.attribute("ERROR_MESSAGE", urlEncode(message));

    return errorBuilder.make();
}
// Place the record on loc's contig at loc's start; the end coordinate is the separately supplied 'stop'.
builder.loc(loc.getContig(), loc.getStart(), stop);
// Two alleles, passed as (refAllele, altAllele) in that order.
builder.alleles(Arrays.asList(refAllele, altAllele));
// NOTE(review): presumably leaves the context without any genotypes (sites-only) — confirm against VariantContextBuilder.
builder.noGenotypes();
/** * @return a generic one-based position variant context with a heterozygous genotype having no attributes. */ private VariantContext buildVariantContext(int chr, int pos, String ref, String alt, double qual, String sampleName) { Allele refAllele = Allele.create(ref, true); Allele altAllele = Allele.create(alt); List<Allele> alleles = Arrays.asList(refAllele, altAllele); VariantContextBuilder vcBuilder = new VariantContextBuilder(); // build Genotype GenotypeBuilder gtBuilder = new GenotypeBuilder(sampleName).noAttributes(); //default to HETEROZYGOUS gtBuilder.alleles(alleles); // build VariantContext vcBuilder.loc("chr" + chr, pos, pos - 1L + ref.length()); vcBuilder.alleles(alleles); vcBuilder.genotypes(gtBuilder.make()); vcBuilder.log10PError(-0.1 * qual); return vcBuilder.make(); }
builder.loc(context.getChr(),context.getStart(),context.getEnd()); List<Genotype> newGeno = new ArrayList<Genotype>(context.getNSamples()); for ( Genotype g : context.getGenotypes().iterateInSampleNameOrder() ) {
@Test public void testReverseComplementFailureDoesNotErrorOut() { final VariantContextBuilder builder = new VariantContextBuilder().source("test").loc("chr1", 1, 4); final Allele originalRef = Allele.create("CCCC", true); final Allele originalAlt = Allele.create("C", false); builder.alleles(Arrays.asList(originalRef, originalAlt)); final Interval interval = new Interval("chr1", 1, 4, true, "test "); final String reference = "ATGATGATGA"; final ReferenceSequence refSeq = new ReferenceSequence("chr1", 10, reference.getBytes()); // we don't actually care what the results are here -- we just want to make sure that it doesn't fail final VariantContextBuilder result = LiftoverUtils.reverseComplementVariantContext(builder.make(), interval, refSeq); }
/**
 * Fills {@code locs} and {@code events} with one entry per (chr, size, start) combination,
 * iterating chromosomes outermost and start positions innermost, then appends
 * {@code GenomeLoc.UNMAPPED} as one extra trailing entry of {@code locs}.
 *
 * For each combination the location and the event both span
 * {@code start .. start + max(0, size)}. The reference allele gets {@code size + 1} random
 * bases when {@code size > 0} and 1 otherwise; the alternate allele gets {@code -size + 1}
 * random bases when {@code size < 0} and 1 otherwise.
 *
 * @param chrs        contig names to combine
 * @param eventSizes  signed event sizes to combine
 * @param eventStarts start positions to combine
 * @param locs        output array; needs room for every combination plus one
 * @param events      output array; needs room for every combination
 */
private void generateAllLocationsAndVariantContextCombinations(final String[] chrs, final int[] eventSizes, final int[] eventStarts, final GenomeLoc[] locs, final VariantContext[] events) {
    int slot = 0;
    for (final String chr : chrs) {
        for (final int size : eventSizes) {
            // these depend on the size only, not on the start position
            final int span = Math.max(0, size);
            final int refLength = size <= 0 ? 1 : size + 1;
            final int altLength = size < 0 ? -size + 1 : 1;

            for (final int start : eventStarts) {
                final int stop = start + span;
                locs[slot] = genomeLocParser.createGenomeLoc(chr, start, stop);

                final VariantContextBuilder eventBuilder =
                        new VariantContextBuilder().source("test").loc(chr, start, stop);
                // reference bases are drawn before alternate bases, as in the allele list
                final Allele refAllele = Allele.create(randomBases(refLength, true), true);
                final Allele altAllele = Allele.create(randomBases(altLength, false), false);
                events[slot] = eventBuilder.alleles(Arrays.asList(refAllele, altAllele)).make();

                slot++;
            }
        }
    }
    // locs ends with one additional, unmapped location that has no matching event
    locs[slot] = GenomeLoc.UNMAPPED;
}
/**
 * Inserts the ref->alt event at {@code loc} into a haplotype built from {@code hap} and
 * asserts that the result equals a haplotype built from {@code newHap}.
 *
 * @param ref    reference allele bases
 * @param alt    alternate allele bases
 * @param loc    start of the event on contig "1"; also passed to insertAllele as its third argument
 * @param cigar  cigar assigned to the starting haplotype
 * @param hap    bases of the starting haplotype
 * @param newHap bases of the haplotype expected after the insertion
 */
private void basicInsertTest(String ref, String alt, int loc, Cigar cigar, String hap, String newHap) {
    final Haplotype startingHaplotype = new Haplotype(hap.getBytes());

    final Allele refAllele = Allele.create(ref, true);
    final Allele altAllele = Allele.create(alt, false);
    final ArrayList<Allele> refAndAlt = new ArrayList<Allele>();
    refAndAlt.add(refAllele);
    refAndAlt.add(altAllele);

    // the event ends at loc + (reference length) - 1
    final int end = loc + refAllele.getBases().length - 1;
    final VariantContext event = new VariantContextBuilder().alleles(refAndAlt).loc("1", loc, end).make();

    startingHaplotype.setAlignmentStartHapwrtRef(0);
    startingHaplotype.setCigar(cigar);

    final Haplotype actual = startingHaplotype.insertAllele(
            event.getReference(), event.getAlternateAllele(0), loc, event.getStart());
    final Haplotype expected = new Haplotype(newHap.getBytes());
    Assert.assertEquals(actual, expected);
}
/**
 * Writes one variant record through the record writer and checks the first eight
 * tab-separated columns of the first line that follows the header in the output file.
 *
 * The record is built at 20:2-2 with alleles C (reference) and A, no genotypes, an empty
 * filter set, the attribute NS=4 and a log10 error of -8.0; the expected columns are
 * "20", "2", ".", "C", "A", "80", "PASS", "NS=4".
 *
 * @throws Exception if writing or reading the output file fails
 */
@Test
public void testSimple() throws Exception {
    VariantContextBuilder vctx_builder = new VariantContextBuilder();

    ArrayList<Allele> alleles = new ArrayList<Allele>();
    alleles.add(Allele.create("A", false));
    alleles.add(Allele.create("C", true));
    vctx_builder.alleles(alleles);

    GenotypesContext genotypes = GenotypesContext.NO_GENOTYPES;
    vctx_builder.genotypes(genotypes);

    HashSet<String> filters = new HashSet<String>();
    vctx_builder.filters(filters);

    HashMap<String, Object> attributes = new HashMap<String, Object>();
    // Integer.valueOf instead of the deprecated boxing constructor
    attributes.put("NS", Integer.valueOf(4));
    vctx_builder.attributes(attributes);

    vctx_builder.loc("20", 2, 2);
    vctx_builder.log10PError(-8.0);

    String[] expected = new String[]{"20", "2", ".", "C", "A", "80", "PASS", "NS=4"};

    VariantContext ctx = vctx_builder.make();
    writable.set(ctx);
    writer.write(1L, writable);
    writer.close(taskAttemptContext);

    // try-with-resources so the file handle is released even when an assertion fails
    try (LineNumberReader reader = new LineNumberReader(new FileReader(test_vcf_output))) {
        skipHeader(reader);
        String line = reader.readLine();
        // fail with a clear message rather than a NullPointerException when no record was written
        Assert.assertNotNull("expected a VCF record after the header", line);
        // only the leading columns are compared; any further columns are ignored
        String[] fields = Arrays.copyOf(line.split("\t"), expected.length);
        Assert.assertArrayEquals("comparing VCF single line", expected, fields);
    }
}
/**
 * Builds an (eval, truth) pair of three-sample contexts at chr1:3 over the alleles
 * A (reference) and C.
 *
 * Genotypes for test1_sample1 / test1_sample2 / test1_sample3:
 * eval A/A, A/C, A/C; truth A/A, A/A, C/C.
 *
 * @return pair whose first element is the eval context and whose second is the truth context
 */
private Pair<VariantContext,VariantContext> getData1() {
    final Allele refA = Allele.create(BaseUtils.Base.A.base, true);
    final Allele altC = Allele.create(BaseUtils.Base.C.base);
    final GenomeLoc site = genomeLocParser.createGenomeLoc("chr1", 3, 3);

    final VariantContextBuilder evalBuilder = new VariantContextBuilder()
            .alleles(Arrays.asList(refA, altC))
            .genotypes(Arrays.asList(
                    GenotypeBuilder.create("test1_sample1", Arrays.asList(refA, refA)),
                    GenotypeBuilder.create("test1_sample2", Arrays.asList(refA, altC)),
                    GenotypeBuilder.create("test1_sample3", Arrays.asList(refA, altC))))
            .loc(site.getContig(), site.getStart(), site.getStop());

    final VariantContextBuilder truthBuilder = new VariantContextBuilder()
            .alleles(Arrays.asList(refA, altC))
            .genotypes(Arrays.asList(
                    GenotypeBuilder.create("test1_sample1", Arrays.asList(refA, refA)),
                    GenotypeBuilder.create("test1_sample2", Arrays.asList(refA, refA)),
                    GenotypeBuilder.create("test1_sample3", Arrays.asList(altC, altC))))
            .loc(site.getContig(), site.getStart(), site.getStop());

    return new Pair<VariantContext, VariantContext>(evalBuilder.make(), truthBuilder.make());
}
// Span on contig "20" beginning at refOffset; the end is start + allele length - 1,
// so start..end (both ends counted) covers referenceAllele.length() positions.
vcb.loc("20",refOffset,refOffset + referenceAllele.length() -1);
/**
 * Builds an (eval, truth) pair of three-sample contexts at chr1:3 in which one sample per
 * context has a genotype created from an empty allele list.
 *
 * Eval declares the alleles A (reference), C and T; truth declares A (reference) and C.
 * Genotypes for test1_sample1 / test1_sample2 / test1_sample3:
 * eval A/A, (no alleles), A/C; truth A/A, A/C, (no alleles).
 *
 * @return pair whose first element is the eval context and whose second is the truth context
 */
private Pair<VariantContext,VariantContext> getData5() {
    final Allele refA = Allele.create(BaseUtils.Base.A.base, true);
    final Allele altC = Allele.create(BaseUtils.Base.C.base);
    final Allele altT = Allele.create(BaseUtils.Base.T.base);
    final GenomeLoc site = genomeLocParser.createGenomeLoc("chr1", 3, 3);

    final VariantContextBuilder evalBuilder = new VariantContextBuilder()
            .alleles(Arrays.asList(refA, altC, altT))
            .genotypes(Arrays.asList(
                    GenotypeBuilder.create("test1_sample1", Arrays.asList(refA, refA)),
                    GenotypeBuilder.create("test1_sample2", new ArrayList<Allele>(0)),
                    GenotypeBuilder.create("test1_sample3", Arrays.asList(refA, altC))))
            .loc(site.getContig(), site.getStart(), site.getStop());

    final VariantContextBuilder truthBuilder = new VariantContextBuilder()
            .alleles(Arrays.asList(refA, altC))
            .genotypes(Arrays.asList(
                    GenotypeBuilder.create("test1_sample1", Arrays.asList(refA, refA)),
                    GenotypeBuilder.create("test1_sample2", Arrays.asList(refA, altC)),
                    GenotypeBuilder.create("test1_sample3", new ArrayList<Allele>(0))))
            .loc(site.getContig(), site.getStart(), site.getStop());

    return new Pair<VariantContext, VariantContext>(evalBuilder.make(), truthBuilder.make());
}
/**
 * Builds an (eval, truth) pair of three-sample contexts at chr1:3 whose allele lists differ:
 * eval declares A (reference), C and T; truth declares A (reference) and C.
 *
 * Genotypes for test1_sample1 / test1_sample2 / test1_sample3:
 * eval A/A, A/T, A/C; truth A/A, A/C, C/C.
 *
 * @return pair whose first element is the eval context and whose second is the truth context
 */
private Pair<VariantContext,VariantContext> getData2() {
    final Allele refA = Allele.create(BaseUtils.Base.A.base, true);
    final Allele altC = Allele.create(BaseUtils.Base.C.base);
    final Allele altT = Allele.create(BaseUtils.Base.T.base);
    final GenomeLoc site = genomeLocParser.createGenomeLoc("chr1", 3, 3);

    final VariantContextBuilder evalBuilder = new VariantContextBuilder()
            .alleles(Arrays.asList(refA, altC, altT))
            .genotypes(Arrays.asList(
                    GenotypeBuilder.create("test1_sample1", Arrays.asList(refA, refA)),
                    GenotypeBuilder.create("test1_sample2", Arrays.asList(refA, altT)),
                    GenotypeBuilder.create("test1_sample3", Arrays.asList(refA, altC))))
            .loc(site.getContig(), site.getStart(), site.getStop());

    final VariantContextBuilder truthBuilder = new VariantContextBuilder()
            .alleles(Arrays.asList(refA, altC))
            .genotypes(Arrays.asList(
                    GenotypeBuilder.create("test1_sample1", Arrays.asList(refA, refA)),
                    GenotypeBuilder.create("test1_sample2", Arrays.asList(refA, altC)),
                    GenotypeBuilder.create("test1_sample3", Arrays.asList(altC, altC))))
            .loc(site.getContig(), site.getStart(), site.getStop());

    return new Pair<VariantContext, VariantContext>(evalBuilder.make(), truthBuilder.make());
}
/**
 * Builds an (eval, truth) pair of three-sample contexts at chr1:3 in which one sample per
 * context is a pair of {@code Allele.NO_CALL} alleles.
 *
 * Eval declares the alleles A (reference), C and T; truth declares A (reference) and C.
 * Genotypes for test1_sample1 / test1_sample2 / test1_sample3:
 * eval A/A, NO_CALL/NO_CALL, A/C; truth A/A, A/C, NO_CALL/NO_CALL.
 *
 * @return pair whose first element is the eval context and whose second is the truth context
 */
private Pair<VariantContext,VariantContext> getData4() {
    final Allele refA = Allele.create(BaseUtils.Base.A.base, true);
    final Allele altC = Allele.create(BaseUtils.Base.C.base);
    final Allele altT = Allele.create(BaseUtils.Base.T.base);
    final GenomeLoc site = genomeLocParser.createGenomeLoc("chr1", 3, 3);

    final VariantContextBuilder evalBuilder = new VariantContextBuilder()
            .alleles(Arrays.asList(refA, altC, altT))
            .genotypes(Arrays.asList(
                    GenotypeBuilder.create("test1_sample1", Arrays.asList(refA, refA)),
                    GenotypeBuilder.create("test1_sample2", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)),
                    GenotypeBuilder.create("test1_sample3", Arrays.asList(refA, altC))))
            .loc(site.getContig(), site.getStart(), site.getStop());

    final VariantContextBuilder truthBuilder = new VariantContextBuilder()
            .alleles(Arrays.asList(refA, altC))
            .genotypes(Arrays.asList(
                    GenotypeBuilder.create("test1_sample1", Arrays.asList(refA, refA)),
                    GenotypeBuilder.create("test1_sample2", Arrays.asList(refA, altC)),
                    GenotypeBuilder.create("test1_sample3", Arrays.asList(Allele.NO_CALL, Allele.NO_CALL))))
            .loc(site.getContig(), site.getStart(), site.getStop());

    return new Pair<VariantContext, VariantContext>(evalBuilder.make(), truthBuilder.make());
}
/**
 * Builds an (eval, truth) pair of three-sample contexts spanning chr1:3-5 with a
 * three-base reference allele ACT.
 *
 * Eval declares the alleles ACT (reference), ATT and A; truth declares ACT (reference),
 * AC and A. Genotypes for test1_sample1 / test1_sample2 / test1_sample3:
 * eval ACT/ATT, A/A, ACT/A; truth ACT/AC, A/A, ACT/A.
 *
 * @return pair whose first element is the eval context and whose second is the truth context
 */
private Pair<VariantContext,VariantContext> getData3() {
    final byte baseA = BaseUtils.Base.A.base;
    final byte baseC = BaseUtils.Base.C.base;
    final byte baseT = BaseUtils.Base.T.base;

    final Allele refACT = Allele.create(new byte[]{baseA, baseC, baseT}, true);
    final Allele altAC = Allele.create(new byte[]{baseA, baseC});
    final Allele altA = Allele.create(baseA);
    final Allele altATT = Allele.create(new byte[]{baseA, baseT, baseT});
    final GenomeLoc site = genomeLocParser.createGenomeLoc("chr1", 3, 5);

    final VariantContextBuilder evalBuilder = new VariantContextBuilder()
            .alleles(Arrays.asList(refACT, altATT, altA))
            .genotypes(Arrays.asList(
                    GenotypeBuilder.create("test1_sample1", Arrays.asList(refACT, altATT)),
                    GenotypeBuilder.create("test1_sample2", Arrays.asList(altA, altA)),
                    GenotypeBuilder.create("test1_sample3", Arrays.asList(refACT, altA))))
            .loc(site.getContig(), site.getStart(), site.getStop());

    final VariantContextBuilder truthBuilder = new VariantContextBuilder()
            .alleles(Arrays.asList(refACT, altAC, altA))
            .genotypes(Arrays.asList(
                    GenotypeBuilder.create("test1_sample1", Arrays.asList(refACT, altAC)),
                    GenotypeBuilder.create("test1_sample2", Arrays.asList(altA, altA)),
                    GenotypeBuilder.create("test1_sample3", Arrays.asList(refACT, altA))))
            .loc(site.getContig(), site.getStart(), site.getStop());

    return new Pair<VariantContext, VariantContext>(evalBuilder.make(), truthBuilder.make());
}
private Pair<VariantContext,VariantContext> getMonoallelicData() { final Allele ref = Allele.create(BaseUtils.Base.T.base,true); final Allele alt = Allele.create(BaseUtils.Base.C.base); //Site in eval is monoallelic, both samples are HOM_REF //sample1 in comp is HOM_VAR, sample2 is NO_CALL //None of these should trigger mismatching alleles final GenomeLoc loc = genomeLocParser.createGenomeLoc("chr1",1,1); final VariantContextBuilder site1Comp = new VariantContextBuilder(); final VariantContextBuilder site1Eval = new VariantContextBuilder(); site1Comp.loc(loc.getContig(), loc.getStart(), loc.getStop()); site1Eval.loc(loc.getContig(), loc.getStart(), loc.getStop()); site1Comp.alleles(Arrays.asList(ref)); site1Eval.alleles(Arrays.asList(ref, alt)); site1Comp.genotypes(GenotypeBuilder.create("test2_sample1", Arrays.asList(ref, ref)), GenotypeBuilder.create("test2_sample2", Arrays.asList(ref, ref))); site1Eval.genotypes(GenotypeBuilder.create("test2_sample1",Arrays.asList(alt,alt)), GenotypeBuilder.create("test2_sample2",Arrays.asList(Allele.NO_CALL,Allele.NO_CALL))); return new Pair<>(site1Eval.make(), site1Comp.make()); }
@Test(dataProvider = "SitesAndGenotypesVC") public void runModifyVCTests(SitesAndGenotypesVC cfg) { VariantContext modified = new VariantContextBuilder(cfg.vc).loc("chr2", 123, 123).make(); Assert.assertEquals(modified.getContig(), "chr2"); Assert.assertEquals(modified.getStart(), 123);