/**
 * Builds a parser from a reference file by delegating to the
 * dictionary-based constructor with the file's sequence dictionary,
 * which establishes our internal reference contig order.
 *
 * @param refFile the reference file; must not be null
 */
@Requires("refFile != null")
public GenomeLocParser(final ReferenceSequenceFile refFile) {
    this(refFile.getSequenceDictionary());
}
/**
 * Reads the sequence dictionary associated with a reference file.
 *
 * The reference is opened only long enough to fetch its dictionary and is
 * closed again before returning, so no file handle is left open.
 *
 * @param reference path to the reference file
 * @return the sequence dictionary reported by the reference; never null
 * @throws SAMException if the reference has no dictionary, or if the
 *                      reference file cannot be closed cleanly
 */
@Override
SAMSequenceDictionary extractDictionary(Path reference) {
    final SAMSequenceDictionary dict;
    // Fully-qualified names are used so this method needs no additional imports.
    try (final htsjdk.samtools.reference.ReferenceSequenceFile referenceFile =
                 ReferenceSequenceFileFactory.getReferenceSequenceFile(reference)) {
        dict = referenceFile.getSequenceDictionary();
    } catch (final java.io.IOException e) {
        throw new SAMException("Could not close reference file " + reference.toUri().toString(), e);
    }
    if (dict == null) {
        throw new SAMException("Could not find dictionary next to reference file " + reference.toUri().toString());
    }
    return dict;
}
},
/**
 * Collects the names of every sequence in the toolkit's reference
 * sequence dictionary.
 *
 * @return a sorted set (TreeSet) of the contig names
 */
public Set<String> getContigNames() {
    final Iterable<SAMSequenceRecord> records =
            getToolkit().getReferenceDataSource().getReference().getSequenceDictionary().getSequences();

    final TreeSet<String> names = new TreeSet<String>();
    for (final SAMSequenceRecord record : records) {
        final String contigName = record.getSequenceName();
        names.add(contigName);
    }
    return names;
}
@BeforeClass public void init() throws FileNotFoundException { // sequence seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference)); dict = seq.getSequenceDictionary(); }
/** * Gets the stop of the expanded window, bounded if necessary by the contig. * @param locus The locus to expand. * @return The expanded window. */ private int getWindowStop( GenomeLoc locus ) { // If the locus is not within the bounds of the contig it allegedly maps to, expand only as much as we can. int sequenceLength = reference.getSequenceDictionary().getSequence(locus.getContig()).getSequenceLength(); if(locus.getStop() > sequenceLength) return sequenceLength; return Math.min( locus.getStop() + windowStop, sequenceLength ); } }
/** * This function does the setup of our parser, before each method call. * <p/> * Called before every test case method. */ @BeforeMethod public void doForEachTest() throws FileNotFoundException { // sequence seq = new CachingIndexedFastaSequenceFile(new File(hg18Reference)); genomeLocParser = new GenomeLocParser(seq.getSequenceDictionary()); }
/**
 * Class-level fixture: creates the artificial SAM header used by the tests
 * from the sequence dictionary of the reference at {@code b37KGReference}.
 *
 * @throws FileNotFoundException declared for the reference file constructor
 */
@BeforeClass
public void setup() throws FileNotFoundException {
    final File referenceFile = new File(b37KGReference);
    final ReferenceSequenceFile reference = new CachingIndexedFastaSequenceFile(referenceFile);
    header = ArtificialSAMUtils.createArtificialSamHeader(reference.getSequenceDictionary());
}
/**
 * Prepares the shared artificial SAM header once for the class, using the
 * sequence dictionary read from the reference at {@code b37KGReference}.
 *
 * @throws FileNotFoundException declared for the reference file constructor
 */
@BeforeClass
public void setup() throws FileNotFoundException {
    final ReferenceSequenceFile referenceSequence =
            new CachingIndexedFastaSequenceFile(new File(b37KGReference));

    // The header carries the same contigs as the reference.
    header = ArtificialSAMUtils.createArtificialSamHeader(
            referenceSequence.getSequenceDictionary());
}
/**
 * Shared initialisation: selects the pair-end info map implementation from
 * the header's sort order and, when a reference is supplied, sets up the
 * reference walker and sequence dictionary.
 *
 * @param reference the reference sequence file; may be null, in which case
 *                  the walker and dictionary fields are left untouched
 * @param header    the SAM file header whose sort order is inspected
 */
private void init(final ReferenceSequenceFile reference, final SAMFileHeader header) {
    final boolean coordinateSorted = SAMFileHeader.SortOrder.coordinate == header.getSortOrder();
    if (!coordinateSorted) {
        this.pairEndInfoByName = new InMemoryPairEndInfoMap();
    } else {
        this.pairEndInfoByName = new CoordinateSortedPairEndInfoMap();
    }

    // Nothing more to wire up without a reference.
    if (reference == null) {
        return;
    }
    this.refFileWalker = new ReferenceSequenceFileWalker(reference);
    this.samSequenceDictionary = reference.getSequenceDictionary();
}
@BeforeClass public void init() throws FileNotFoundException { // sequence seq = new CachingIndexedFastaSequenceFile(new File(hg18Reference)); genomeLocParser = new GenomeLocParser(seq); // disable auto-index creation/locking in the RMDTrackBuilder for tests builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null,true,null); }
/** * Test the start of the middle contig. */ @Test public void testContigStart() { // Test the last 25 bases of the first contig. int contigPosition = sequenceFile.getSequenceDictionary().getSequences().size()/2; SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(contigPosition); validateLocation( genomeLocParser.createGenomeLoc(selectedContig.getSequenceName(),1,25) ); }
/**
 * An artificial read created without a read group should be reported as
 * {@code NGSPlatform.UNKNOWN}.
 */
@Test
public void testReadWithoutRG() {
    final SAMFileHeader artificialHeader =
            ArtificialSAMUtils.createArtificialSamHeader(seq.getSequenceDictionary());
    final GATKSAMRecord readWithoutGroup =
            ArtificialSAMUtils.createArtificialRead(artificialHeader, "myRead", 0, 1, 10);

    final NGSPlatform detected = NGSPlatform.fromRead(readWithoutGroup);
    Assert.assertEquals(detected, NGSPlatform.UNKNOWN);
}
}
/** * Test the end of a contig. */ @Test public void testReferenceEnd() { // Test the last 25 bases of the first contig. SAMSequenceRecord selectedContig = sequenceFile.getSequenceDictionary().getSequences().get(sequenceFile.getSequenceDictionary().getSequences().size()-1); final int contigStart = selectedContig.getSequenceLength() - 24; final int contigStop = selectedContig.getSequenceLength(); validateLocation( genomeLocParser.createGenomeLoc(selectedContig.getSequenceName(),contigStart,contigStop) ); }
/**
 * Validates positions 1-25 of the first sequence in the dictionary,
 * i.e. the very start of the fasta.
 */
@Test
public void testReferenceStart() {
    final String firstContig =
            sequenceFile.getSequenceDictionary().getSequence(0).getSequenceName();
    validateLocation( genomeLocParser.createGenomeLoc(firstContig, 1, 25) );
}
@BeforeMethod public void setUp() { String fileName = privateTestDir + "TabularDataTest.dat"; triplet = new RMDTriplet("tableTest","Table",fileName,RMDStorageType.FILE,new Tags()); // disable auto-index creation/locking in the RMDTrackBuilder for tests builder = new RMDTrackBuilder(seq.getSequenceDictionary(),genomeLocParser,null,true,null); }
/**
 * Pads the extended location of an active region by {@code REFERENCE_PADDING}
 * on both sides, keeping the result within [1, contig length].
 *
 * @param activeRegion the region whose extended location is padded
 * @return a location on the same contig spanning the padded interval
 */
private GenomeLoc getPaddedLoc( final ActiveRegion activeRegion ) {
    final GenomeLoc extendedLoc = activeRegion.getExtendedLoc();
    final String contig = extendedLoc.getContig();

    // Left edge may not drop below the first base of the contig.
    final int padLeft = Math.max(extendedLoc.getStart() - REFERENCE_PADDING, 1);

    // Right edge may not run past the end of the contig.
    final int contigLength = referenceReader.getSequenceDictionary().getSequence(contig).getSequenceLength();
    final int padRight = Math.min(extendedLoc.getStop() + REFERENCE_PADDING, contigLength);

    return getToolkit().getGenomeLocParser().createGenomeLoc(contig, padLeft, padRight);
}
/**
 * Queries outside the bounds of the shard should result in a reference
 * context window trimmed at the shard boundary.
 */
@Test
public void testBoundsFailure() {
    final String firstContig = sequenceFile.getSequenceDictionary().getSequence(0).getSequenceName();

    // Shard covering bases 1-50 of the first contig.
    final GenomeLoc shardLoc = genomeLocParser.createGenomeLoc(firstContig, 1, 50);
    final Shard shard = new MockLocusShard(genomeLocParser, Collections.singletonList(shardLoc));
    final LocusShardDataProvider dataProvider = new LocusShardDataProvider(
            shard, null, genomeLocParser, shard.getGenomeLocs().get(0), null, sequenceFile, null);
    final LocusReferenceView view = new LocusReferenceView(dataProvider);

    // Query 50-51: the second base lies past the shard boundary.
    final GenomeLoc locus = genomeLocParser.createGenomeLoc(firstContig, 50, 51);
    final ReferenceContext rc = view.getReferenceContext(locus);

    // The locus is reported unchanged, but the window is cut back to position 50 only.
    Assert.assertTrue(rc.getLocus().equals(locus));
    final GenomeLoc expectedWindow = genomeLocParser.createGenomeLoc(firstContig, 50);
    Assert.assertTrue(rc.getWindow().equals(expectedWindow));
    Assert.assertTrue(rc.getBases().length == 1);
}
/**
 * Looks up the read coordinate for reference position 9392 on a read whose
 * cigar is {@code 3M414N1D73M} and that starts at 8975, and expects 2.
 *
 * @throws FileNotFoundException declared for the reference file constructor
 */
@Test (enabled = true)
public void testReadWithNsRefIndexInDeletion() throws FileNotFoundException {
    final int readLength = 76;

    final ReferenceSequenceFile reference = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
    final SAMFileHeader samHeader = ArtificialSAMUtils.createArtificialSamHeader(reference.getSequenceDictionary());

    // All-'A' read with uniform base quality 30.
    final GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(samHeader, "myRead", 0, 8975, readLength);
    read.setReadBases(Utils.dupBytes((byte) 'A', readLength));
    read.setBaseQualities(Utils.dupBytes((byte)30, readLength));
    read.setCigarString("3M414N1D73M");

    final int readCoordinate = ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(
            read, 9392, ReadUtils.ClippingTail.LEFT_TAIL);
    Assert.assertEquals(readCoordinate, 2);
}
@BeforeTest public void before() { // Create GenomeLoc ReferenceSequenceFile fasta = CachingIndexedFastaSequenceFile.checkAndCreate(new File(privateTestDir + "iupacFASTA.fasta")); GenomeLocParser genomeLocParser = new GenomeLocParser(fasta); chr1 = fasta.getSequenceDictionary().getSequence(0).getSequenceName(); genomeLoc = genomeLocParser.createGenomeLoc(chr1, 5, 10); }
/**
 * Builds a RefSeq track from {@code refSeqGeneList} and wraps its iterator in
 * a seekable ROD iterator.
 *
 * @return a seekable iterator over the RefSeq track, constructed with the
 *         track's own sequence dictionary and the reference's dictionary
 */
private LocationAwareSeekableRODIterator initializeRefSeq() {
    // Track builder configured from the toolkit's reference and arguments.
    final RMDTrackBuilder trackBuilder = new RMDTrackBuilder(
            getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
            getToolkit().getGenomeLocParser(),
            getToolkit().getArguments().unsafe,
            getToolkit().getArguments().disableAutoIndexCreationAndLockingWhenReadingRods,
            null);

    final RMDTrack refSeqTrack = trackBuilder.createInstanceOfTrack(RefSeqCodec.class, refSeqGeneList);

    return new SeekableRODIterator(
            refSeqTrack.getHeader(),
            refSeqTrack.getSequenceDictionary(),
            getToolkit().getReferenceDataSource().getReference().getSequenceDictionary(),
            getToolkit().getGenomeLocParser(),
            refSeqTrack.getIterator());
}