/**
 * Scatters an interval list and writes every resulting shard to its own file.
 *
 * Shards are numbered starting at 1 and the number is formatted with the
 * pattern "0000" before being handed to createDirectoryAndGetScatterFile.
 *
 * @param list The list of intervals to scatter
 * @return The scattered intervals, represented as a {@link List} of {@link IntervalList}
 */
private List<IntervalList> writeScatterIntervals(final IntervalList list) {
    final List<IntervalList> shards = SUBDIVISION_MODE.make().scatter(list, SCATTER_COUNT);
    final DecimalFormat shardNumberFormat = new DecimalFormat("0000");

    int shardNumber = 0;
    for (final IntervalList shard : shards) {
        shardNumber++;
        final String formattedNumber = shardNumberFormat.format(shardNumber);
        shard.write(createDirectoryAndGetScatterFile(OUTPUT, shards.size(), formattedNumber));
    }

    return shards;
}
/**
 * Scatters an interval list and writes every resulting shard to its own file.
 *
 * Shards are numbered starting at 1 and the number is formatted with the
 * pattern "0000" before being handed to createDirectoryAndGetScatterFile.
 *
 * @param list The list of intervals to scatter
 * @return The scattered intervals, represented as a {@link List} of {@link IntervalList}
 */
private List<IntervalList> writeScatterIntervals(final IntervalList list) {
    final List<IntervalList> shards = SUBDIVISION_MODE.make().scatter(list, SCATTER_COUNT);
    final DecimalFormat shardNumberFormat = new DecimalFormat("0000");

    int shardNumber = 0;
    for (final IntervalList shard : shards) {
        shardNumber++;
        final String formattedNumber = shardNumberFormat.format(shardNumber);
        shard.write(createDirectoryAndGetScatterFile(OUTPUT, shards.size(), formattedNumber));
    }

    return shards;
}
/**
 * Writes each pre-divided group of genome locs to its matching output file.
 *
 * The interval number passed to toInterval starts at 1 and keeps increasing
 * across all groups; it is not reset per output file.
 *
 * @param fileHeader   The sam file header.
 * @param splits       Pre-divided genome locs returned by splitFixedIntervals.
 * @param scatterParts The output interval lists to write to.
 * @throws UserException.BadArgumentValue if the number of splits differs from the number of scatter parts.
 */
public static void scatterFixedIntervals(SAMFileHeader fileHeader, List<List<GenomeLoc>> splits, List<File> scatterParts) {
    final int splitCount = splits.size();
    final int partCount = scatterParts.size();
    if (splitCount != partCount) {
        throw new UserException.BadArgumentValue("splits", String.format("Split points %d does not equal the number of scatter parts %d.", splitCount, partCount));
    }

    int nextLocNumber = 1;
    int partIdx = 0;
    for (final List<GenomeLoc> split : splits) {
        final IntervalList partList = new IntervalList(fileHeader);
        for (final GenomeLoc loc : split) {
            partList.add(toInterval(loc, nextLocNumber));
            nextLocNumber++;
        }
        partList.write(scatterParts.get(partIdx));
        partIdx++;
    }
}
@Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); final IntervalList intervalList = VCFFileReader.fromVcf(INPUT, INCLUDE_FILTERED); // Sort and write the output intervalList.uniqued().write(OUTPUT); return 0; } }
@Override protected int doWork() { IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); final IntervalList intervalList = VCFFileReader.fromVcf(INPUT, INCLUDE_FILTERED); // Sort and write the output intervalList.uniqued().write(OUTPUT); return 0; } }
// Write outList to the scatter part found at index partIdx of scatterParts.
outList.write(scatterParts.get(partIdx));
/**
 * Finishes the traversal: adds any still-pending reportable events to the
 * collected intervals and writes all intervals to {@code out}.
 *
 * If the output file's extension is "interval_list", the intervals are written
 * as an IntervalList built on the toolkit's master sequence dictionary.
 * Otherwise each GenomeLoc is written as one text line.
 *
 * @param sum the accumulated event pair and the intervals gathered so far
 * @throws GATKException if writing the plain-text output fails with an IOException
 */
public void onTraversalDone(EventPair sum) {
    if (sum.left != null && sum.left.isReportableEvent()) {
        sum.intervals.add(sum.left.getLoc());
    }
    if (sum.right != null && sum.right.isReportableEvent()) {
        sum.intervals.add(sum.right.getLoc());
    }

    final boolean writeAsIntervalList = FilenameUtils.getExtension(out.getName()).equals("interval_list");

    if (!writeAsIntervalList) {
        // Plain-text output: one GenomeLoc per line
        try (BufferedWriter lineWriter = IOUtil.openFileForBufferedWriting(out)) {
            for (final GenomeLoc loc : sum.intervals) {
                lineWriter.write(loc.toString());
                lineWriter.newLine();
            }
        } catch (final IOException e) {
            throw new GATKException("Error writing out intervals to file: " + out.getAbsolutePath(), e);
        }
        return;
    }

    // Interval-list output: needs a header carrying the master sequence dictionary
    final SAMFileHeader dictionaryHeader = new SAMFileHeader();
    dictionaryHeader.setSequenceDictionary(getToolkit().getMasterSequenceDictionary());

    final IntervalList picardList = new IntervalList(dictionaryHeader);
    for (final GenomeLoc loc : sum.intervals) {
        picardList.add(new Interval(loc.getContig(), loc.getStart(), loc.getStop()));
    }
    picardList.write(out);
}
@Override protected int doWork() { IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE); IOUtil.assertFileIsWritable(OUTPUT); final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE, true); if (!refFile.isIndexed()) { throw new IllegalStateException("Reference file must be indexed, but no index file was found"); } if (refFile.getSequenceDictionary() == null) { throw new IllegalStateException("Reference file must include a dictionary, but no dictionary file was found"); } // get the intervals final IntervalList intervals = segregateReference(refFile, MAX_TO_MERGE); log.info(String.format("Found %d intervals in %d loci during %s seconds", intervalProgress.getCount(), locusProgress.getCount(), locusProgress.getElapsedSeconds())); /********************************** * Now output regions for calling * **********************************/ final IntervalList outputIntervals = new IntervalList(intervals.getHeader().clone()); log.info(String.format("Collecting requested type of intervals (%s)", OUTPUT_TYPE)); intervals.getIntervals().stream().filter(i -> OUTPUT_TYPE.accepts(i.getName())).forEach(outputIntervals::add); log.info("Writing Intervals."); outputIntervals.write(OUTPUT); log.info(String.format("Execution ending. Total time %d seconds", locusProgress.getElapsedSeconds())); return 0; }
/**
 * Reads intervals from a file, computes their flanking intervals, and writes
 * the flanks to an interval list file.
 *
 * Flanks are passed to toInterval together with a running number starting at 1.
 *
 * @param reference         the reference file, used to build the parser and the output sequence dictionary
 * @param inputIntervals    the file holding the original intervals
 * @param flankingIntervals the file the flanking intervals are written to
 * @param basePairs         the value forwarded to getFlankingIntervals
 * @throws UserException.MalformedFile if the input holds no intervals or no flanks could be produced
 */
public static void writeFlankingIntervals(File reference, File inputIntervals, File flankingIntervals, int basePairs) {
    final ReferenceSequenceFile refFile = createReference(reference);
    final GenomeLocParser locParser = new GenomeLocParser(refFile);

    final List<GenomeLoc> inputLocs = intervalFileToList(locParser, inputIntervals.getAbsolutePath());
    if (inputLocs.isEmpty()) {
        throw new UserException.MalformedFile(inputIntervals, "File contains no intervals");
    }

    final List<GenomeLoc> flanks = getFlankingIntervals(locParser, inputLocs, basePairs);
    if (flanks.isEmpty()) {
        throw new UserException.MalformedFile(inputIntervals, "Unable to produce any flanks for the intervals");
    }

    final SAMFileHeader header = new SAMFileHeader();
    header.setSequenceDictionary(refFile.getSequenceDictionary());

    final IntervalList flankList = new IntervalList(header);
    int flankNumber = 1;
    for (final GenomeLoc flank : flanks) {
        flankList.add(toInterval(flank, flankNumber));
        flankNumber++;
    }
    flankList.write(flankingIntervals);
}
@Override protected int doWork() { IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE); IOUtil.assertFileIsWritable(OUTPUT); final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE, true); if (!refFile.isIndexed()) { throw new IllegalStateException("Reference file must be indexed, but no index file was found"); } if (refFile.getSequenceDictionary() == null) { throw new IllegalStateException("Reference file must include a dictionary, but no dictionary file was found"); } // get the intervals final IntervalList intervals = segregateReference(refFile, MAX_TO_MERGE); log.info(String.format("Found %d intervals in %d loci during %s seconds", intervalProgress.getCount(), locusProgress.getCount(), locusProgress.getElapsedSeconds())); /********************************** * Now output regions for calling * **********************************/ final IntervalList outputIntervals = new IntervalList(intervals.getHeader().clone()); log.info(String.format("Collecting requested type of intervals (%s)", OUTPUT_TYPE)); intervals.getIntervals().stream().filter(i -> OUTPUT_TYPE.accepts(i.getName())).forEach(outputIntervals::add); log.info("Writing Intervals."); outputIntervals.write(OUTPUT); log.info(String.format("Execution ending. Total time %d seconds", locusProgress.getElapsedSeconds())); return 0; }
/**
 * Writes a single-interval interval list built from the supplied contig and
 * coordinates to a temporary file, then loads that file through
 * IntervalUtils.loadIntervals. Per the annotation, the test passes only if a
 * UserException is thrown.
 */
@Test(dataProvider="invalidIntervalTestData", expectedExceptions=UserException.class, enabled = true)
public void testInvalidPicardIntervalHandling(GenomeLocParser genomeLocParser, String contig, int intervalStart, int intervalEnd ) throws Exception {
    // Header carrying only the contig under test
    final SAMFileHeader header = new SAMFileHeader();
    header.addSequence(genomeLocParser.getContigInfo(contig));

    final IntervalList intervalList = new IntervalList(header);
    final Interval intervalUnderTest = new Interval(contig, intervalStart, intervalEnd, true, "dummyname");
    intervalList.add(intervalUnderTest);

    final File intervalFile = createTempFile("testInvalidPicardIntervalHandling", ".intervals");
    intervalList.write(intervalFile);

    final List<IntervalBinding<Feature>> bindings = new ArrayList<IntervalBinding<Feature>>(1);
    final String intervalFilePath = intervalFile.getAbsolutePath();
    bindings.add(new IntervalBinding<Feature>(intervalFilePath));

    IntervalUtils.loadIntervals(bindings, IntervalSetRule.UNION, IntervalMergingRule.ALL, 0, genomeLocParser);
}
// Write the list to OUTPUT and expose it as a one-element result list.
output.write(OUTPUT);
resultIntervals = Collections.singletonList(output);
} else {
// Register the file for deletion at JVM exit BEFORE writing to it, so the
// file is still cleaned up if the write throws part-way through.
rRnaIntervalsFile.deleteOnExit();
rRnaIntervalList.write(rRnaIntervalsFile);
// Create a temporary file named "tmp.rRna.*.interval_list", register it for
// deletion at JVM exit, then write rRnaIntervalList into it.
final File rRnaIntervalsFile = File.createTempFile("tmp.rRna.", ".interval_list");
rRnaIntervalsFile.deleteOnExit();
rRnaIntervalList.write(rRnaIntervalsFile);
// Apply the optional sort first and the optional unique second, then write.
if (SORT) {
    out = out.sorted();
}
if (UNIQUE) {
    out = out.uniqued();
}
out.write(OUTPUT);
// Apply the optional sort first and the optional unique second, then write.
if (SORT) {
    out = out.sorted();
}
if (UNIQUE) {
    out = out.uniqued();
}
out.write(OUTPUT);
// Create a temporary file named "tmp.rRna.*.interval_list", register it for
// deletion at JVM exit, then write rRnaIntervalList into it.
final File rRnaIntervalsFile = File.createTempFile("tmp.rRna.", ".interval_list");
rRnaIntervalsFile.deleteOnExit();
rRnaIntervalList.write(rRnaIntervalsFile);
// Create a temporary file named "tmp.rRna.*.interval_list", register it for
// deletion at JVM exit, then write rRnaIntervalList into it.
final File rRnaIntervalsFile = File.createTempFile("tmp.rRna.", ".interval_list");
rRnaIntervalsFile.deleteOnExit();
rRnaIntervalList.write(rRnaIntervalsFile);