/**
 * Appends one {@link RepresentativeReadIndexer} entry to
 * {@code representativeReadIndicesForDuplicates}.
 *
 * <p>Both file indices arrive as {@code long} and are narrowed to {@code int} by an
 * explicit cast before being stored on the entry.
 *
 * @param representativeReadIndexInFile value stored (as int) in the entry's {@code representativeReadIndexInFile}
 * @param setSize                       value stored in the entry's {@code setSize}
 * @param read1IndexInFile              value stored (as int) in the entry's {@code readIndexInFile}
 */
private void addRepresentativeReadOfDuplicateSet(final long representativeReadIndexInFile,
                                                 final int setSize,
                                                 final long read1IndexInFile) {
    final RepresentativeReadIndexer entry = new RepresentativeReadIndexer();
    entry.readIndexInFile = (int) read1IndexInFile;
    entry.setSize = setSize;
    entry.representativeReadIndexInFile = (int) representativeReadIndexInFile;
    this.representativeReadIndicesForDuplicates.add(entry);
}
/** Delegates to the superclass {@code close()}, then calls {@code cleanup()} on {@code sorter}. */
@Override
public void close() {
    super.close();
    sorter.cleanup();
}
// NOTE(review): the tokens below close an enclosing declaration and call whose start is outside this excerpt.
}, ADD_MATE_CIGAR, IGNORE_MISSING_MATES);
private SortingCollection<String> makeSortingCollection() { final String name = getClass().getSimpleName(); final File tmpDir = IOUtil.createTempDir(name, null); tmpDir.deleteOnExit(); // 256 byte for one name, and 1/10 part of all memory for this, rough estimate long maxNamesInRam = Runtime.getRuntime().maxMemory() / 256 / 10; return SortingCollection.newInstance( String.class, new StringCodec(), String::compareTo, (int) Math.min(maxNamesInRam, Integer.MAX_VALUE), tmpDir ); }
VCFHeader header = reader.getFileHeader(); try (CloseableIterator<VariantContext> rit = reader.iterator()) { collection = SortingCollection.newInstance( VariantContext.class, new VCFRecordCodec(header), processContext.getFileSystemContext().getTemporaryDirectory().toPath()); while (rit.hasNext()) { collection.add(rit.next()); collection.doneAdding(); try (VariantContextWriter writer = processContext.getVariantContextWriter(tmpout, indexed)) { try (CloseableIterator<VariantContext> wit = collection.iterator()) { while (wit.hasNext()) { writer.add(wit.next()); collection.cleanup(); collection = null; if (tmpout != output) { if (collection != null) collection.cleanup(); if (tmpout != output & tmpout.exists()) { FileHelper.delete(tmpout, true);
// Create a SortingCollection of SAMRecords: records are encoded with a BAMRecordCodec built from
// the header and ordered by this.comparator; maxRecordsInRam and tmpDir are passed straight through.
final SortingCollection<SAMRecord> alignmentSorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), this.comparator, maxRecordsInRam, tmpDir);
// NOTE(review): presumably this add runs once per input record; the enclosing loop is not visible in this excerpt.
alignmentSorter.add(record);
// Progress reporting is optional: only record when a progress logger was supplied.
if (progressLogger != null) progressLogger.record(record);
// Hand the collection's iterator to this object as its wrapped iterator.
this.wrappedIterator = alignmentSorter.iterator();
// Logging is optional as well.
if (log != null) log.info("Duplicate set iterator initialized.");
if (TAG_DUPLICATE_SET_MEMBERS) { final RepresentativeReadIndexerCodec representativeIndexCodec = new RepresentativeReadIndexerCodec(); this.representativeReadIndicesForDuplicates = SortingCollection.newInstance(RepresentativeReadIndexer.class, representativeIndexCodec, Comparator.comparing(read -> read.readIndexInFile), this.pairSort.cleanup(); this.pairSort = null; this.fragSort.cleanup(); this.fragSort = null; this.representativeReadIndicesForDuplicates.doneAdding();
/**
 * Adds three strings to a collection created with {@code makeSortingCollection(10)}, then checks
 * that the temp directory is empty before {@code spillToDisk()}, holds exactly one entry after it,
 * that iteration yields the strings in order, and that {@code cleanup()} empties the directory again.
 */
@Test
public void spillToDiskTest() {
    final SortingCollection<String> collection = makeSortingCollection(10);
    final String[] expected = {"1", "2", "3"};
    for (int i = 0; i < expected.length; i++) {
        collection.add(expected[i]);
    }

    // Nothing has been written yet: no spill has been requested.
    Assert.assertEquals(tmpDir().list().length, 0);

    collection.spillToDisk();
    // The explicit spill must produce exactly one entry in the temp directory.
    Assert.assertEquals(tmpDir().list().length, 1);

    assertIteratorEqualsList(expected, collection.iterator());

    collection.cleanup();
    // cleanup() must leave the temp directory empty.
    Assert.assertEquals(tmpDir().list().length, 0);
}
this.pairSort = SortingCollection.newInstance(ReadEndsForMarkDuplicates.class, pairCodec, new ReadEndsMDComparator(useBarcodes), TMP_DIR); this.fragSort = SortingCollection.newInstance(ReadEndsForMarkDuplicates.class, fragCodec, new ReadEndsMDComparator(useBarcodes), final long indexForRead = assumedSortOrder == SAMFileHeader.SortOrder.queryname ? duplicateIndex : index; final ReadEndsForMarkDuplicates fragmentEnd = buildReadEnds(header, indexForRead, rec, useBarcodes); this.fragSort.add(fragmentEnd); this.pairSort.add(pairedEnds); this.pairSort.doneAdding(); this.fragSort.doneAdding();
/**
 * Generates strings, adds them to a SortingCollection, and confirms that the expected
 * strings come out of the iterator in sorted order. With destructive iteration turned off
 * the collection is iterated twice and must yield the same sequence both times.
 *
 * @param testName             label supplied by the data provider; not used in the test body
 * @param numStringsToGenerate how many random strings to add to the collection
 * @param maxRecordsInRam      record limit passed to {@code makeSortingCollection}
 */
@Test(dataProvider = "test1")
public void testPositive(final String testName, final int numStringsToGenerate, final int maxRecordsInRam) {
    final String[] expected = new String[numStringsToGenerate];
    int generatedCount = 0;
    final SortingCollection<String> collection = makeSortingCollection(maxRecordsInRam);
    for (final String generated : new RandomStringGenerator(numStringsToGenerate)) {
        collection.add(generated);
        expected[generatedCount] = generated;
        generatedCount++;
    }
    Arrays.sort(expected, new StringComparator());

    // The temp directory is expected to be empty exactly when the number of
    // generated strings does not exceed the in-RAM record limit.
    Assert.assertEquals(tmpDirIsEmpty(), numStringsToGenerate <= maxRecordsInRam);

    // Non-destructive iteration: a second pass must see the same contents.
    collection.setDestructiveIteration(false);
    assertIteratorEqualsList(expected, collection.iterator());
    assertIteratorEqualsList(expected, collection.iterator());

    collection.cleanup();
    Assert.assertEquals(tmpDir().list().length, 0);
}
SortingCollection.newInstance( VariantContext.class, new VCFRecordCodec(outputHeader, VALIDATION_STRINGENCY != ValidationStringency.STRICT), log.info("Reading entries from input file " + readerCount); for (final VariantContext variantContext : reader) { sorter.add(variantContext); readProgress.record(variantContext.getContig(), variantContext.getStart());
// Create a SortingCollection of SAMRecords encoded with a BAMRecordCodec for the header and ordered
// by a SAMRecordCoordinateComparator, with this.maxRecordsInRam as the record limit. No temporary
// directory argument is passed in this call.
final SortingCollection<SAMRecord> sorted1 = SortingCollection.newInstance(
        SAMRecord.class, new BAMRecordCodec(header), new SAMRecordCoordinateComparator(), this.maxRecordsInRam);
// NOTE(review): code between these two statements appears to be elided in this excerpt.
// Calls cleanup() on the sink's sorter.
sink.sorter.cleanup();
/**
 * Closes the writer, if one is set, and then calls {@code doneAdding()} on the sorter, if one is set.
 * Either field may be null, in which case that step is skipped.
 */
void close() {
    if (this.writer != null) {
        this.writer.close();
    }
    if (this.sorter != null) {
        // The sorter is told that no more records will be added; it is not cleaned up here.
        this.sorter.doneAdding();
    }
}
}
/**
 * Static factory that forwards to the constructor, so callers do not have to
 * spell out the type parameter themselves.
 *
 * @param <T>             type of the records held by the collection
 * @param componentType   Class of the record to be sorted; required because the element
 *                        type is not otherwise available at runtime
 * @param codec           For writing records to file and reading them back into RAM
 * @param comparator      Defines output sort order
 * @param maxRecordsInRAM how many records to accumulate in memory before spilling to disk
 * @param tmpDir          Where to write files of records that will not fit in RAM
 * @return a new SortingCollection built from the given arguments
 */
public static <T> SortingCollection<T> newInstance(final Class<T> componentType,
                                                   final SortingCollection.Codec<T> codec,
                                                   final Comparator<T> comparator,
                                                   final int maxRecordsInRAM,
                                                   final File... tmpDir) {
    final SortingCollection<T> instance =
            new SortingCollection<T>(componentType, codec, comparator, maxRecordsInRAM, tmpDir);
    return instance;
}
/**
 * Adds a record to the collection, spilling the in-memory records to disk first
 * when the in-memory buffer has reached {@code maxRecordsInRam}.
 *
 * @param rec the record to add
 * @throws IllegalStateException if {@code doneAdding()} or {@code iterator()} has already been called
 */
public void add(final T rec) {
    // Reject writes once the collection has been sealed or iteration has begun.
    if (this.doneAdding) {
        throw new IllegalStateException("Cannot add after calling doneAdding()");
    } else if (this.iterationStarted) {
        throw new IllegalStateException("Cannot add after calling iterator()");
    }

    // Buffer full: flush it to disk before storing the new record.
    final boolean bufferFull = (this.numRecordsInRam == this.maxRecordsInRam);
    if (bufferFull) {
        spillToDisk();
    }

    this.ramRecords[this.numRecordsInRam++] = rec;
}
/**
 * Wraps the iterator of each relevant sorter in a {@link PeekableIterator}.
 *
 * <p>When {@code outputByReadGroup} is set, one iterator is produced for every sorter in
 * {@code sorterMap}; otherwise the list holds a single iterator over {@code singleSorter}.
 *
 * @return a new list of peekable iterators over the sorted records
 */
List<PeekableIterator<SAMRecord>> iterators() {
    final List<PeekableIterator<SAMRecord>> result = new ArrayList<>();

    // Single-sorter mode: exactly one iterator.
    if (!outputByReadGroup) {
        result.add(new PeekableIterator<>(singleSorter.iterator()));
        return result;
    }

    // Per-read-group mode: one iterator per sorter in the map.
    for (final SortingCollection<SAMRecord> groupSorter : sorterMap.values()) {
        result.add(new PeekableIterator<>(groupSorter.iterator()));
    }
    return result;
}
}
try { Arrays.sort(this.ramRecords, 0, this.numRecordsInRam, this.comparator); final File f = newTempFile(); OutputStream os = null; try {
collection = SortingCollection.newInstance( SAMRecord.class, new BAMRecordCodec(header), fsc.getTemporaryDirectory().toPath()); while (rit.hasNext()) { collection.add(rit.next()); collection.doneAdding(); try (SAMFileWriter writer = writerFactory.makeSAMOrBAMWriter(header, true, tmpFile)) { writer.setProgressLogger(new ProgressLogger(log, 10000000)); try (CloseableIterator<SAMRecord> wit = collection.iterator()) { @SuppressWarnings("resource") // bad eclipse warning Iterator<SAMRecord> it = wit; collection.cleanup(); collection = null; if (tmpFile != output) { if (collection != null) collection.cleanup(); if (tmpFile != output & tmpFile.exists()) { FileHelper.delete(tmpFile, true);
// Create a SortingCollection of SAMRecords: records are encoded with a BAMRecordCodec built from
// the header and ordered by this.comparator; maxRecordsInRam and tmpDir are passed straight through.
final SortingCollection<SAMRecord> alignmentSorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), this.comparator, maxRecordsInRam, tmpDir);
// NOTE(review): presumably this add runs once per input record; the enclosing loop is not visible in this excerpt.
alignmentSorter.add(record);
// Progress reporting is optional: only record when a progress logger was supplied.
if (progressLogger != null) progressLogger.record(record);
// Hand the collection's iterator to this object as its wrapped iterator.
this.wrappedIterator = alignmentSorter.iterator();
// Logging is optional as well.
if (log != null) log.info("Duplicate set iterator initialized.");