/** * Buffers reads until either the end of the file is reached or enough reads have been buffered such * that downsampling can be performed to the desired target accuracy. Once reads have been buffered, * template names are randomly sampled out for discarding until the desired number of reads have * been discarded. * * @return True if one or more reads have been buffered, false otherwise */ protected boolean bufferNextChunkOfRecords(final double proportion, final double accuracy) { final int templatesToRead = (int) Math.ceil(1 / accuracy); final Set<String> names = new HashSet<String>(); final List<SAMRecord> recs = new ArrayList<SAMRecord>(templatesToRead); readFromUnderlyingIterator(recs, names, templatesToRead); // Determine how many templates to keep/discard final int templatesRead = names.size(); final int templatesToKeep = calculateTemplatesToKeep(templatesRead, proportion); // Randomly shuffle a list of all the template names, and then remove some from the set final int templatesToDiscard = templatesRead - templatesToKeep; final List<String> tmp = new ArrayList<String>(names); Collections.shuffle(tmp, this.random); for (int i = 0; i < templatesToDiscard; ++i) names.remove(tmp.get(i)); // Set all the instance state so that advance()/next() get what they need this.bufferedRecordsToKeep = names; this.bufferedRecords = recs.iterator(); this.totalTemplates += templatesRead; this.keptTemplates += names.size(); return !recs.isEmpty(); }
/** * Buffers reads until either the end of the file is reached or enough reads have been buffered such * that downsampling can be performed to the desired target accuracy. Once reads have been buffered, * template names are randomly sampled out for discarding until the desired number of reads have * been discarded. * * @return True if one or more reads have been buffered, false otherwise */ protected boolean bufferNextChunkOfRecords(final double proportion, final double accuracy) { final int templatesToRead = (int) Math.ceil(1 / accuracy); final Set<String> names = new HashSet<String>(); final List<SAMRecord> recs = new ArrayList<SAMRecord>(templatesToRead); readFromUnderlyingIterator(recs, names, templatesToRead); // Determine how many templates to keep/discard final int templatesRead = names.size(); final int templatesToKeep = calculateTemplatesToKeep(templatesRead, proportion); // Randomly shuffle a list of all the template names, and then remove some from the set final int templatesToDiscard = templatesRead - templatesToKeep; final List<String> tmp = new ArrayList<String>(names); Collections.shuffle(tmp, this.random); for (int i = 0; i < templatesToDiscard; ++i) names.remove(tmp.get(i)); // Set all the instance state so that advance()/next() get what they need this.bufferedRecordsToKeep = names; this.bufferedRecords = recs.iterator(); this.totalTemplates += templatesRead; this.keptTemplates += names.size(); return !recs.isEmpty(); }
/** * Resets statistics before reading from the underlying iterator. */ @Override protected void readFromUnderlyingIterator(final List<SAMRecord> recs, final Set<String> names, final int templatesToRead) { // Reset the stats on the underlying iterator ((ConstantMemoryDownsamplingIterator) getUnderlyingIterator()).resetStatistics(); // Read from the underlying iterator super.readFromUnderlyingIterator(recs, names, templatesToRead); }
/** * Resets statistics before reading from the underlying iterator. */ @Override protected void readFromUnderlyingIterator(final List<SAMRecord> recs, final Set<String> names, final int templatesToRead) { // Reset the stats on the underlying iterator ((ConstantMemoryDownsamplingIterator) getUnderlyingIterator()).resetStatistics(); // Read from the underlying iterator super.readFromUnderlyingIterator(recs, names, templatesToRead); }