/** * Constructs a chained iterator that will read from the provided iterator and attempt to downsampling to the provided proportion. */ ChainedDownsamplingIterator(final Iterator<SAMRecord> iterator, final double proportion, final int seed) { super(new ConstantMemoryDownsamplingIterator(iterator, adjustProportion(proportion), seed), proportion, seed); // Deal with the fact that the iterator will advance and discard some reads at construction final long discarded = ((ConstantMemoryDownsamplingIterator) getUnderlyingIterator()).getDiscardedCount(); recordDiscardRecords(discarded); }
@Override protected int calculateTemplatesToKeep(final int templatesRead, final double overallProportion) { // Calculate an adjusted proportion to keep, knowing what proportion the underlying iterator discarded final ConstantMemoryDownsamplingIterator iter = (ConstantMemoryDownsamplingIterator) getUnderlyingIterator(); final double priorProportion = iter.getAcceptedFraction(); final double p = Math.max(0, Math.min(1, overallProportion / priorProportion)); final int retval = super.calculateTemplatesToKeep(templatesRead, p); // Record all the discarded records to keep the overall statistics accurate, but do it after // the call to super() so it doesn't affect the proportion calculation. recordDiscardRecords(iter.getDiscardedCount()); return retval; } }
/**
 * Creates a new DownsamplingIterator using the supplied Strategy that attempts to read from the provided
 * iterator and return approximately {@code proportion} of the records read.
 *
 * @param iterator   The iterator from which to consume SAMRecords
 * @param strategy   The downsampling strategy to use
 * @param proportion The proportion of records the downsampling strategy should attempt to emit; must be a
 *                   number in the closed range [0, 1]
 * @param accuracy   If supported by the downsampling strategy, the accuracy goal for the downsampler. Higher
 *                   accuracy will generally require higher memory usage. An accuracy value of 0.0001 tells the
 *                   strategy to try and ensure the emitted proportion is within proportion +/- 0.0001.
 *                   It is only forwarded for the HighAccuracy and Chained strategies.
 * @param seed       The seed value to use for any random process used in down-sampling.
 * @return a downsampling iterator implementing the requested strategy
 * @throws IllegalArgumentException if {@code strategy} or {@code iterator} is null, or if {@code proportion}
 *                                  is NaN, less than 0 or greater than 1
 * @throws IllegalStateException    if {@code strategy} is a value this method does not handle
 */
public static DownsamplingIterator make(final Iterator<SAMRecord> iterator,
                                        final Strategy strategy,
                                        final double proportion,
                                        final double accuracy,
                                        final int seed) {
    if (strategy == null) {
        throw new IllegalArgumentException("strategy may not be null");
    }
    if (iterator == null) {
        throw new IllegalArgumentException("iterator may not be null");
    }
    // NaN compares false against everything, so it would pass both range checks below; reject it explicitly.
    if (Double.isNaN(proportion)) {
        throw new IllegalArgumentException("proportion may not be NaN");
    }
    // 0 and 1 are both accepted, so the messages describe an inclusive range.
    if (proportion < 0) {
        throw new IllegalArgumentException("proportion must be greater than or equal to 0");
    }
    if (proportion > 1) {
        throw new IllegalArgumentException("proportion must be less than or equal to 1");
    }

    switch (strategy) {
        case HighAccuracy:
            return new HighAccuracyDownsamplingIterator(iterator, proportion, seed).setTargetAccuracy(accuracy);
        case ConstantMemory:
            return new ConstantMemoryDownsamplingIterator(iterator, proportion, seed);
        case Chained:
            return new ChainedDownsamplingIterator(iterator, proportion, seed).setTargetAccuracy(accuracy);
        default:
            throw new IllegalStateException("Unexpected value for Strategy enum in switch statement. Bug!!");
    }
}
/** * Resets statistics before reading from the underlying iterator. */ @Override protected void readFromUnderlyingIterator(final List<SAMRecord> recs, final Set<String> names, final int templatesToRead) { // Reset the stats on the underlying iterator ((ConstantMemoryDownsamplingIterator) getUnderlyingIterator()).resetStatistics(); // Read from the underlying iterator super.readFromUnderlyingIterator(recs, names, templatesToRead); }
@Override protected int calculateTemplatesToKeep(final int templatesRead, final double overallProportion) { // Calculate an adjusted proportion to keep, knowing what proportion the underlying iterator discarded final ConstantMemoryDownsamplingIterator iter = (ConstantMemoryDownsamplingIterator) getUnderlyingIterator(); final double priorProportion = iter.getAcceptedFraction(); final double p = Math.max(0, Math.min(1, overallProportion / priorProportion)); final int retval = super.calculateTemplatesToKeep(templatesRead, p); // Record all the discarded records to keep the overall statistics accurate, but do it after // the call to super() so it doesn't affect the proportion calculation. recordDiscardRecords(iter.getDiscardedCount()); return retval; } }
/**
 * Creates a new DownsamplingIterator using the supplied Strategy that attempts to read from the provided
 * iterator and return approximately {@code proportion} of the records read.
 *
 * @param iterator   The iterator from which to consume SAMRecords
 * @param strategy   The downsampling strategy to use
 * @param proportion The proportion of records the downsampling strategy should attempt to emit; must be a
 *                   number in the closed range [0, 1]
 * @param accuracy   If supported by the downsampling strategy, the accuracy goal for the downsampler. Higher
 *                   accuracy will generally require higher memory usage. An accuracy value of 0.0001 tells the
 *                   strategy to try and ensure the emitted proportion is within proportion +/- 0.0001.
 *                   It is only forwarded for the HighAccuracy and Chained strategies.
 * @param seed       The seed value to use for any random process used in down-sampling.
 * @return a downsampling iterator implementing the requested strategy
 * @throws IllegalArgumentException if {@code strategy} or {@code iterator} is null, or if {@code proportion}
 *                                  is NaN, less than 0 or greater than 1
 * @throws IllegalStateException    if {@code strategy} is a value this method does not handle
 */
public static DownsamplingIterator make(final Iterator<SAMRecord> iterator,
                                        final Strategy strategy,
                                        final double proportion,
                                        final double accuracy,
                                        final int seed) {
    if (strategy == null) {
        throw new IllegalArgumentException("strategy may not be null");
    }
    if (iterator == null) {
        throw new IllegalArgumentException("iterator may not be null");
    }
    // NaN compares false against everything, so it would pass both range checks below; reject it explicitly.
    if (Double.isNaN(proportion)) {
        throw new IllegalArgumentException("proportion may not be NaN");
    }
    // 0 and 1 are both accepted, so the messages describe an inclusive range.
    if (proportion < 0) {
        throw new IllegalArgumentException("proportion must be greater than or equal to 0");
    }
    if (proportion > 1) {
        throw new IllegalArgumentException("proportion must be less than or equal to 1");
    }

    switch (strategy) {
        case HighAccuracy:
            return new HighAccuracyDownsamplingIterator(iterator, proportion, seed).setTargetAccuracy(accuracy);
        case ConstantMemory:
            return new ConstantMemoryDownsamplingIterator(iterator, proportion, seed);
        case Chained:
            return new ChainedDownsamplingIterator(iterator, proportion, seed).setTargetAccuracy(accuracy);
        default:
            throw new IllegalStateException("Unexpected value for Strategy enum in switch statement. Bug!!");
    }
}
/** * Resets statistics before reading from the underlying iterator. */ @Override protected void readFromUnderlyingIterator(final List<SAMRecord> recs, final Set<String> names, final int templatesToRead) { // Reset the stats on the underlying iterator ((ConstantMemoryDownsamplingIterator) getUnderlyingIterator()).resetStatistics(); // Read from the underlying iterator super.readFromUnderlyingIterator(recs, names, templatesToRead); }
/** * Constructs a chained iterator that will read from the provided iterator and attempt to downsampling to the provided proportion. */ ChainedDownsamplingIterator(final Iterator<SAMRecord> iterator, final double proportion, final int seed) { super(new ConstantMemoryDownsamplingIterator(iterator, adjustProportion(proportion), seed), proportion, seed); // Deal with the fact that the iterator will advance and discard some reads at construction final long discarded = ((ConstantMemoryDownsamplingIterator) getUnderlyingIterator()).getDiscardedCount(); recordDiscardRecords(discarded); }