/**
 * Samples the elements of one partition and emits every intermediate sample.
 *
 * <p>The sampler is seeded with {@code seed} plus the index of this subtask
 * (presumably so that parallel subtasks do not share one random sequence).
 * A with-replacement or without-replacement reservoir sampler is chosen
 * according to {@code withReplacement}.
 *
 * @param values the elements of the partition to sample from
 * @param out    collector that receives each intermediate sample
 * @throws Exception if sampling or collecting fails
 */
@Override
public void mapPartition(Iterable<T> values, Collector<IntermediateSampleData<T>> out) throws Exception {
    long subtaskSeed = seed + getRuntimeContext().getIndexOfThisSubtask();

    DistributedRandomSampler<T> sampler = withReplacement
            ? new ReservoirSamplerWithReplacement<T>(numSample, subtaskSeed)
            : new ReservoirSamplerWithoutReplacement<T>(numSample, subtaskSeed);

    // Forward every element the sampler selected from this partition.
    for (Iterator<IntermediateSampleData<T>> it = sampler.sampleInPartition(values.iterator()); it.hasNext(); ) {
        out.collect(it.next());
    }
}
} // closes the enclosing scope (its header is outside this chunk)
/**
 * Runs reservoir sampling over a single partition and forwards the result.
 *
 * <p>The random seed passed to the sampler is {@code seed} offset by the
 * index of this subtask. {@code withReplacement} selects which reservoir
 * sampler implementation is used; both receive {@code numSample} and the
 * offset seed.
 *
 * @param values input elements of this partition
 * @param out    sink for the sampled {@link IntermediateSampleData} items
 * @throws Exception propagated from the sampler or the collector
 */
@Override
public void mapPartition(Iterable<T> values, Collector<IntermediateSampleData<T>> out) throws Exception {
    final long partitionSeed = seed + getRuntimeContext().getIndexOfThisSubtask();

    final DistributedRandomSampler<T> partitionSampler;
    if (!withReplacement) {
        partitionSampler = new ReservoirSamplerWithoutReplacement<T>(numSample, partitionSeed);
    } else {
        partitionSampler = new ReservoirSamplerWithReplacement<T>(numSample, partitionSeed);
    }

    final Iterator<T> input = values.iterator();
    final Iterator<IntermediateSampleData<T>> picks = partitionSampler.sampleInPartition(input);
    while (picks.hasNext()) {
        IntermediateSampleData<T> pick = picks.next();
        out.collect(pick);
    }
}
} // closes the enclosing scope (its header is outside this chunk)
/**
 * Draws a sample from the given partition and emits each sampled item.
 *
 * <p>Steps:
 * <ol>
 *   <li>derive a seed from {@code seed} and this subtask's index,</li>
 *   <li>build the reservoir sampler matching {@code withReplacement},</li>
 *   <li>sample the partition and hand every result to {@code out}.</li>
 * </ol>
 *
 * @param values the partition's elements
 * @param out    receives the intermediate sample data
 * @throws Exception if the sampler or the collector throws
 */
@Override
public void mapPartition(Iterable<T> values, Collector<IntermediateSampleData<T>> out) throws Exception {
    // Step 1: seed derived from the configured seed and the subtask index.
    int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    long derivedSeed = seed + subtaskIndex;

    // Step 2: choose the sampler implementation.
    DistributedRandomSampler<T> chosen;
    if (withReplacement) {
        ReservoirSamplerWithReplacement<T> replacing =
                new ReservoirSamplerWithReplacement<T>(numSample, derivedSeed);
        chosen = replacing;
    } else {
        ReservoirSamplerWithoutReplacement<T> nonReplacing =
                new ReservoirSamplerWithoutReplacement<T>(numSample, derivedSeed);
        chosen = nonReplacing;
    }

    // Step 3: emit everything the sampler selected.
    Iterator<IntermediateSampleData<T>> results = chosen.sampleInPartition(values.iterator());
    for (; results.hasNext(); ) {
        out.collect(results.next());
    }
}
} // closes the enclosing scope (its header is outside this chunk)