ReflectionUtils.newInstance(job.getInputFormatClass(), conf); int numPartitions = job.getNumReduceTasks(); K[] samples = (K[])sampler.getSample(inf, job); LOG.info("Using " + samples.length + " samples"); RawComparator<K> comparator = (RawComparator<K>) job.getGroupingComparator();
ReflectionUtils.newInstance(job.getInputFormatClass(), conf); int numPartitions = job.getNumReduceTasks(); K[] samples = sampler.getSample(inf, job); LOG.info("Using " + samples.length + " samples"); RawComparator<K> comparator =
ReflectionUtils.newInstance(job.getInputFormatClass(), conf); int numPartitions = job.getNumReduceTasks(); K[] samples = (K[])sampler.getSample(inf, job); LOG.info("Using " + samples.length + " samples"); RawComparator<K> comparator =
ReflectionUtils.newInstance(job.getInputFormatClass(), conf); int numPartitions = job.getNumReduceTasks(); K[] samples = (K[])sampler.getSample(inf, job); LOG.info("Using " + samples.length + " samples"); RawComparator<K> comparator =
ReflectionUtils.newInstance(job.getInputFormatClass(), conf); int numPartitions = job.getNumReduceTasks(); K[] samples = (K[])sampler.getSample(inf, job); LOG.info("Using " + samples.length + " samples"); RawComparator<K> comparator =
ReflectionUtils.newInstance(job.getInputFormatClass(), conf); int numPartitions = job.getNumReduceTasks(); K[] samples = (K[])sampler.getSample(inf, job); LOG.info("Using " + samples.length + " samples"); RawComparator<K> comparator =
/** * Verify IntervalSampler contract, that samples are taken at regular * intervals from the given splits. */ @Test @SuppressWarnings("unchecked") // IntWritable comparator not typesafe public void testIntervalSampler() throws Exception { final int TOT_SPLITS = 16; final int PER_SPLIT_SAMPLE = 4; final int NUM_SAMPLES = TOT_SPLITS * PER_SPLIT_SAMPLE; final double FREQ = 1.0 / TOT_SPLITS; InputSampler.Sampler<IntWritable,NullWritable> sampler = new InputSampler.IntervalSampler<IntWritable,NullWritable>( FREQ, NUM_SAMPLES); int inits[] = new int[TOT_SPLITS]; for (int i = 0; i < TOT_SPLITS; ++i) { inits[i] = i; } Job ignored = Job.getInstance(); Object[] samples = sampler.getSample(new TestInputSamplerIF( NUM_SAMPLES, TOT_SPLITS, inits), ignored); assertEquals(NUM_SAMPLES, samples.length); Arrays.sort(samples, new IntWritable.Comparator()); for (int i = 0; i < NUM_SAMPLES; ++i) { assertEquals(i, ((IntWritable)samples[i]).get()); } }
/** * Verify SplitSampler contract, that an equal number of records are taken * from the first splits. */ @Test @SuppressWarnings("unchecked") // IntWritable comparator not typesafe public void testSplitSampler() throws Exception { final int TOT_SPLITS = 15; final int NUM_SPLITS = 5; final int STEP_SAMPLE = 5; final int NUM_SAMPLES = NUM_SPLITS * STEP_SAMPLE; InputSampler.Sampler<IntWritable,NullWritable> sampler = new InputSampler.SplitSampler<IntWritable,NullWritable>( NUM_SAMPLES, NUM_SPLITS); int inits[] = new int[TOT_SPLITS]; for (int i = 0; i < TOT_SPLITS; ++i) { inits[i] = i * STEP_SAMPLE; } Job ignored = Job.getInstance(); Object[] samples = sampler.getSample( new TestInputSamplerIF(100000, TOT_SPLITS, inits), ignored); assertEquals(NUM_SAMPLES, samples.length); Arrays.sort(samples, new IntWritable.Comparator()); for (int i = 0; i < NUM_SAMPLES; ++i) { assertEquals(i, ((IntWritable)samples[i]).get()); } }
/** * Verify IntervalSampler contract, that samples are taken at regular * intervals from the given splits. */ @Test @SuppressWarnings("unchecked") // IntWritable comparator not typesafe public void testIntervalSampler() throws Exception { final int TOT_SPLITS = 16; final int PER_SPLIT_SAMPLE = 4; final int NUM_SAMPLES = TOT_SPLITS * PER_SPLIT_SAMPLE; final double FREQ = 1.0 / TOT_SPLITS; InputSampler.Sampler<IntWritable,NullWritable> sampler = new InputSampler.IntervalSampler<IntWritable,NullWritable>( FREQ, NUM_SAMPLES); int inits[] = new int[TOT_SPLITS]; for (int i = 0; i < TOT_SPLITS; ++i) { inits[i] = i; } Job ignored = Job.getInstance(); Object[] samples = sampler.getSample(new TestInputSamplerIF( NUM_SAMPLES, TOT_SPLITS, inits), ignored); assertEquals(NUM_SAMPLES, samples.length); Arrays.sort(samples, new IntWritable.Comparator()); for (int i = 0; i < NUM_SAMPLES; ++i) { assertEquals(i, ((IntWritable)samples[i]).get()); } }
/** * Verify SplitSampler contract, that an equal number of records are taken * from the first splits. */ @Test @SuppressWarnings("unchecked") // IntWritable comparator not typesafe public void testSplitSampler() throws Exception { final int TOT_SPLITS = 15; final int NUM_SPLITS = 5; final int STEP_SAMPLE = 5; final int NUM_SAMPLES = NUM_SPLITS * STEP_SAMPLE; InputSampler.Sampler<IntWritable,NullWritable> sampler = new InputSampler.SplitSampler<IntWritable,NullWritable>( NUM_SAMPLES, NUM_SPLITS); int inits[] = new int[TOT_SPLITS]; for (int i = 0; i < TOT_SPLITS; ++i) { inits[i] = i * STEP_SAMPLE; } Job ignored = Job.getInstance(); Object[] samples = sampler.getSample( new TestInputSamplerIF(100000, TOT_SPLITS, inits), ignored); assertEquals(NUM_SAMPLES, samples.length); Arrays.sort(samples, new IntWritable.Comparator()); for (int i = 0; i < NUM_SAMPLES; ++i) { assertEquals(i, ((IntWritable)samples[i]).get()); } }