public List<Pair<K3, V3>> run() throws IOException { List<Pair<K2, V2>> mapOutputs = new ArrayList<Pair<K2, V2>>(); // run map component for (Pair<K1, V1> input : inputList) { LOG.debug("Mapping input " + input.toString() + ")"); mapOutputs.addAll(new MapDriver<K1, V1, K2, V2>(myMapper).withInput( input).withCounters(getCounters()).withConfiguration(configuration).run()); } List<Pair<K2, List<V2>>> reduceInputs = shuffle(mapOutputs); List<Pair<K3, V3>> reduceOutputs = new ArrayList<Pair<K3, V3>>(); for (Pair<K2, List<V2>> input : reduceInputs) { K2 inputKey = input.getFirst(); List<V2> inputValues = input.getSecond(); StringBuilder sb = new StringBuilder(); formatValueList(inputValues, sb); LOG.debug("Reducing input (" + inputKey.toString() + ", " + sb.toString() + ")"); reduceOutputs.addAll(new ReduceDriver<K2, V2, K3, V3>(myReducer) .withCounters(getCounters()).withConfiguration(configuration) .withInputKey(inputKey).withInputValues(inputValues).run()); } return reduceOutputs; }
@Override public List<Pair<K3, V3>> run() throws IOException { try { preRunChecks(myMapper, myReducer); initDistributedCache(); List<Pair<K2, V2>> mapOutputs = new ArrayList<Pair<K2, V2>>(); // run map component LOG.debug("Starting map phase with mapper: " + myMapper); mapOutputs.addAll(MapDriver.newMapDriver(myMapper) .withCounters(getCounters()).withConfiguration(getConfiguration()) .withAll(inputList).withMapInputPath(getMapInputPath()).run()); if (myCombiner != null) { // User has specified a combiner. Run this and replace the mapper // outputs // with the result of the combiner. LOG.debug("Starting combine phase with combiner: " + myCombiner); mapOutputs = new ReducePhaseRunner<K2, V2, K2, V2>(inputFormatClass, getConfiguration(), counters, getOutputSerializationConfiguration(), outputFormatClass) .runReduce(sortAndGroup(mapOutputs), myCombiner); } // Run the reduce phase. LOG.debug("Starting reduce phase with reducer: " + myReducer); return new ReducePhaseRunner<K2, V2, K3, V3>(inputFormatClass, getConfiguration(), counters, getOutputSerializationConfiguration(), outputFormatClass).runReduce(sortAndGroup(mapOutputs), myReducer); } finally { cleanupDistributedCache(); } }