/**
 * Rebuilds the bundle -> node index -> paths mapping from the value stored under
 * {@code CRUNCH_INPUTS} in the job's configuration.
 *
 * <p>The value is split into records on {@code RECORD_SEP}. Each record is split with
 * {@code SPLITTER}, and its first three fields are read as: the serialized input
 * {@link FormatBundle}, the integer node index, and a path string.
 *
 * @param job the job context whose configuration is read
 * @return a mutable map keyed by format bundle, then by node index, holding the paths
 *         listed for that pair; empty when {@code CRUNCH_INPUTS} is unset or empty
 * @throws NumberFormatException if a record's node index field is not an integer
 * @throws IndexOutOfBoundsException if a record has fewer than three fields
 */
public static Map<FormatBundle, Map<Integer, List<Path>>> getFormatNodeMap(JobContext job) {
  Map<FormatBundle, Map<Integer, List<Path>>> formatNodeMap = Maps.newHashMap();
  Configuration conf = job.getConfiguration();
  String crunchInputs = conf.get(CRUNCH_INPUTS);
  // Splitter.split rejects null, so a job with no configured inputs must be
  // handled before splitting; it simply has no entries.
  if (crunchInputs == null || crunchInputs.isEmpty()) {
    return formatNodeMap;
  }
  for (String input : Splitter.on(RECORD_SEP).split(crunchInputs)) {
    List<String> fields = Lists.newArrayList(SPLITTER.split(input));
    FormatBundle<InputFormat> inputBundle =
        FormatBundle.fromSerialized(fields.get(0), InputFormat.class);

    // Look up (or lazily create) the per-bundle map once instead of re-fetching it.
    Map<Integer, List<Path>> nodeMap = formatNodeMap.get(inputBundle);
    if (nodeMap == null) {
      nodeMap = Maps.<Integer, List<Path>> newHashMap();
      formatNodeMap.put(inputBundle, nodeMap);
    }

    Integer nodeIndex = Integer.valueOf(fields.get(1));
    List<Path> nodePaths = nodeMap.get(nodeIndex);
    if (nodePaths == null) {
      nodePaths = Lists.<Path> newLinkedList();
      nodeMap.put(nodeIndex, nodePaths);
    }

    nodePaths.add(new Path(fields.get(2)));
  }
  return formatNodeMap;
}
/**
 * Decodes the named outputs stored under {@code CRUNCH_OUTPUTS} in the given configuration.
 *
 * <p>The value is split into records on {@code RECORD_SEP}. Each record is split with
 * {@code SPLITTER}, and its first four fields are read as: the output name, the serialized
 * output {@link FormatBundle}, the key class name, and the value class name.
 *
 * @param conf the configuration to read from
 * @return a mutable map from output name to its {@link OutputConfig}; empty when
 *         {@code CRUNCH_OUTPUTS} is unset or empty
 * @throws CrunchRuntimeException if a key or value class cannot be loaded
 */
public static Map<String, OutputConfig> getNamedOutputs(Configuration conf) {
  Map<String, OutputConfig> out = Maps.newHashMap();
  String serOut = conf.get(CRUNCH_OUTPUTS);
  if (serOut == null || serOut.isEmpty()) {
    return out;
  }
  // Iterate the value that was just validated rather than reading the property again.
  for (String input : Splitter.on(RECORD_SEP).split(serOut)) {
    List<String> fields = Lists.newArrayList(SPLITTER.split(input));
    String name = fields.get(0);
    FormatBundle<OutputFormat> bundle = FormatBundle.fromSerialized(fields.get(1), conf);
    try {
      Class<?> keyClass = Class.forName(fields.get(2));
      Class<?> valueClass = Class.forName(fields.get(3));
      out.put(name, new OutputConfig(bundle, keyClass, valueClass));
    } catch (ClassNotFoundException e) {
      throw new CrunchRuntimeException(e);
    }
  }
  return out;
}

private static final String BASE_OUTPUT_NAME = "mapreduce.output.basename";
/**
 * Reads the {@code CRUNCH_INPUTS} entry of the job's configuration and expands it into
 * a nested lookup: format bundle, then node index, then the list of decoded paths.
 *
 * <p>Records are separated by {@code RECORD_SEP}; within a record, {@code SPLITTER}
 * yields the serialized bundle, the node index, and a {@code PATH_SEP}-separated list of
 * encoded paths, each of which is passed through {@code decodePath}.
 *
 * @param job the job context supplying the configuration
 * @return an empty immutable map when {@code CRUNCH_INPUTS} is unset or empty; otherwise
 *         a mutable map populated from the configured records
 */
public static Map<FormatBundle, Map<Integer, List<Path>>> getFormatNodeMap(JobContext job) {
  Map<FormatBundle, Map<Integer, List<Path>>> result = Maps.newHashMap();
  Configuration conf = job.getConfiguration();
  String serialized = conf.get(CRUNCH_INPUTS);
  if (serialized == null || serialized.isEmpty()) {
    return ImmutableMap.of();
  }

  for (String record : Splitter.on(RECORD_SEP).split(serialized)) {
    List<String> parts = Lists.newArrayList(SPLITTER.split(record));
    FormatBundle<InputFormat> bundle =
        FormatBundle.fromSerialized(parts.get(0), job.getConfiguration());

    // First level: one node-index map per bundle, created on first sight.
    Map<Integer, List<Path>> pathsByNode = result.get(bundle);
    if (pathsByNode == null) {
      pathsByNode = Maps.<Integer, List<Path>>newHashMap();
      result.put(bundle, pathsByNode);
    }

    // Second level: one path list per node index, created on first sight.
    Integer nodeIndex = Integer.valueOf(parts.get(1));
    List<Path> paths = pathsByNode.get(nodeIndex);
    if (paths == null) {
      paths = Lists.<Path>newLinkedList();
      pathsByNode.put(nodeIndex, paths);
    }

    for (String encoded : Splitter.on(PATH_SEP).split(parts.get(2))) {
      paths.add(decodePath(encoded));
    }
  }
  return result;
}
/**
 * Decodes the named outputs stored under {@code CRUNCH_OUTPUTS} in the task's configuration.
 *
 * <p>The value is split into records on {@code RECORD_SEP}. Each record is split with
 * {@code SPLITTER}, and its first four fields are read as: the output name, the serialized
 * output {@link FormatBundle}, the key class name, and the value class name.
 *
 * @param context the task context whose configuration is read
 * @return a mutable map from output name to its {@link OutputConfig}; empty when
 *         {@code CRUNCH_OUTPUTS} is unset or empty
 * @throws CrunchRuntimeException if a key or value class cannot be loaded
 */
private static Map<String, OutputConfig> getNamedOutputs(
    TaskInputOutputContext<?, ?, ?, ?> context) {
  Map<String, OutputConfig> out = Maps.newHashMap();
  Configuration conf = context.getConfiguration();
  String serOut = conf.get(CRUNCH_OUTPUTS);
  // Splitter.split rejects null, so a task with no named outputs configured must be
  // handled before splitting; it simply has no entries.
  if (serOut == null || serOut.isEmpty()) {
    return out;
  }
  for (String input : Splitter.on(RECORD_SEP).split(serOut)) {
    List<String> fields = Lists.newArrayList(SPLITTER.split(input));
    String name = fields.get(0);
    FormatBundle<OutputFormat> bundle =
        FormatBundle.fromSerialized(fields.get(1), OutputFormat.class);
    try {
      Class<?> keyClass = Class.forName(fields.get(2));
      Class<?> valueClass = Class.forName(fields.get(3));
      out.put(name, new OutputConfig(bundle, keyClass, valueClass));
    } catch (ClassNotFoundException e) {
      throw new CrunchRuntimeException(e);
    }
  }
  return out;
}