private static void addInputPath(Job job, Iterable<String> pathStrings, Class<? extends InputFormat> inputFormatClass) { Configuration conf = job.getConfiguration(); StringBuilder inputFormats = new StringBuilder( StringUtils.nullToEmptyNonDruidDataString(conf.get(MultipleInputs.DIR_FORMATS)) ); String[] paths = Iterables.toArray(pathStrings, String.class); for (int i = 0; i < paths.length - 1; i++) { if (inputFormats.length() > 0) { inputFormats.append(','); } inputFormats.append(paths[i]).append(';').append(inputFormatClass.getName()); } if (inputFormats.length() > 0) { conf.set(MultipleInputs.DIR_FORMATS, inputFormats.toString()); } // add last one separately for possible initialization in MultipleInputs MultipleInputs.addInputPath(job, new Path(paths[paths.length - 1]), inputFormatClass); }
MultipleInputs.addInputPath(job, new Path("/dummy/tobe/ignored"), DatasourceInputFormat.class); return job;
MultipleInputs.addInputPath(job, transactions, TextInputFormat.class, LeftJoinTransactionMapper.class); MultipleInputs.addInputPath(job, users, TextInputFormat.class, LeftJoinUserMapper.class);
public static void initJoinMRJob(Job job, String prospectsPath, String spoPath, Class<? extends Mapper<CompositeType,TripleCard,?,?>> mapperClass, String outPath, String auths) throws AccumuloSecurityException { MultipleInputs.addInputPath(job, new Path(prospectsPath), SequenceFileInputFormat.class, mapperClass); MultipleInputs.addInputPath(job, new Path(spoPath), SequenceFileInputFormat.class, mapperClass); job.setMapOutputKeyClass(CompositeType.class); job.setMapOutputValueClass(TripleCard.class); SequenceFileOutputFormat.setOutputPath(job, new Path(outPath)); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(TripleEntry.class); job.setOutputValueClass(CardList.class); }
/** * Set up the MapReduce job to use Accumulo as an input. * @param tableMapper Mapper class to use */ protected void configureAccumuloInput(Class<? extends Mapper<Key,Value,?,?>> tableMapper) throws AccumuloSecurityException { MRReasoningUtils.configureAccumuloInput(job); MultipleInputs.addInputPath(job, new Path("/tmp/input"), AccumuloInputFormat.class, tableMapper); }
/** * Set up the MapReduce job to use an RDF file as an input. * @param rdfMapper class to use */ protected void configureRdfInput(Path inputPath, Class<? extends Mapper<LongWritable, RyaStatementWritable, ?, ?>> rdfMapper) { Configuration conf = job.getConfiguration(); String format = conf.get(MRUtils.FORMAT_PROP, RDFFormat.RDFXML.getName()); conf.set(MRUtils.FORMAT_PROP, format); MultipleInputs.addInputPath(job, inputPath, RdfFileInputFormat.class, rdfMapper); }
/** * Add a {@link Path} with a custom {@link InputFormat} and * {@link Mapper} to the list of inputs for the map-reduce job. * * @param job The {@link Job} * @param path {@link Path} to be added to the list of inputs for the job * @param inputFormatClass {@link InputFormat} class to use for this path * @param mapperClass {@link Mapper} class to use for this path */ @SuppressWarnings("unchecked") public static void addInputPath(Job job, Path path, Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass) { addInputPath(job, path, inputFormatClass); Configuration conf = job.getConfiguration(); String mapperMapping = path.toString() + ";" + mapperClass.getName(); String mappers = conf.get(DIR_MAPPERS); conf.set(DIR_MAPPERS, mappers == null ? mapperMapping : mappers + "," + mapperMapping); job.setMapperClass(DelegatingMapper.class); }
/** * Add a {@link Path} with a custom {@link InputFormat} and * {@link Mapper} to the list of inputs for the map-reduce job. * * @param job The {@link Job} * @param path {@link Path} to be added to the list of inputs for the job * @param inputFormatClass {@link InputFormat} class to use for this path * @param mapperClass {@link Mapper} class to use for this path */ @SuppressWarnings("unchecked") public static void addInputPath(Job job, Path path, Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass) { addInputPath(job, path, inputFormatClass); Configuration conf = job.getConfiguration(); String mapperMapping = path.toString() + ";" + mapperClass.getName(); String mappers = conf.get(DIR_MAPPERS); conf.set(DIR_MAPPERS, mappers == null ? mapperMapping : mappers + "," + mapperMapping); job.setMapperClass(DelegatingMapper.class); }
@SuppressWarnings("unchecked") public void testAddInputPathWithFormat() throws IOException { final Job conf = Job.getInstance(); MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class); MultipleInputs.addInputPath(conf, new Path("/bar"), KeyValueTextInputFormat.class); final Map<Path, InputFormat> inputs = MultipleInputs .getInputFormatMap(conf); assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass()); assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar")) .getClass()); }
@Override public int run(String[] args) throws Exception { Configuration conf = new Configuration(); Job job = new Job(conf, "PostCommentHeirarchy"); job.setJarByClass(PostCommentHierarchy.class); MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, PostMapper.class); MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, CommentMapper.class); job.setReducerClass(PostCommentHierarchyReducer.class); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, new Path(args[2])); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); return job.waitForCompletion(true) ? 0 : 2; }
private static void addInputPath(Job job, Iterable<String> pathStrings, Class<? extends InputFormat> inputFormatClass) { Configuration conf = job.getConfiguration(); StringBuilder inputFormats = new StringBuilder(Strings.nullToEmpty(conf.get(MultipleInputs.DIR_FORMATS))); String[] paths = Iterables.toArray(pathStrings, String.class); for (int i = 0; i < paths.length - 1; i++) { if (inputFormats.length() > 0) { inputFormats.append(','); } inputFormats.append(paths[i]).append(';').append(inputFormatClass.getName()); } if (inputFormats.length() > 0) { conf.set(MultipleInputs.DIR_FORMATS, inputFormats.toString()); } // add last one separately for possible initialization in MultipleInputs MultipleInputs.addInputPath(job, new Path(paths[paths.length - 1]), inputFormatClass); }
/** * Add a {@link Path} with a custom {@link InputFormat} and * {@link Mapper} to the list of inputs for the map-reduce job. * * @param job The {@link Job} * @param path {@link Path} to be added to the list of inputs for the job * @param inputFormatClass {@link InputFormat} class to use for this path * @param mapperClass {@link Mapper} class to use for this path */ @SuppressWarnings("unchecked") public static void addInputPath(Job job, Path path, Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass) { addInputPath(job, path, inputFormatClass); Configuration conf = job.getConfiguration(); String mapperMapping = path.toString() + ";" + mapperClass.getName(); String mappers = conf.get(DIR_MAPPERS); conf.set(DIR_MAPPERS, mappers == null ? mapperMapping : mappers + "," + mapperMapping); job.setMapperClass(DelegatingMapper.class); }
/** * Add a {@link Path} with a custom {@link InputFormat} and * {@link Mapper} to the list of inputs for the map-reduce job. * * @param job The {@link Job} * @param path {@link Path} to be added to the list of inputs for the job * @param inputFormatClass {@link InputFormat} class to use for this path * @param mapperClass {@link Mapper} class to use for this path */ @SuppressWarnings("unchecked") public static void addInputPath(Job job, Path path, Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass) { addInputPath(job, path, inputFormatClass); Configuration conf = job.getConfiguration(); String mapperMapping = path.toString() + ";" + mapperClass.getName(); String mappers = conf.get(DIR_MAPPERS); conf.set(DIR_MAPPERS, mappers == null ? mapperMapping : mappers + "," + mapperMapping); job.setMapperClass(DelegatingMapper.class); }
/** * Add a {@link Path} with a custom {@link InputFormat} and * {@link Mapper} to the list of inputs for the map-reduce job. * * @param job The {@link Job} * @param path {@link Path} to be added to the list of inputs for the job * @param inputFormatClass {@link InputFormat} class to use for this path * @param mapperClass {@link Mapper} class to use for this path */ @SuppressWarnings("unchecked") public static void addInputPath(Job job, Path path, Class<? extends InputFormat> inputFormatClass, Class<? extends Mapper> mapperClass) { addInputPath(job, path, inputFormatClass); Configuration conf = job.getConfiguration(); String mapperMapping = path.toString() + ";" + mapperClass.getName(); String mappers = conf.get(DIR_MAPPERS); conf.set(DIR_MAPPERS, mappers == null ? mapperMapping : mappers + "," + mapperMapping); job.setMapperClass(DelegatingMapper.class); }
private static void addInputPath(Job job, Iterable<String> pathStrings, Class<? extends InputFormat> inputFormatClass) { Configuration conf = job.getConfiguration(); StringBuilder inputFormats = new StringBuilder( StringUtils.nullToEmptyNonDruidDataString(conf.get(MultipleInputs.DIR_FORMATS)) ); String[] paths = Iterables.toArray(pathStrings, String.class); for (int i = 0; i < paths.length - 1; i++) { if (inputFormats.length() > 0) { inputFormats.append(','); } inputFormats.append(paths[i]).append(';').append(inputFormatClass.getName()); } if (inputFormats.length() > 0) { conf.set(MultipleInputs.DIR_FORMATS, inputFormats.toString()); } // add last one separately for possible initialization in MultipleInputs MultipleInputs.addInputPath(job, new Path(paths[paths.length - 1]), inputFormatClass); }
protected static void runFirstJob(Path transactions, Path users, Path output, Configuration conf) throws Exception { Job job = new Job(conf); job.setJarByClass(RawMapreduce.class); job.setJobName("Raw Mapreduce Step 1"); job.setPartitionerClass(SecondarySort.SSPartitioner.class); job.setGroupingComparatorClass(SecondarySort.SSGroupComparator.class); job.setSortComparatorClass(SecondarySort.SSSortComparator.class); job.setReducerClass(JoinReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); MultipleInputs.addInputPath(job, transactions, TextInputFormat.class, TransactionMapper.class); MultipleInputs.addInputPath(job, users, TextInputFormat.class, UserMapper.class); job.setMapOutputKeyClass(TextTuple.class); job.setMapOutputValueClass(TextTuple.class); FileOutputFormat.setOutputPath(job, output); if (job.waitForCompletion(true)) return; else throw new Exception("First Job Failed"); }
@SuppressWarnings("unchecked") public void testAddInputPathWithMapper() throws IOException { final Job conf = Job.getInstance(); MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class, MapClass.class); MultipleInputs.addInputPath(conf, new Path("/bar"), KeyValueTextInputFormat.class, KeyValueMapClass.class); final Map<Path, InputFormat> inputs = MultipleInputs .getInputFormatMap(conf); final Map<Path, Class<? extends Mapper>> maps = MultipleInputs .getMapperTypeMap(conf); assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass()); assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar")) .getClass()); assertEquals(MapClass.class, maps.get(new Path("/foo"))); assertEquals(KeyValueMapClass.class, maps.get(new Path("/bar"))); }
@SuppressWarnings("unchecked") public void testAddInputPathWithFormat() throws IOException { final Job conf = Job.getInstance(); MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class); MultipleInputs.addInputPath(conf, new Path("/bar"), KeyValueTextInputFormat.class); final Map<Path, InputFormat> inputs = MultipleInputs .getInputFormatMap(conf); assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass()); assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar")) .getClass()); }
@Override public void process(Annotation annotation, Job job, Object target) throws ToolException { for (Input input : ((MultiInput)annotation).value()) { Path path = getInputAsPath(input.path()); if (input.mapper() == Mapper.class) { MultipleInputs.addInputPath(job, path, input.format()); } else { MultipleInputs.addInputPath(job, path, input.format(), input.mapper()); // Need to call again here so the call is captured by our aspect which // will replace it with the annotated delegating mapper class for resource // injection if required. job.setMapperClass(DelegatingMapper.class); } } }
@SuppressWarnings("unchecked") public void testAddInputPathWithMapper() throws IOException { final Job conf = Job.getInstance(); MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class, MapClass.class); MultipleInputs.addInputPath(conf, new Path("/bar"), KeyValueTextInputFormat.class, KeyValueMapClass.class); final Map<Path, InputFormat> inputs = MultipleInputs .getInputFormatMap(conf); final Map<Path, Class<? extends Mapper>> maps = MultipleInputs .getMapperTypeMap(conf); assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass()); assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar")) .getClass()); assertEquals(MapClass.class, maps.get(new Path("/foo"))); assertEquals(KeyValueMapClass.class, maps.get(new Path("/bar"))); }