private void setupMapper(Path input) throws IOException {
    FileInputFormat.setInputPaths(job, input);
    job.setMapperClass(CalculateStatsFromBaseCuboidMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
}
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    Path inputPath = new Path(args[0]);
    String tableName = args[1];
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Uploader.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(Uploader.class);
    // No reducers. Just write straight to table. Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    return job;
}
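// A minimal driver sketch showing how configureJob(...) above might be invoked from a
// Tool-style entry point. The run(...) signature, getConf() call, and argument check are
// assumptions added for illustration; they are not part of the snippet above.
public int run(String[] otherArgs) throws Exception {
    if (otherArgs.length != 2) {
        System.err.println("Usage: Uploader <input-path> <table-name>");
        return -1;
    }
    // Build the job from the shared configuration and block until it finishes.
    Job job = configureJob(getConf(), otherArgs);
    return job.waitForCompletion(true) ? 0 : 1;
}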
public int runGenerator(int numMappers, long numNodes, Path tmpOutput, Integer width,
    Integer wrapMultiplier, Integer numWalkers) throws Exception {
    LOG.info("Running Generator with numMappers=" + numMappers + ", numNodes=" + numNodes);
    createSchema();
    job = Job.getInstance(getConf());
    job.setJobName("Link Generator");
    job.setNumReduceTasks(0);
    job.setJarByClass(getClass());
    FileInputFormat.setInputPaths(job, tmpOutput);
    job.setInputFormatClass(OneFilePerMapperSFIF.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);
    setMapperForGenerator(job);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
    TableMapReduceUtil.initCredentials(job);
    boolean success = jobCompletion(job);
    return success ? 0 : 1;
}
/**
 * Gets fully configured Job instance.
 *
 * @param input Input file name.
 * @param output Output directory name.
 * @return Job instance.
 * @throws IOException If fails.
 */
public static Job getJob(String input, String output) throws IOException {
    Job job = Job.getInstance();
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    setTasksClasses(job, true, true, true, false);
    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    job.setJarByClass(HadoopWordCount2.class);
    return job;
}
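// A minimal sketch of a main(...) driver for getJob(...) above, assuming it is called with an
// input file and an output directory. The usage message and exit codes are illustrative
// assumptions, not taken from the original class.
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: HadoopWordCount2 <input> <output>");
        System.exit(2);
    }
    Job job = getJob(args[0], args[1]);
    // Submit the configured job and wait for completion, reporting progress to the console.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}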
TableName tableName = TableName.valueOf(args[0]);
conf.set(TABLE_NAME, tableName.getNameAsString());
Path inputDir = new Path(args[1]);
Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
job.setJarByClass(Importer.class);
FileInputFormat.setInputPaths(job, inputDir);
job.setInputFormatClass(SequenceFileInputFormat.class);
String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
if (hfileOutPath != null) {
    // Bulk-load path: sort cells in the reducer and write HFiles for incremental load.
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Table table = conn.getTable(tableName);
        RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
        HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
        job.setMapperClass(CellSortImporter.class);
        job.setReducerClass(CellReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(CellWritableComparable.class);
        job.setMapOutputValueClass(MapReduceExtendedCell.class);
        // Sort map output keys with the cell comparator.
        job.getConfiguration().setClass("mapreduce.job.output.key.comparator.class",
            CellWritableComparable.CellWritableComparator.class, RawComparator.class);
        Path partitionsPath = new Path(TotalOrderPartitioner.getPartitionFile(job.getConfiguration()));
        FileSystem fs = FileSystem.get(job.getConfiguration());
        fs.deleteOnExit(partitionsPath);
    }
} else {
    // Direct-write path: mappers write straight to the table, no reduce phase.
    job.setMapperClass(Importer.class);
    TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
    job.setNumReduceTasks(0);
}
/**
 * Prepares a job with mappers to cancel.
 *
 * @return Configuration of the fully configured job.
 * @throws Exception If fails.
 */
private Configuration prepareJobForCancelling() throws Exception {
    prepareFile("/testFile", 1500);
    executedTasks.set(0);
    cancelledTasks.set(0);
    failMapperId.set(0);
    splitsCount.set(0);
    Configuration cfg = new Configuration();
    setupFileSystems(cfg);
    Job job = Job.getInstance(cfg);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(CancellingTestMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(InFormat.class);
    FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@/output/"));
    job.setJarByClass(getClass());
    return job.getConfiguration();
}
Path inputDir = new Path(args[1]);
String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
job = Job.getInstance(conf, jobName);
job.setJarByClass(mapperClass);
FileInputFormat.setInputPaths(job, inputDir);
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(mapperClass);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
if (mapperClass.equals(TsvImporterTextMapper.class)) {
    // Text mapper emits raw lines; sort them into HFiles in the reducer.
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(TextSortReducer.class);
} else {
    // Default mapper emits Puts; combine and sort them before writing HFiles.
    job.setMapOutputValueClass(Put.class);
    job.setCombinerClass(PutCombiner.class);
    job.setReducerClass(PutSortReducer.class);
}
Path outputDir = new Path(hfileOutPath);
FileOutputFormat.setOutputPath(job, outputDir);
HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
FileInputFormat.setInputPaths(job, new Path(input));
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setMapperClass(InMemCuboidFromBaseCuboidMapper.class);
job.setMapOutputKeyClass(ByteArrayWritable.class);
job.setMapOutputValueClass(ByteArrayWritable.class);
job.setReducerClass(InMemCuboidFromBaseCuboidReducer.class);
job.setNumReduceTasks(MapReduceUtil.getInmemCubingReduceTaskNum(cubeSeg, cuboidScheduler));
job.setOutputValueClass(Text.class);
Path outputPath = new Path(output);
FileOutputFormat.setOutputPath(job, outputPath);
String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
String segmentID = getOptionValue(OPTION_SEGMENT_ID);
Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
job.setMapperClass(UpdateOldCuboidShardMapper.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
FileInputFormat.setInputPaths(job, input);
FileOutputFormat.setOutputPath(job, output);
String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
String segmentID = getOptionValue(OPTION_SEGMENT_ID);
Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
job.setMapperClass(FilterRecommendCuboidDataMapper.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
FileInputFormat.setInputPaths(job, input);
FileOutputFormat.setOutputPath(job, output);
public void checkInputFormat() throws Exception {
    Job job = new Job();
    WordCountUtil wordCountUtil = new WordCountUtil("trevniMapReduceKeyValueTest");
    job.setMapperClass(Counter.class);
    FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/out/*"));
    job.setInputFormatClass(AvroTrevniKeyValueInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    total = 0;
    job.waitForCompletion(true);
    assertEquals(WordCountUtil.TOTAL, total);
}
/**
 * Configures the Hadoop MapReduce job.
 *
 * @return Instance of the Hadoop MapReduce job.
 * @throws IOException If failed.
 */
@SuppressWarnings("deprecation")
private Job createConfigBasedHadoopJob() throws IOException {
    Job jobCfg = new Job();
    Configuration cfg = jobCfg.getConfiguration();
    // Use explicit configuration of distributed file system, if provided.
    cfg.addResource(U.resolveIgniteUrl(DFS_CFG));
    jobCfg.setJobName("HadoopPopularWordExample");
    jobCfg.setJarByClass(HadoopPopularWords.class);
    jobCfg.setInputFormatClass(TextInputFormat.class);
    jobCfg.setOutputKeyClass(Text.class);
    jobCfg.setOutputValueClass(IntWritable.class);
    jobCfg.setMapperClass(TokenizingMapper.class);
    jobCfg.setReducerClass(TopNWordsReducer.class);
    FileInputFormat.setInputPaths(jobCfg, BOOKS_DFS_DIR);
    FileOutputFormat.setOutputPath(jobCfg, RESULT_DFS_DIR);
    // The local job tracker allows only one task per wave, but the text input format
    // replaces that with a value calculated from the input split size option.
    if ("local".equals(cfg.get("mapred.job.tracker", "local"))) {
        // Split the job into tasks using a 32MB split size.
        FileInputFormat.setMinInputSplitSize(jobCfg, 32L * 1024 * 1024);
        FileInputFormat.setMaxInputSplitSize(jobCfg, Long.MAX_VALUE);
    }
    return jobCfg;
}
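// A short sketch of how the job built by createConfigBasedHadoopJob() might be executed; the
// method name and the console output are assumptions added for illustration.
private void runConfigBasedHadoopJob() throws IOException {
    try {
        Job job = createConfigBasedHadoopJob();
        // Wait synchronously for the job; 'true' enables verbose progress reporting.
        boolean succeeded = job.waitForCompletion(true);
        System.out.println("HadoopPopularWordExample " + (succeeded ? "succeeded" : "failed") + ".");
    }
    catch (InterruptedException | ClassNotFoundException e) {
        throw new IOException(e);
    }
}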
/**
 * @throws Exception If failed.
 */
@Test
public void testMapperException() throws Exception {
    prepareFile("/testFile", 1000);
    Configuration cfg = new Configuration();
    cfg.setStrings("fs.igfs.impl", IgniteHadoopFileSystem.class.getName());
    Job job = Job.getInstance(cfg);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(FailMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@/output/"));
    job.setJarByClass(getClass());
    final IgniteInternalFuture<?> fut = grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 3),
        createJobInfo(job.getConfiguration(), null));
    GridTestUtils.assertThrows(log, new Callable<Object>() {
        @Override public Object call() throws Exception {
            fut.get();
            return null;
        }
    }, IgniteCheckedException.class, null);
}
/**
 * @throws Exception If failed.
 */
@Test
public void testMapRun() throws Exception {
    int lineCnt = 10000;
    String fileName = "/testFile";
    prepareFile(fileName, lineCnt);
    totalLineCnt.set(0);
    taskWorkDirs.clear();
    Configuration cfg = new Configuration();
    cfg.setStrings("fs.igfs.impl", IgniteHadoopFileSystem.class.getName());
    Job job = Job.getInstance(cfg);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(TestMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@/output/"));
    job.setJarByClass(getClass());
    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 1),
        createJobInfo(job.getConfiguration(), null));
    fut.get();
    assertEquals(lineCnt, totalLineCnt.get());
    assertEquals(32, taskWorkDirs.size());
}
public void checkOutputFormat() throws Exception {
    Job job = new Job();
    WordCountUtil wordCountUtil = new WordCountUtil("trevniMapReduceKeyTest", "part-r-00000");
    wordCountUtil.writeLinesFile();
    AvroJob.setInputKeySchema(job, STRING);
    AvroJob.setOutputKeySchema(job, Pair.getPairSchema(STRING, LONG));
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/in"));
    FileOutputFormat.setOutputPath(job, new Path(wordCountUtil.getDir().toString() + "/out"));
    FileOutputFormat.setCompressOutput(job, true);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setOutputFormatClass(AvroTrevniKeyOutputFormat.class);
    job.waitForCompletion(true);
    wordCountUtil.validateCountsFile();
}
public void checkOutputFormat() throws Exception {
    Job job = new Job();
    WordCountUtil wordCountUtil = new WordCountUtil("trevniMapReduceKeyValueTest", "part-r-00000");
    wordCountUtil.writeLinesFile();
    AvroJob.setInputKeySchema(job, STRING);
    AvroJob.setOutputKeySchema(job, STRING);
    AvroJob.setOutputValueSchema(job, LONG);
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/in"));
    FileOutputFormat.setOutputPath(job, new Path(wordCountUtil.getDir().toString() + "/out"));
    FileOutputFormat.setCompressOutput(job, true);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setOutputFormatClass(AvroTrevniKeyValueOutputFormat.class);
    job.waitForCompletion(true);
    wordCountUtil.validateCountsFileGenericRecord();
}
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(TestMapper.class);
job.setCombinerClass(TestCombiner.class);
job.setReducerClass(TestReducer.class);
job.setInputFormatClass(TextInputFormat.class);
FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@/"));
FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@/output/"));
job.setJarByClass(getClass());
public void checkInputFormat() throws Exception {
    Job job = new Job();
    WordCountUtil wordCountUtil = new WordCountUtil("trevniMapReduceKeyTest");
    job.setMapperClass(Counter.class);
    Schema subSchema = Schema.parse("{\"type\":\"record\","
        + "\"name\":\"PairValue\","
        + "\"fields\": [ "
        + "{\"name\":\"value\", \"type\":\"long\"}"
        + "]}");
    AvroJob.setInputKeySchema(job, subSchema);
    FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/out/*"));
    job.setInputFormatClass(AvroTrevniKeyInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    total = 0;
    job.waitForCompletion(true);
    assertEquals(WordCountUtil.TOTAL, total);
}
/**
 * @throws Exception If failed.
 */
@Test
public void testSimpleTaskSubmit() throws Exception {
    String testInputFile = "/test";
    prepareTestFile(testInputFile);
    Configuration cfg = new Configuration();
    setupFileSystems(cfg);
    Job job = Job.getInstance(cfg);
    job.setMapperClass(TestMapper.class);
    job.setCombinerClass(TestReducer.class);
    job.setReducerClass(TestReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(1);
    FileInputFormat.setInputPaths(job, new Path("igfs://:" + getTestIgniteInstanceName(0) + "@/" + testInputFile));
    FileOutputFormat.setOutputPath(job, new Path("igfs://:" + getTestIgniteInstanceName(0) + "@/output"));
    job.setJarByClass(getClass());
    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 1),
        createJobInfo(job.getConfiguration(), null));
    fut.get();
}
/**
 * Creates WordCount hadoop job for API v2.
 *
 * @param inFile Input file name for the job.
 * @param outFile Output file name for the job.
 * @return Hadoop job.
 * @throws Exception If fails.
 */
@Override public HadoopJobEx getHadoopJob(String inFile, String outFile) throws Exception {
    Job job = Job.getInstance();
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    HadoopWordCount2.setTasksClasses(job, true, true, true, false);
    Configuration conf = job.getConfiguration();
    setupFileSystems(conf);
    FileInputFormat.setInputPaths(job, new Path(inFile));
    FileOutputFormat.setOutputPath(job, new Path(outFile));
    job.setJarByClass(HadoopWordCount2.class);
    Job hadoopJob = HadoopWordCount2.getJob(inFile, outFile);
    HadoopDefaultJobInfo jobInfo = createJobInfo(hadoopJob.getConfiguration(), null);
    UUID uuid = new UUID(0, 0);
    HadoopJobId jobId = new HadoopJobId(uuid, 0);
    return jobInfo.createJob(HadoopV2Job.class, jobId, log, null, new HadoopHelperImpl());
}