/**
 * Creates a Flink {@link InputFormat} that wraps the given Hadoop
 * {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat}.
 *
 * @return A Flink InputFormat that wraps the Hadoop FileInputFormat.
 */
public static <K, V> org.apache.flink.api.java.hadoop.mapreduce.HadoopInputFormat<K, V> readHadoopFile(
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat<K, V> mapreduceInputFormat,
    Class<K> key, Class<V> value, String inputPath, Job job) throws IOException {
  // Set the input path on the Job.
  org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job,
      new org.apache.hadoop.fs.Path(inputPath));
  // Return the wrapping InputFormat.
  return createHadoopInput(mapreduceInputFormat, key, value, job);
}
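A minimal call-side sketch for the wrapper above, assuming a Flink DataSet-era ExecutionEnvironment named env and a placeholder input path (both are assumptions, not part of the original snippet):

Job job = Job.getInstance();
// TextInputFormat is a FileInputFormat<LongWritable, Text>, so K/V are inferred accordingly.
org.apache.flink.api.java.hadoop.mapreduce.HadoopInputFormat<LongWritable, Text> wrapped =
    readHadoopFile(new TextInputFormat(), LongWritable.class, Text.class, "hdfs:///tmp/input", job);
// Each record arrives as a Tuple2 of the Hadoop key and value.
DataSet<Tuple2<LongWritable, Text>> lines = env.createInput(wrapped);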
private void configureInputAndOutputPaths(Job job) throws IOException {
  for (Path inputPath : getInputPaths()) {
    FileInputFormat.addInputPath(job, inputPath);
  }
  // The MR output path must not exist when the job starts, so delete it if present.
  this.tmpFs.delete(this.dataset.outputTmpPath(), true);
  FileOutputFormat.setOutputPath(job, this.dataset.outputTmpPath());
}
Path inputPathPattern = new Path(_inputSegmentDir);
Path stagingDir = new Path(_stagingDir);
Path outputDir = new Path(_outputDir);

job.setMapOutputValueClass(Text.class);

FileInputFormat.addInputPath(job, new Path(_stagingDir + "/input/"));
FileOutputFormat.setOutputPath(job, new Path(_stagingDir + "/output/"));

job.waitForCompletion(true);
if (!job.isSuccessful()) {
  throw new RuntimeException("Job failed: " + job);
}
conf.set("hive.io.file.read.all.columns", "false"); conf.set("hive.io.file.readcolumn.ids", "1,3"); Job job = new Job(conf, "orc test"); job.setInputFormatClass(OrcNewInputFormat.class); job.setJarByClass(TestNewInputOutputFormat.class); job.setMapperClass(OrcTestMapper1.class); job.setNumReduceTasks(0); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(HiveTestUtils .getFileFromClasspath("orc-file-11-format.orc"))); Path outputPath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".txt"); localFs.delete(outputPath, true); FileOutputFormat.setOutputPath(job, outputPath); boolean result = job.waitForCompletion(true); assertTrue(result); Path outputFilePath = new Path(outputPath, "part-m-00000");
infoList.add(OutputJobInfo.create("default", tableNames[2], partitionValues));

Job job = new Job(hiveConf, "SampleJob");
job.setMapperClass(MyMapper.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(MultiOutputFormat.class);
FileInputFormat.addInputPath(job, filePath);

Assert.assertTrue(job.waitForCompletion(true));

Path partitionFile = new Path(warehousedir + "/" + tableNames[i] + "/ds=1/cluster=ag/part-m-00000");
FileSystem fs = partitionFile.getFileSystem(mrConf);
/**
 * Creates and runs an MR job.
 *
 * @param conf the job configuration
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void createAndRunJob(Configuration conf)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = Job.getInstance(conf);
  job.setJarByClass(TestLineRecordReaderJobs.class);
  // Identity mapper and reducer: records pass through unchanged.
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);
  FileInputFormat.addInputPath(job, inputDir);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.waitForCompletion(true);
}
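A hedged driver sketch for the helper above, assuming the test's inputDir and outputDir fields are already populated; the record-reader property is a standard Hadoop setting used here only for illustration:

Configuration conf = new Configuration();
// Cap the line length the record reader will accept (standard Hadoop property).
conf.set("mapreduce.input.linerecordreader.line.maxlength", "1024");
new TestLineRecordReaderJobs().createAndRunJob(conf);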
this.job.setMapperClass(TaskRunner.class);
FileInputFormat.addInputPath(this.job, this.jobInputPath);
FileOutputFormat.setOutputPath(this.job, this.jobOutputPath);
@Test
Job job = new Job(conf, "orc test");
job.setInputFormatClass(OrcNewInputFormat.class);
job.setJarByClass(TestNewInputOutputFormat.class);
job.setMapperClass(OrcTestMapper1.class);
job.setNumReduceTasks(0);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job,
    new Path(HiveTestUtils.getFileFromClasspath("orc-file-11-format.orc")));
Path outputPath = new Path(workDir,
    "TestOrcFile." + testCaseName.getMethodName() + ".txt");
localFs.delete(outputPath, true);
FileOutputFormat.setOutputPath(job, outputPath);

boolean result = job.waitForCompletion(true);
assertTrue(result);
Path outputFilePath = new Path(outputPath, "part-m-00000");
@Override
public void configureJob(Job job) {
  job.setInputFormatClass(SequenceFileInputFormat.class);

  String jobId = job.getConfiguration().get(BatchConstants.ARG_CUBING_JOB_ID);
  IJoinedFlatTableDesc flatHiveTableDesc = new CubeJoinedFlatTableDesc(cubeSegment);
  String inputPath = JoinedFlatTable.getTableDir(flatHiveTableDesc,
      JobBuilderSupport.getJobWorkingDir(conf, jobId));
  try {
    FileInputFormat.addInputPath(job, new Path(inputPath));
  } catch (IOException e) {
    throw new IllegalStateException(e);
  }
}
Job job = new Job(conf);
job.setJobName("CombineSmallFilesDriver");
job.setMapperClass(WordCountMapper.class);
job.setReducerClass(WordCountReducer.class);

Path inputPath = new Path(args[0]);
Path outputPath = new Path(args[1]);
FileInputFormat.addInputPath(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);

job.waitForCompletion(true);
return 0;
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  Job job = Job.getInstance(conf);
  job.setJarByClass(getClass());
  job.setJobName(getClass().getSimpleName());
  job.setOutputKeyClass(LongWritable.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  JSONMapReduceUtil.initMapperJob(TestInputMapper.class, job);
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  JSONMapReduceUtil.initReducerJob(TestInputReducer.class, job);

  return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * Refer to {@link MRCompactorAvroKeyDedupJobRunner#configureInputAndOutputPaths(Job)}.
 *
 * @return true if no valid input paths are present for the MR job to process, where a path is
 *         valid if it is a directory containing one or more files.
 */
protected boolean configureInputAndOutputPaths(Job job, FileSystemDataset dataset) throws IOException {
  boolean emptyDirectoryFlag = false;

  String mrOutputBase = this.state.getProp(MRCompactor.COMPACTION_JOB_DIR);
  CompactionPathParser parser = new CompactionPathParser(this.state);
  CompactionPathParser.CompactionParserResult rst = parser.parse(dataset);
  this.mrOutputPath = concatPaths(mrOutputBase, rst.getDatasetName(), rst.getDstSubDir(), rst.getTimeString());

  log.info("Cleaning temporary MR output directory: " + mrOutputPath);
  this.fs.delete(mrOutputPath, true);

  this.mapReduceInputPaths = getGranularInputPaths(dataset.datasetRoot());
  if (this.mapReduceInputPaths.isEmpty()) {
    this.mapReduceInputPaths.add(dataset.datasetRoot());
    emptyDirectoryFlag = true;
  }

  for (Path path : mapReduceInputPaths) {
    FileInputFormat.addInputPath(job, path);
  }
  FileOutputFormat.setOutputPath(job, mrOutputPath);
  return emptyDirectoryFlag;
}
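A caller-side sketch showing how the returned flag might be used; job, dataset, and log are assumed to be in scope:

boolean emptyDirectory = configureInputAndOutputPaths(job, dataset);
if (emptyDirectory) {
  // Nothing worth compacting: skip submission rather than run over an empty directory.
  log.info("No valid input paths under " + dataset.datasetRoot() + "; skipping MR job");
} else {
  job.waitForCompletion(true);
}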
conf.set("hive.exec.orc.default.compress", "SNAPPY"); Path inputPath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".txt"); Path outputPath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc"); localFs.delete(outputPath, true); pw.close(); Job job = new Job(conf, "orc test"); job.setOutputFormatClass(OrcNewOutputFormat.class); job.setJarByClass(TestNewInputOutputFormat.class); job.setMapperClass(OrcTestMapper2.class); job.setNumReduceTasks(0); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(OrcSerdeRow.class); FileInputFormat.addInputPath(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); boolean result = job.waitForCompletion(true); assertTrue(result); Path outputFilePath = new Path(outputPath, "part-m-00000"); Reader reader = OrcFile.createReader(outputFilePath, OrcFile.readerOptions(conf).filesystem(localFs));
inp = inp.substring(0, inp.length() - 2);
FileSystem fs = HadoopUtil.getWorkingFileSystem(job.getConfiguration());
Path path = new Path(inp);
FileInputFormat.addInputPath(job, path);
ret++;
Path inputPath = new Path(inputFile);
Path outPath = new Path(outFile);
loadMapRedConfigs(conf);

Job job = new Job(conf, "MapReduce - Phoenix bulk import");
job.setJarByClass(MapReduceJob.class);
job.setInputFormatClass(TextInputFormat.class);
FileInputFormat.addInputPath(job, inputPath);
FileOutputFormat.setOutputPath(job, outPath);

job.setMapperClass(MapReduceJob.PhoenixMapper.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(KeyValue.class);

job.waitForCompletion(true);
loader.doBulkLoad(new Path(outFile), hDataTable);
private void addInputAndOutputPathsToFileInputFormat() throws IOException {
  for (Path path : inputPaths) {
    FileInputFormat.addInputPath(job, path);
  }
  FileOutputFormat.setOutputPath(job, tmpPath);
}
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  List<String> dirs = Splitter.on(",").splitToList(state.getProp(INPUT_DIRECTORIES_KEY));
  String outputBase = state.getProp(OUTPUT_LOCATION);

  List<WorkUnit> workUnits = Lists.newArrayList();
  for (String dir : dirs) {
    try {
      Path input = new Path(dir);
      Path output = new Path(outputBase, input.getName());

      WorkUnit workUnit = new WorkUnit();
      TaskUtils.setTaskFactoryClass(workUnit, MRTaskFactory.class);

      Configuration conf = new Configuration();
      Job job = Job.getInstance(conf, "WordCount_" + input.getName());
      job.setJarByClass(MRTaskFactoryTest.class);
      job.setMapperClass(TokenizerMapper.class);
      job.setCombinerClass(IntSumReducer.class);
      job.setReducerClass(IntSumReducer.class);
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(IntWritable.class);
      job.setNumReduceTasks(1);
      FileInputFormat.addInputPath(job, input);
      FileOutputFormat.setOutputPath(job, output);

      MRTask.serializeJobToState(workUnit, job);
      workUnits.add(workUnit);
    } catch (IOException ioe) {
      log.error("Failed to create MR job for " + dir, ioe);
    }
  }
  return workUnits;
}
String job_id = getOptionValue(OPTION_CUBING_JOB_ID);
String cubeName = getOptionValue(OPTION_CUBE_NAME);
Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
Path input = new Path(getOptionValue(OPTION_INPUT_PATH));

Path path = new Path(input.toString() + "/" + tblColRef.getIdentity());
if (HadoopUtil.getFileSystem(path).exists(path)) {
  FileInputFormat.addInputPath(job, path);
  hasUHCValue = true;
}
public Job createJob(Class<? extends Mapper> mapperClass,
    Class<? extends WritableComparable> mapperOutputKeyClass,
    Class<? extends WritableComparable> mapperOutputValueClass,
    Class<? extends InputFormat> inputFormatClass,
    String[] inputFilePaths, String outputFilePath) throws IOException {
  Job job = new Job();
  job.setMapperClass(mapperClass);
  job.setMapOutputKeyClass(mapperOutputKeyClass);
  job.setMapOutputValueClass(mapperOutputValueClass);
  for (String inputFilePath : inputFilePaths) {
    FileInputFormat.addInputPath(job, new Path(inputFilePath));
  }
  FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
  job.setInputFormatClass(inputFormatClass);
  return job;
}

public Job createJob(Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass,
    Class<? extends WritableComparable> mapperOutputKeyClass,
    Class<? extends WritableComparable> mapperOutputValueClass,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends WritableComparable> outputValueClass,
    Class<? extends InputFormat> inputFormatClass,
    String[] inputFilePaths, String outputFilePath) throws IOException {
  // Body reconstructed from the overload above (an assumption; the original snippet is truncated here).
  Job job = createJob(mapperClass, mapperOutputKeyClass, mapperOutputValueClass,
      inputFormatClass, inputFilePaths, outputFilePath);
  job.setReducerClass(reducerClass);
  job.setOutputKeyClass(outputKeyClass);
  job.setOutputValueClass(outputValueClass);
  return job;
}
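A hedged usage sketch for the factory above; the mapper, reducer, and paths are placeholders, not taken from the original:

Job wordCount = createJob(TokenizerMapper.class, IntSumReducer.class,
    Text.class, IntWritable.class, Text.class, IntWritable.class,
    TextInputFormat.class, new String[] { "/data/in1", "/data/in2" }, "/data/out");
wordCount.waitForCompletion(true);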
private void configureInputAndOutputPaths(Job job) throws IOException {
  for (Path inputPath : getInputPaths()) {
    FileInputFormat.addInputPath(job, inputPath);
  }
  // The MR output path must not exist when the job starts, so delete it if present.
  this.fs.delete(this.dataset.outputTmpPath(), true);
  FileOutputFormat.setOutputPath(job, this.dataset.outputTmpPath());
}