/**
 * Creates a Flink {@link InputFormat} that wraps the given Hadoop
 * {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat}.
 *
 * @return A Flink InputFormat that wraps the Hadoop FileInputFormat.
 */
public static <K, V> org.apache.flink.api.java.hadoop.mapreduce.HadoopInputFormat<K, V> readHadoopFile(
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat<K, V> mapreduceInputFormat,
    Class<K> key, Class<V> value, String inputPath, Job job) throws IOException {
  // Set the input path on the Job.
  org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job,
      new org.apache.hadoop.fs.Path(inputPath));
  // Return the wrapping InputFormat.
  return createHadoopInput(mapreduceInputFormat, key, value, job);
}
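A minimal call-side sketch for the wrapper above, assuming a Flink DataSet-era ExecutionEnvironment named env and a placeholder input path (both are assumptions, not part of the original snippet):

Job job = Job.getInstance();
// TextInputFormat is a FileInputFormat<LongWritable, Text>, so K/V are inferred accordingly.
org.apache.flink.api.java.hadoop.mapreduce.HadoopInputFormat<LongWritable, Text> wrapped =
    readHadoopFile(new TextInputFormat(), LongWritable.class, Text.class, "hdfs:///tmp/input", job);
// Each record arrives as a Tuple2 of the Hadoop key and value.
DataSet<Tuple2<LongWritable, Text>> lines = env.createInput(wrapped);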
private void configureInputAndOutputPaths(Job job) throws IOException {
  for (Path inputPath : getInputPaths()) {
    FileInputFormat.addInputPath(job, inputPath);
  }
  // The MR output path must not exist when the job starts, so delete it if present.
  this.tmpFs.delete(this.dataset.outputTmpPath(), true);
  FileOutputFormat.setOutputPath(job, this.dataset.outputTmpPath());
}
Path inputPathPattern = new Path(_inputSegmentDir);
Path stagingDir = new Path(_stagingDir);
Path outputDir = new Path(_outputDir);

job.setMapOutputValueClass(Text.class);

FileInputFormat.addInputPath(job, new Path(_stagingDir + "/input/"));
FileOutputFormat.setOutputPath(job, new Path(_stagingDir + "/output/"));

job.waitForCompletion(true);
if (!job.isSuccessful()) {
  throw new RuntimeException("Job failed: " + job);
}
conf.set("hive.io.file.read.all.columns", "false"); conf.set("hive.io.file.readcolumn.ids", "1,3"); Job job = new Job(conf, "orc test"); job.setInputFormatClass(OrcNewInputFormat.class); job.setJarByClass(TestNewInputOutputFormat.class); job.setMapperClass(OrcTestMapper1.class); job.setNumReduceTasks(0); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(HiveTestUtils .getFileFromClasspath("orc-file-11-format.orc"))); Path outputPath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".txt"); localFs.delete(outputPath, true); FileOutputFormat.setOutputPath(job, outputPath); boolean result = job.waitForCompletion(true); assertTrue(result); Path outputFilePath = new Path(outputPath, "part-m-00000");
infoList.add(OutputJobInfo.create("default", tableNames[2], partitionValues));

Job job = new Job(hiveConf, "SampleJob");
job.setMapperClass(MyMapper.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(MultiOutputFormat.class);
FileInputFormat.addInputPath(job, filePath);

Assert.assertTrue(job.waitForCompletion(true));

Path partitionFile = new Path(warehousedir + "/" + tableNames[i] + "/ds=1/cluster=ag/part-m-00000");
FileSystem fs = partitionFile.getFileSystem(mrConf);
/**
 * Creates and runs an MR job.
 *
 * @param conf the job configuration
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void createAndRunJob(Configuration conf)
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = Job.getInstance(conf);
  job.setJarByClass(TestLineRecordReaderJobs.class);
  // Identity mapper and reducer: records pass through unchanged.
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);
  FileInputFormat.addInputPath(job, inputDir);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.waitForCompletion(true);
}
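A hedged driver sketch for the helper above, assuming the test's inputDir and outputDir fields are already populated; the record-reader property is a standard Hadoop setting used here only for illustration:

Configuration conf = new Configuration();
// Cap the line length the record reader will accept (standard Hadoop property).
conf.set("mapreduce.input.linerecordreader.line.maxlength", "1024");
new TestLineRecordReaderJobs().createAndRunJob(conf);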
this.job.setMapperClass(TaskRunner.class);
FileInputFormat.addInputPath(this.job, this.jobInputPath);
FileOutputFormat.setOutputPath(this.job, this.jobOutputPath);
@Test
Job job = new Job(conf, "orc test");
job.setInputFormatClass(OrcNewInputFormat.class);
job.setJarByClass(TestNewInputOutputFormat.class);
job.setMapperClass(OrcTestMapper1.class);
job.setNumReduceTasks(0);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job,
    new Path(HiveTestUtils.getFileFromClasspath("orc-file-11-format.orc")));
Path outputPath = new Path(workDir,
    "TestOrcFile." + testCaseName.getMethodName() + ".txt");
localFs.delete(outputPath, true);
FileOutputFormat.setOutputPath(job, outputPath);

boolean result = job.waitForCompletion(true);
assertTrue(result);
Path outputFilePath = new Path(outputPath, "part-m-00000");
@Override
public void configureJob(Job job) {
  job.setInputFormatClass(SequenceFileInputFormat.class);

  String jobId = job.getConfiguration().get(BatchConstants.ARG_CUBING_JOB_ID);
  IJoinedFlatTableDesc flatHiveTableDesc = new CubeJoinedFlatTableDesc(cubeSegment);
  String inputPath = JoinedFlatTable.getTableDir(flatHiveTableDesc,
      JobBuilderSupport.getJobWorkingDir(conf, jobId));
  try {
    FileInputFormat.addInputPath(job, new Path(inputPath));
  } catch (IOException e) {
    throw new IllegalStateException(e);
  }
}
Job job = new Job(conf);
job.setJobName("CombineSmallFilesDriver");
job.setMapperClass(WordCountMapper.class);
job.setReducerClass(WordCountReducer.class);

Path inputPath = new Path(args[0]);
Path outputPath = new Path(args[1]);
FileInputFormat.addInputPath(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);

job.waitForCompletion(true);
return 0;
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  Job job = Job.getInstance(conf);
  job.setJarByClass(getClass());
  job.setJobName(getClass().getSimpleName());
  job.setOutputKeyClass(LongWritable.class);

  FileInputFormat.addInputPath(job, new Path(args[0]));
  JSONMapReduceUtil.initMapperJob(TestInputMapper.class, job);
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  JSONMapReduceUtil.initReducerJob(TestInputReducer.class, job);

  return job.waitForCompletion(true) ? 0 : 1;
}
/**
 * Refer to {@link MRCompactorAvroKeyDedupJobRunner#configureInputAndOutputPaths(Job)}.
 *
 * @return true if no valid input paths are present for the MR job to process, where a path is
 *         valid if it is a directory containing one or more files.
 */
protected boolean configureInputAndOutputPaths(Job job, FileSystemDataset dataset) throws IOException {
  boolean emptyDirectoryFlag = false;

  String mrOutputBase = this.state.getProp(MRCompactor.COMPACTION_JOB_DIR);
  CompactionPathParser parser = new CompactionPathParser(this.state);
  CompactionPathParser.CompactionParserResult rst = parser.parse(dataset);
  this.mrOutputPath = concatPaths(mrOutputBase, rst.getDatasetName(), rst.getDstSubDir(), rst.getTimeString());

  log.info("Cleaning temporary MR output directory: " + mrOutputPath);
  this.fs.delete(mrOutputPath, true);

  this.mapReduceInputPaths = getGranularInputPaths(dataset.datasetRoot());
  if (this.mapReduceInputPaths.isEmpty()) {
    this.mapReduceInputPaths.add(dataset.datasetRoot());
    emptyDirectoryFlag = true;
  }

  for (Path path : mapReduceInputPaths) {
    FileInputFormat.addInputPath(job, path);
  }
  FileOutputFormat.setOutputPath(job, mrOutputPath);
  return emptyDirectoryFlag;
}
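A caller-side sketch showing how the returned flag might be used; job, dataset, and log are assumed to be in scope:

boolean emptyDirectory = configureInputAndOutputPaths(job, dataset);
if (emptyDirectory) {
  // Nothing worth compacting: skip submission rather than run over an empty directory.
  log.info("No valid input paths under " + dataset.datasetRoot() + "; skipping MR job");
} else {
  job.waitForCompletion(true);
}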
conf.set("hive.exec.orc.default.compress", "SNAPPY"); Path inputPath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".txt"); Path outputPath = new Path(workDir, "TestOrcFile." + testCaseName.getMethodName() + ".orc"); localFs.delete(outputPath, true); pw.close(); Job job = new Job(conf, "orc test"); job.setOutputFormatClass(OrcNewOutputFormat.class); job.setJarByClass(TestNewInputOutputFormat.class); job.setMapperClass(OrcTestMapper2.class); job.setNumReduceTasks(0); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(OrcSerdeRow.class); FileInputFormat.addInputPath(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); boolean result = job.waitForCompletion(true); assertTrue(result); Path outputFilePath = new Path(outputPath, "part-m-00000"); Reader reader = OrcFile.createReader(outputFilePath, OrcFile.readerOptions(conf).filesystem(localFs));
inp = inp.substring(0, inp.length() - 2);
FileSystem fs = HadoopUtil.getWorkingFileSystem(job.getConfiguration());
Path path = new Path(inp);
FileInputFormat.addInputPath(job, path);
ret++;
Path inputPath = new Path(inputFile);
Path outPath = new Path(outFile);
loadMapRedConfigs(conf);

Job job = new Job(conf, "MapReduce - Phoenix bulk import");
job.setJarByClass(MapReduceJob.class);
job.setInputFormatClass(TextInputFormat.class);
FileInputFormat.addInputPath(job, inputPath);
FileOutputFormat.setOutputPath(job, outPath);

job.setMapperClass(MapReduceJob.PhoenixMapper.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(KeyValue.class);

job.waitForCompletion(true);
loader.doBulkLoad(new Path(outFile), hDataTable);
private void addInputAndOutputPathsToFileInputFormat() throws IOException {
  for (Path path : inputPaths) {
    FileInputFormat.addInputPath(job, path);
  }
  FileOutputFormat.setOutputPath(job, tmpPath);
}
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  List<String> dirs = Splitter.on(",").splitToList(state.getProp(INPUT_DIRECTORIES_KEY));
  String outputBase = state.getProp(OUTPUT_LOCATION);

  List<WorkUnit> workUnits = Lists.newArrayList();
  for (String dir : dirs) {
    try {
      Path input = new Path(dir);
      Path output = new Path(outputBase, input.getName());

      WorkUnit workUnit = new WorkUnit();
      TaskUtils.setTaskFactoryClass(workUnit, MRTaskFactory.class);

      Configuration conf = new Configuration();
      Job job = Job.getInstance(conf, "WordCount_" + input.getName());
      job.setJarByClass(MRTaskFactoryTest.class);
      job.setMapperClass(TokenizerMapper.class);
      job.setCombinerClass(IntSumReducer.class);
      job.setReducerClass(IntSumReducer.class);
      job.setOutputKeyClass(Text.class);
      job.setOutputValueClass(IntWritable.class);
      job.setNumReduceTasks(1);
      FileInputFormat.addInputPath(job, input);
      FileOutputFormat.setOutputPath(job, output);

      MRTask.serializeJobToState(workUnit, job);
      workUnits.add(workUnit);
    } catch (IOException ioe) {
      log.error("Failed to create MR job for " + dir, ioe);
    }
  }
  return workUnits;
}
String job_id = getOptionValue(OPTION_CUBING_JOB_ID);
String cubeName = getOptionValue(OPTION_CUBE_NAME);
Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
Path input = new Path(getOptionValue(OPTION_INPUT_PATH));

Path path = new Path(input.toString() + "/" + tblColRef.getIdentity());
if (HadoopUtil.getFileSystem(path).exists(path)) {
  FileInputFormat.addInputPath(job, path);
  hasUHCValue = true;
}
public Job createJob(Class<? extends Mapper> mapperClass,
    Class<? extends WritableComparable> mapperOutputKeyClass,
    Class<? extends WritableComparable> mapperOutputValueClass,
    Class<? extends InputFormat> inputFormatClass,
    String[] inputFilePaths, String outputFilePath) throws IOException {
  Job job = new Job();
  job.setMapperClass(mapperClass);
  job.setMapOutputKeyClass(mapperOutputKeyClass);
  job.setMapOutputValueClass(mapperOutputValueClass);
  for (String inputFilePath : inputFilePaths) {
    FileInputFormat.addInputPath(job, new Path(inputFilePath));
  }
  FileOutputFormat.setOutputPath(job, new Path(outputFilePath));
  job.setInputFormatClass(inputFormatClass);
  return job;
}

public Job createJob(Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass,
    Class<? extends WritableComparable> mapperOutputKeyClass,
    Class<? extends WritableComparable> mapperOutputValueClass,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends WritableComparable> outputValueClass,
    Class<? extends InputFormat> inputFormatClass,
    String[] inputFilePaths, String outputFilePath) throws IOException {
  // Body reconstructed from the overload above (an assumption; the original snippet is truncated here).
  Job job = createJob(mapperClass, mapperOutputKeyClass, mapperOutputValueClass,
      inputFormatClass, inputFilePaths, outputFilePath);
  job.setReducerClass(reducerClass);
  job.setOutputKeyClass(outputKeyClass);
  job.setOutputValueClass(outputValueClass);
  return job;
}
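A hedged usage sketch for the factory above; the mapper, reducer, and paths are placeholders, not taken from the original:

Job wordCount = createJob(TokenizerMapper.class, IntSumReducer.class,
    Text.class, IntWritable.class, Text.class, IntWritable.class,
    TextInputFormat.class, new String[] { "/data/in1", "/data/in2" }, "/data/out");
wordCount.waitForCompletion(true);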
private void configureInputAndOutputPaths(Job job) throws IOException {
  for (Path inputPath : getInputPaths()) {
    FileInputFormat.addInputPath(job, inputPath);
  }
  // The MR output path must not exist when the job starts, so delete it if present.
  this.fs.delete(this.dataset.outputTmpPath(), true);
  FileOutputFormat.setOutputPath(job, this.dataset.outputTmpPath());
}