private void setupMapper(Path input) throws IOException {
    FileInputFormat.setInputPaths(job, input);
    job.setMapperClass(CalculateStatsFromBaseCuboidMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
}
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    Path inputPath = new Path(args[0]);
    String tableName = args[1];
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Uploader.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(Uploader.class);
    // No reducers. Just write straight to table. Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    return job;
}
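// A minimal driver sketch showing how configureJob(...) above might be invoked from a
// Tool-style entry point. The run(...) signature, getConf() call, and argument check are
// assumptions added for illustration; they are not part of the snippet above.
public int run(String[] otherArgs) throws Exception {
    if (otherArgs.length != 2) {
        System.err.println("Usage: Uploader <input-path> <table-name>");
        return -1;
    }
    // Build the job from the shared configuration and block until it finishes.
    Job job = configureJob(getConf(), otherArgs);
    return job.waitForCompletion(true) ? 0 : 1;
}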
public int runGenerator(int numMappers, long numNodes, Path tmpOutput, Integer width,
    Integer wrapMultiplier, Integer numWalkers) throws Exception {
    LOG.info("Running Generator with numMappers=" + numMappers + ", numNodes=" + numNodes);
    createSchema();
    job = Job.getInstance(getConf());
    job.setJobName("Link Generator");
    job.setNumReduceTasks(0);
    job.setJarByClass(getClass());
    FileInputFormat.setInputPaths(job, tmpOutput);
    job.setInputFormatClass(OneFilePerMapperSFIF.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);
    setMapperForGenerator(job);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
    TableMapReduceUtil.initCredentials(job);
    boolean success = jobCompletion(job);
    return success ? 0 : 1;
}
/**
 * Gets fully configured Job instance.
 *
 * @param input Input file name.
 * @param output Output directory name.
 * @return Job instance.
 * @throws IOException If fails.
 */
public static Job getJob(String input, String output) throws IOException {
    Job job = Job.getInstance();
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    setTasksClasses(job, true, true, true, false);
    FileInputFormat.setInputPaths(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    job.setJarByClass(HadoopWordCount2.class);
    return job;
}
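// A minimal sketch of a main(...) driver for getJob(...) above, assuming it is called with an
// input file and an output directory. The usage message and exit codes are illustrative
// assumptions, not taken from the original class.
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: HadoopWordCount2 <input> <output>");
        System.exit(2);
    }
    Job job = getJob(args[0], args[1]);
    // Submit the configured job and wait for completion, reporting progress to the console.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}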
TableName tableName = TableName.valueOf(args[0]);
conf.set(TABLE_NAME, tableName.getNameAsString());
Path inputDir = new Path(args[1]);
Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
job.setJarByClass(Importer.class);
FileInputFormat.setInputPaths(job, inputDir);
job.setInputFormatClass(SequenceFileInputFormat.class);
String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
if (hfileOutPath != null) {
    // Bulk-load path: sort cells in the reducer and write HFiles for incremental load.
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Table table = conn.getTable(tableName);
        RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
        HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
        job.setMapperClass(CellSortImporter.class);
        job.setReducerClass(CellReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(CellWritableComparable.class);
        job.setMapOutputValueClass(MapReduceExtendedCell.class);
        // Sort map output keys with the cell comparator.
        job.getConfiguration().setClass("mapreduce.job.output.key.comparator.class",
            CellWritableComparable.CellWritableComparator.class, RawComparator.class);
        Path partitionsPath = new Path(TotalOrderPartitioner.getPartitionFile(job.getConfiguration()));
        FileSystem fs = FileSystem.get(job.getConfiguration());
        fs.deleteOnExit(partitionsPath);
    }
} else {
    // Direct-write path: mappers write straight to the table, no reduce phase.
    job.setMapperClass(Importer.class);
    TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
    job.setNumReduceTasks(0);
}
/**
 * Prepares a job with mappers to cancel.
 *
 * @return Configuration of the fully configured job.
 * @throws Exception If fails.
 */
private Configuration prepareJobForCancelling() throws Exception {
    prepareFile("/testFile", 1500);
    executedTasks.set(0);
    cancelledTasks.set(0);
    failMapperId.set(0);
    splitsCount.set(0);
    Configuration cfg = new Configuration();
    setupFileSystems(cfg);
    Job job = Job.getInstance(cfg);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(CancellingTestMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(InFormat.class);
    FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@/output/"));
    job.setJarByClass(getClass());
    return job.getConfiguration();
}
Path inputDir = new Path(args[1]);
String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
job = Job.getInstance(conf, jobName);
job.setJarByClass(mapperClass);
FileInputFormat.setInputPaths(job, inputDir);
job.setInputFormatClass(TextInputFormat.class);
job.setMapperClass(mapperClass);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
if (mapperClass.equals(TsvImporterTextMapper.class)) {
    // Text mapper emits raw lines; sort them into HFiles in the reducer.
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(TextSortReducer.class);
} else {
    // Default mapper emits Puts; combine and sort them before writing HFiles.
    job.setMapOutputValueClass(Put.class);
    job.setCombinerClass(PutCombiner.class);
    job.setReducerClass(PutSortReducer.class);
}
Path outputDir = new Path(hfileOutPath);
FileOutputFormat.setOutputPath(job, outputDir);
HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
FileInputFormat.setInputPaths(job, new Path(input));
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setMapperClass(InMemCuboidFromBaseCuboidMapper.class);
job.setMapOutputKeyClass(ByteArrayWritable.class);
job.setMapOutputValueClass(ByteArrayWritable.class);
job.setReducerClass(InMemCuboidFromBaseCuboidReducer.class);
job.setNumReduceTasks(MapReduceUtil.getInmemCubingReduceTaskNum(cubeSeg, cuboidScheduler));
job.setOutputValueClass(Text.class);
Path outputPath = new Path(output);
FileOutputFormat.setOutputPath(job, outputPath);
String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
String segmentID = getOptionValue(OPTION_SEGMENT_ID);
Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
job.setMapperClass(UpdateOldCuboidShardMapper.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
FileInputFormat.setInputPaths(job, input);
FileOutputFormat.setOutputPath(job, output);
String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
String segmentID = getOptionValue(OPTION_SEGMENT_ID);
Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
job.setMapperClass(FilterRecommendCuboidDataMapper.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(SequenceFileInputFormat.class);
FileInputFormat.setInputPaths(job, input);
FileOutputFormat.setOutputPath(job, output);
public void checkInputFormat() throws Exception {
    Job job = new Job();
    WordCountUtil wordCountUtil = new WordCountUtil("trevniMapReduceKeyValueTest");
    job.setMapperClass(Counter.class);
    FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/out/*"));
    job.setInputFormatClass(AvroTrevniKeyValueInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    total = 0;
    job.waitForCompletion(true);
    assertEquals(WordCountUtil.TOTAL, total);
}
/**
 * Configures the Hadoop MapReduce job.
 *
 * @return Instance of the Hadoop MapReduce job.
 * @throws IOException If failed.
 */
@SuppressWarnings("deprecation")
private Job createConfigBasedHadoopJob() throws IOException {
    Job jobCfg = new Job();
    Configuration cfg = jobCfg.getConfiguration();
    // Use explicit configuration of distributed file system, if provided.
    cfg.addResource(U.resolveIgniteUrl(DFS_CFG));
    jobCfg.setJobName("HadoopPopularWordExample");
    jobCfg.setJarByClass(HadoopPopularWords.class);
    jobCfg.setInputFormatClass(TextInputFormat.class);
    jobCfg.setOutputKeyClass(Text.class);
    jobCfg.setOutputValueClass(IntWritable.class);
    jobCfg.setMapperClass(TokenizingMapper.class);
    jobCfg.setReducerClass(TopNWordsReducer.class);
    FileInputFormat.setInputPaths(jobCfg, BOOKS_DFS_DIR);
    FileOutputFormat.setOutputPath(jobCfg, RESULT_DFS_DIR);
    // The local job tracker allows only one task per wave, but the text input format
    // replaces that with a value calculated from the input split size option.
    if ("local".equals(cfg.get("mapred.job.tracker", "local"))) {
        // Split the job into tasks using a 32MB split size.
        FileInputFormat.setMinInputSplitSize(jobCfg, 32L * 1024 * 1024);
        FileInputFormat.setMaxInputSplitSize(jobCfg, Long.MAX_VALUE);
    }
    return jobCfg;
}
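// A short sketch of how the job built by createConfigBasedHadoopJob() might be executed; the
// method name and the console output are assumptions added for illustration.
private void runConfigBasedHadoopJob() throws IOException {
    try {
        Job job = createConfigBasedHadoopJob();
        // Wait synchronously for the job; 'true' enables verbose progress reporting.
        boolean succeeded = job.waitForCompletion(true);
        System.out.println("HadoopPopularWordExample " + (succeeded ? "succeeded" : "failed") + ".");
    }
    catch (InterruptedException | ClassNotFoundException e) {
        throw new IOException(e);
    }
}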
/**
 * @throws Exception If failed.
 */
@Test
public void testMapperException() throws Exception {
    prepareFile("/testFile", 1000);
    Configuration cfg = new Configuration();
    cfg.setStrings("fs.igfs.impl", IgniteHadoopFileSystem.class.getName());
    Job job = Job.getInstance(cfg);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(FailMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@/output/"));
    job.setJarByClass(getClass());
    final IgniteInternalFuture<?> fut = grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 3),
        createJobInfo(job.getConfiguration(), null));
    GridTestUtils.assertThrows(log, new Callable<Object>() {
        @Override public Object call() throws Exception {
            fut.get();
            return null;
        }
    }, IgniteCheckedException.class, null);
}
/**
 * @throws Exception If failed.
 */
@Test
public void testMapRun() throws Exception {
    int lineCnt = 10000;
    String fileName = "/testFile";
    prepareFile(fileName, lineCnt);
    totalLineCnt.set(0);
    taskWorkDirs.clear();
    Configuration cfg = new Configuration();
    cfg.setStrings("fs.igfs.impl", IgniteHadoopFileSystem.class.getName());
    Job job = Job.getInstance(cfg);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(TestMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@/"));
    FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@/output/"));
    job.setJarByClass(getClass());
    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 1),
        createJobInfo(job.getConfiguration(), null));
    fut.get();
    assertEquals(lineCnt, totalLineCnt.get());
    assertEquals(32, taskWorkDirs.size());
}
public void checkOutputFormat() throws Exception {
    Job job = new Job();
    WordCountUtil wordCountUtil = new WordCountUtil("trevniMapReduceKeyTest", "part-r-00000");
    wordCountUtil.writeLinesFile();
    AvroJob.setInputKeySchema(job, STRING);
    AvroJob.setOutputKeySchema(job, Pair.getPairSchema(STRING, LONG));
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/in"));
    FileOutputFormat.setOutputPath(job, new Path(wordCountUtil.getDir().toString() + "/out"));
    FileOutputFormat.setCompressOutput(job, true);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setOutputFormatClass(AvroTrevniKeyOutputFormat.class);
    job.waitForCompletion(true);
    wordCountUtil.validateCountsFile();
}
public void checkOutputFormat() throws Exception {
    Job job = new Job();
    WordCountUtil wordCountUtil = new WordCountUtil("trevniMapReduceKeyValueTest", "part-r-00000");
    wordCountUtil.writeLinesFile();
    AvroJob.setInputKeySchema(job, STRING);
    AvroJob.setOutputKeySchema(job, STRING);
    AvroJob.setOutputValueSchema(job, LONG);
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/in"));
    FileOutputFormat.setOutputPath(job, new Path(wordCountUtil.getDir().toString() + "/out"));
    FileOutputFormat.setCompressOutput(job, true);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setOutputFormatClass(AvroTrevniKeyValueOutputFormat.class);
    job.waitForCompletion(true);
    wordCountUtil.validateCountsFileGenericRecord();
}
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(TestMapper.class);
job.setCombinerClass(TestCombiner.class);
job.setReducerClass(TestReducer.class);
job.setInputFormatClass(TextInputFormat.class);
FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@/"));
FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@/output/"));
job.setJarByClass(getClass());
public void checkInputFormat() throws Exception {
    Job job = new Job();
    WordCountUtil wordCountUtil = new WordCountUtil("trevniMapReduceKeyTest");
    job.setMapperClass(Counter.class);
    Schema subSchema = Schema.parse("{\"type\":\"record\","
        + "\"name\":\"PairValue\","
        + "\"fields\": [ "
        + "{\"name\":\"value\", \"type\":\"long\"}"
        + "]}");
    AvroJob.setInputKeySchema(job, subSchema);
    FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/out/*"));
    job.setInputFormatClass(AvroTrevniKeyInputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    total = 0;
    job.waitForCompletion(true);
    assertEquals(WordCountUtil.TOTAL, total);
}
/**
 * @throws Exception If failed.
 */
@Test
public void testSimpleTaskSubmit() throws Exception {
    String testInputFile = "/test";
    prepareTestFile(testInputFile);
    Configuration cfg = new Configuration();
    setupFileSystems(cfg);
    Job job = Job.getInstance(cfg);
    job.setMapperClass(TestMapper.class);
    job.setCombinerClass(TestReducer.class);
    job.setReducerClass(TestReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(1);
    FileInputFormat.setInputPaths(job, new Path("igfs://:" + getTestIgniteInstanceName(0) + "@/" + testInputFile));
    FileOutputFormat.setOutputPath(job, new Path("igfs://:" + getTestIgniteInstanceName(0) + "@/output"));
    job.setJarByClass(getClass());
    IgniteInternalFuture<?> fut = grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 1),
        createJobInfo(job.getConfiguration(), null));
    fut.get();
}
/**
 * Creates WordCount hadoop job for API v2.
 *
 * @param inFile Input file name for the job.
 * @param outFile Output file name for the job.
 * @return Hadoop job.
 * @throws Exception If fails.
 */
@Override public HadoopJobEx getHadoopJob(String inFile, String outFile) throws Exception {
    Job job = Job.getInstance();
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    HadoopWordCount2.setTasksClasses(job, true, true, true, false);
    Configuration conf = job.getConfiguration();
    setupFileSystems(conf);
    FileInputFormat.setInputPaths(job, new Path(inFile));
    FileOutputFormat.setOutputPath(job, new Path(outFile));
    job.setJarByClass(HadoopWordCount2.class);
    Job hadoopJob = HadoopWordCount2.getJob(inFile, outFile);
    HadoopDefaultJobInfo jobInfo = createJobInfo(hadoopJob.getConfiguration(), null);
    UUID uuid = new UUID(0, 0);
    HadoopJobId jobId = new HadoopJobId(uuid, 0);
    return jobInfo.createJob(HadoopV2Job.class, jobId, log, null, new HadoopHelperImpl());
}