private void setupMapper() throws IOException {
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(UHCDictionaryMapper.class);
    job.setMapOutputKeyClass(SelfDefineSortableKey.class);
    job.setMapOutputValueClass(NullWritable.class);
}
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = new Job(conf);
    job.setJobName("Convert Text");
    job.setJarByClass(Mapper.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);

    // increase if you need sorting or a special number of files
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setInputFormatClass(TextInputFormat.class);

    TextInputFormat.addInputPath(job, new Path("/lol"));
    SequenceFileOutputFormat.setOutputPath(job, new Path("/lolz"));

    // submit and wait for completion
    job.waitForCompletion(true);
}
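After the job above completes, a quick way to sanity-check the conversion is to read one of the generated sequence files back. This is a minimal sketch and not part of the original program; the file name part-m-00000 is only the conventional name of a map-only job's first output and is assumed here.

// Assumed verification snippet: dump a few records from the first output file of the map-only job.
try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
        SequenceFile.Reader.file(new Path("/lolz/part-m-00000")))) {
    LongWritable key = new LongWritable();
    Text value = new Text();
    while (reader.next(key, value)) {
        System.out.println(key.get() + "\t" + value);
    }
}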
private void setupMapper(CubeSegment cubeSeg) throws IOException {
    // set the segment's offset info to job conf
    Map<Integer, Long> offsetStart = cubeSeg.getSourcePartitionOffsetStart();
    Map<Integer, Long> offsetEnd = cubeSeg.getSourcePartitionOffsetEnd();

    Integer minPartition = Collections.min(offsetStart.keySet());
    Integer maxPartition = Collections.max(offsetStart.keySet());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_MIN, minPartition.toString());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_MAX, maxPartition.toString());

    for (Integer partition : offsetStart.keySet()) {
        job.getConfiguration().set(CONFIG_KAFKA_PARITION_START + partition, offsetStart.get(partition).toString());
        job.getConfiguration().set(CONFIG_KAFKA_PARITION_END + partition, offsetEnd.get(partition).toString());
    }

    job.setMapperClass(KafkaFlatTableMapper.class);
    job.setInputFormatClass(KafkaInputFormat.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(0);
}
Path inputPathPattern = new Path(_inputSegmentDir);
Path stagingDir = new Path(_stagingDir);
Path outputDir = new Path(_outputDir);

job.setJarByClass(SegmentCreationJob.class);
job.setJobName(_jobName);

job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);

job.waitForCompletion(true);
if (!job.isSuccessful()) {
    throw new RuntimeException("Job failed : " + job);
}
Job job = Job.getInstance(conf);
Path inputDir = new Path(generateOutDir);
Path outputDir = new Path(sortOutDir);

job.setOutputValueClass(Text.class);
job.setInputFormatClass(TeraInputFormat.class);
job.setOutputFormatClass(TeraOutputFormat.class);

long start = System.currentTimeMillis();
Path partFile = new Path(outputDir, PARTITION_FILENAME);
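The fragment stops right after the partition file path is computed. In a total-order sort the next steps usually sample the input to build partition boundaries and register them with the partitioner; the sketch below assumes the stock library classes (TotalOrderPartitioner and InputSampler from org.apache.hadoop.mapreduce.lib.partition) rather than TeraSort's own variants, so treat it as illustrative only.

// Assumed continuation: wire up the paths and a total-order partitioner keyed off partFile.
FileInputFormat.setInputPaths(job, inputDir);
FileOutputFormat.setOutputPath(job, outputDir);
TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partFile);
// Sample the input (1% frequency, at most 1000 samples from 10 splits) to compute the boundaries.
InputSampler.writePartitionFile(job, new InputSampler.RandomSampler<>(0.01, 1000, 10));
job.setPartitionerClass(TotalOrderPartitioner.class);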
/**
 * Configures the Hadoop MapReduce job.
 *
 * @return Instance of the Hadoop MapReduce job.
 * @throws IOException If failed.
 */
@SuppressWarnings("deprecation")
private Job createConfigBasedHadoopJob() throws IOException {
    Job jobCfg = new Job();
    Configuration cfg = jobCfg.getConfiguration();

    // Use explicit configuration of distributed file system, if provided.
    cfg.addResource(U.resolveIgniteUrl(DFS_CFG));

    jobCfg.setJobName("HadoopPopularWordExample");
    jobCfg.setJarByClass(HadoopPopularWords.class);

    jobCfg.setInputFormatClass(TextInputFormat.class);
    jobCfg.setOutputKeyClass(Text.class);
    jobCfg.setOutputValueClass(IntWritable.class);

    jobCfg.setMapperClass(TokenizingMapper.class);
    jobCfg.setReducerClass(TopNWordsReducer.class);

    FileInputFormat.setInputPaths(jobCfg, BOOKS_DFS_DIR);
    FileOutputFormat.setOutputPath(jobCfg, RESULT_DFS_DIR);

    // The local job tracker allows only one task per wave, but the text input format
    // replaces that with a value calculated from the input split size option.
    if ("local".equals(cfg.get("mapred.job.tracker", "local"))) {
        // Split the job into tasks using a 32MB split size.
        FileInputFormat.setMinInputSplitSize(jobCfg, 32L * 1024 * 1024);
        FileInputFormat.setMaxInputSplitSize(jobCfg, Long.MAX_VALUE);
    }

    return jobCfg;
}
private Job getVertexJobWithDefaultMapper(org.apache.hadoop.conf.Configuration c) throws IOException {
    Job job = Job.getInstance(c);

    job.setJarByClass(HadoopScanMapper.class);
    job.setJobName("testPartitionedVertexScan");
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setInputFormatClass(CassandraInputFormat.class);

    return job;
}
protected Job doLoad(Configuration conf, HTableDescriptor htd) throws Exception {
    Path outputDir = getTestDir(TEST_NAME, "load-output");
    LOG.info("Load output dir: " + outputDir);

    NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
    conf.set(TABLE_NAME_KEY, htd.getTableName().getNameAsString());

    Job job = Job.getInstance(conf);
    job.setJobName(TEST_NAME + " Load for " + htd.getTableName());
    job.setJarByClass(this.getClass());
    setMapperClass(job);
    job.setInputFormatClass(NMapInputFormat.class);
    job.setNumReduceTasks(0);
    setJobScannerConf(job);
    FileOutputFormat.setOutputPath(job, outputDir);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
    TableMapReduceUtil.initCredentials(job);
    assertTrue(job.waitForCompletion(true));
    return job;
}
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    String columnFamily = args[1];
    System.out.println("****" + tableName);

    conf.set(TableInputFormat.SCAN, TableMapReduceUtil.convertScanToString(new Scan()));
    conf.set(TableInputFormat.INPUT_TABLE, tableName);
    conf.set("index.tablename", tableName);
    conf.set("index.familyname", columnFamily);

    String[] fields = new String[args.length - 2];
    System.arraycopy(args, 2, fields, 0, fields.length);
    conf.setStrings("index.fields", fields);

    Job job = new Job(conf, tableName);
    job.setJarByClass(IndexBuilder.class);
    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TableInputFormat.class);
    job.setOutputFormatClass(MultiTableOutputFormat.class);
    return job;
}
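A configureJob helper like this is normally paired with a small driver. The main() below is an assumed sketch rather than code from the original class; it only shows how the helper would typically be invoked.

// Assumed driver sketch: build the HBase configuration, configure the job, run it.
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = configureJob(conf, args);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}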
job.setInputFormatClass(CustomV2InputFormat.class);
job.setOutputFormatClass(CustomV2OutputFormat.class);

job.setOutputValueClass(IntWritable.class);

FileInputFormat.setInputPaths(job, new Path(igfsScheme() + inFile.toString()));
FileOutputFormat.setOutputPath(job, new Path(igfsScheme() + PATH_OUTPUT));

job.setJarByClass(HadoopWordCount2.class);
protected void configureMapper(Job job) {
    job.setInputFormatClass(AvroKeyRecursiveCombineFileInputFormat.class);
    job.setMapperClass(AvroKeyMapper.class);
    job.setMapOutputKeyClass(AvroKey.class);
    job.setMapOutputValueClass(AvroValue.class);
}
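Because the map output key and value classes are AvroKey and AvroValue, the backing schemas normally also have to be registered on the job so that Hadoop selects the Avro serialization for the shuffle. The helper below is a hypothetical companion, not part of the original class, and assumes org.apache.avro.mapreduce.AvroJob.

// Hypothetical companion: register the Avro schemas that back the AvroKey/AvroValue map outputs.
protected void configureSchemas(Job job, Schema keySchema, Schema valueSchema) {
    AvroJob.setMapOutputKeySchema(job, keySchema);
    AvroJob.setMapOutputValueSchema(job, valueSchema);
}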
public int runGenerator(int numMappers, long numNodes, Path tmpOutput,
        Integer width, Integer wrapMultiplier, Integer numWalkers) throws Exception {
    LOG.info("Running Generator with numMappers=" + numMappers + ", numNodes=" + numNodes);
    createSchema();
    job = Job.getInstance(getConf());

    job.setJobName("Link Generator");
    job.setNumReduceTasks(0);
    job.setJarByClass(getClass());

    FileInputFormat.setInputPaths(job, tmpOutput);
    job.setInputFormatClass(OneFilePerMapperSFIF.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);

    setMapperForGenerator(job);

    job.setOutputFormatClass(NullOutputFormat.class);

    job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
    TableMapReduceUtil.initCredentials(job);

    boolean success = jobCompletion(job);

    return success ? 0 : 1;
}
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    Path inputPath = new Path(args[0]);
    String tableName = args[1];

    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Uploader.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(Uploader.class);

    // No reducers. Just write straight to table. Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    return job;
}
private void setupMapper(Path input) throws IOException {
    FileInputFormat.setInputPaths(job, input);
    job.setMapperClass(CalculateStatsFromBaseCuboidMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
}
Job job = new Job(conf);
job.setMapperClass(Mapper.class);
job.setReducerClass(Reducer.class);
job.setJarByClass(Mapper.class);

job.setInputFormatClass(SequenceFileInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);

SequenceFileInputFormat.addInputPath(job, new Path("files/toMap/"));
Path out = new Path("files/out/processed/");
fs.delete(out, true);
// TextOutputFormat requires an output path; point it at the freshly cleared directory.
TextOutputFormat.setOutputPath(job, out);

job.waitForCompletion(true);
private void setupRandomGeneratorMapper(Job job, boolean putSortReducer) {
    if (putSortReducer) {
        job.setInputFormatClass(NMapInputFormat.class);
        job.setMapperClass(RandomPutGeneratingMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
    } else {
        job.setInputFormatClass(NMapInputFormat.class);
        job.setMapperClass(RandomKVGeneratingMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(KeyValue.class);
    }
}
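The putSortReducer flag only switches the generated map output type; in HBase bulk-load style tests a helper like this is usually followed by choosing the matching sort reducer. The pairing below is an assumed sketch, not code from the original test.

// Assumed follow-up: pick the HBase sort reducer that matches the generated map output type.
setupRandomGeneratorMapper(job, putSortReducer);
if (putSortReducer) {
    job.setReducerClass(PutSortReducer.class);        // sorts Put map outputs before HFile writing
} else {
    job.setReducerClass(KeyValueSortReducer.class);   // sorts KeyValue map outputs
}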
public static void main(String[] args) throws Exception {
    CommandLine cli = StressTestUtils.parseCommandLine(OPTIONS, args);

    Configuration configuration = new Configuration();
    if (cli.hasOption(THROTTLING_SERVER_URI.getOpt())) {
        configuration.setBoolean(USE_THROTTLING_SERVER, true);
        String resourceLimited = cli.getOptionValue(RESOURCE_ID_OPT.getOpt(), "MRStressTest");
        configuration.set(RESOURCE_ID, resourceLimited);
        configuration.set(
            BrokerConfigurationKeyGenerator.generateKey(new SharedRestClientFactory(),
                new SharedRestClientKey(RestliLimiterFactory.RESTLI_SERVICE_NAME), null,
                SharedRestClientFactory.SERVER_URI_KEY),
            cli.getOptionValue(THROTTLING_SERVER_URI.getOpt()));
    }
    if (cli.hasOption(LOCAL_QPS_OPT.getOpt())) {
        configuration.set(LOCALLY_ENFORCED_QPS, cli.getOptionValue(LOCAL_QPS_OPT.getOpt()));
    }

    Job job = Job.getInstance(configuration, "ThrottlingStressTest");
    job.getConfiguration().setBoolean("mapreduce.job.user.classpath.first", true);
    job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
    job.getConfiguration().set(NUM_MAPPERS, cli.getOptionValue(NUM_MAPPERS_OPT.getOpt(), DEFAULT_MAPPERS));
    StressTestUtils.populateConfigFromCli(job.getConfiguration(), cli);

    job.setJarByClass(MRStressTest.class);
    job.setMapperClass(StresserMapper.class);
    job.setReducerClass(AggregatorReducer.class);
    job.setInputFormatClass(MyInputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/MRStressTest" + System.currentTimeMillis()));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
job.setJarByClass(mapperClass);
job.setNumReduceTasks(0);
job.setMapperClass(mapperClass);
job.setOutputFormatClass(NullOutputFormat.class);
job.setInputFormatClass(inputFormat);

boolean success = job.waitForCompletion(true);
job.setJarByClass(MapReduceIntegrationChecker.class);
job.setMapperClass(CheckerMapper.class);
job.setCombinerClass(CheckerReducer.class);
job.setReducerClass(CheckerReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(EmptyInputFormat.class);
FileOutputFormat.setOutputPath(job, mOutputFilePath);

if (!job.waitForCompletion(true)) {
    return 1;
}
conf.setBoolean(CONF_COMPACT_MAJOR, major);

Job job = new Job(conf);
job.setJobName("CompactionTool");
job.setJarByClass(CompactionTool.class);
job.setMapperClass(CompactionMapper.class);
job.setInputFormatClass(CompactionInputFormat.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setMapSpeculativeExecution(false);
job.setNumReduceTasks(0);

try {
    Path inputPath = new Path(stagingDir, "compact-" + EnvironmentEdgeManager.currentTime());
    CompactionInputFormat.createInputFile(fs, inputPath, toCompactDirs);
    CompactionInputFormat.addInputPath(job, inputPath);
    return job.waitForCompletion(true) ? 0 : 1;
} finally {
    fs.delete(stagingDir, true);
}