public static void main(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = new Configuration();
  Job job = new Job(conf);
  job.setJobName("Convert Text");
  job.setJarByClass(Mapper.class);
  // Mapper.class and Reducer.class are the identity implementations: records pass through unchanged
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);
  // increase if you need sorting or a special number of files
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setInputFormatClass(TextInputFormat.class);
  TextInputFormat.addInputPath(job, new Path("/lol"));
  SequenceFileOutputFormat.setOutputPath(job, new Path("/lolz"));
  // submit and wait for completion
  job.waitForCompletion(true);
}

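With zero reduce tasks, the conversion above writes map output straight to SequenceFiles named part-m-NNNNN under /lolz, keyed by the byte offset of each line. A minimal sketch of reading one of those files back, assuming standard org.apache.hadoop.io imports; the helper name dumpConvertedFile is made up for illustration:

// Hypothetical check of the conversion output: with zero reduce tasks the
// mapper writes files named part-m-NNNNN under the output directory.
static void dumpConvertedFile(Configuration conf, Path file) throws IOException {
  try (SequenceFile.Reader reader =
      new SequenceFile.Reader(conf, SequenceFile.Reader.file(file))) {
    LongWritable key = new LongWritable();   // byte offset of the line in the source file
    Text value = new Text();                 // the line itself
    while (reader.next(key, value)) {
      System.out.println(key.get() + "\t" + value);
    }
  }
}
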
private Job doVerify(Configuration conf, HTableDescriptor htd, String... auths)
    throws IOException, InterruptedException, ClassNotFoundException {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(job);
  Scan scan = new Scan();
  scan.setAuthorizations(new Authorizations(auths));
  TableMapReduceUtil.initTableMapperJob(htd.getTableName().getNameAsString(), scan,
      VerifyMapper.class, NullWritable.class, NullWritable.class, job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));
  return job;
}

public static Job createJob(Configuration conf, String table, String cf,
    String accessTrackerTable, String accessTrackerColumnFamily, Long ts) throws IOException {
  Job job = new Job(conf);
  job.setJobName("LeastRecentlyUsedPruner: Pruning " + table + ":" + cf + " since "
      + new SimpleDateFormat().format(new Date(ts)));
  System.out.println("Configuring " + job.getJobName());
  job.setJarByClass(LeastRecentlyUsedPruner.class);
  job.getConfiguration().setLong(PrunerMapper.TIMESTAMP_CONF, ts);
  job.getConfiguration().set(PrunerMapper.ACCESS_TRACKER_NAME_CONF, table);
  job.getConfiguration().set(PrunerMapper.ACCESS_TRACKER_CF_CONF, accessTrackerColumnFamily);
  job.getConfiguration().set(PrunerMapper.ACCESS_TRACKER_TABLE_CONF, accessTrackerTable);
  setupHBaseJob(job, table, cf);
  job.setNumReduceTasks(0);
  return job;
}

job.setJobName("CombineSmallFilesDriver"); job.setInputFormatClass(CustomCFIF.class); job.setMapperClass(WordCountMapper.class); job.setReducerClass(WordCountReducer.class); job.waitForCompletion(true); return 0;
job.setJarByClass(mapperClass);
job.setJobName(jobName);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(NullWritable.class);
job.setNumReduceTasks(0);
job.setMapperClass(mapperClass);
job.setOutputFormatClass(NullOutputFormat.class);
job.setInputFormatClass(inputFormat);
boolean success = job.waitForCompletion(true);

Scan scan = null;
job = new Job(getConf());
job.setJobName("Data copier");
job.getConfiguration().setInt("INDEX", labelIndex);
job.getConfiguration().set("LABELS", labels);
job.setJarByClass(getClass());
scan = new Scan();
scan.setCacheBlocks(false);
TableMapReduceUtil.initCredentials(job);
job.setNumReduceTasks(0);
boolean success = job.waitForCompletion(true);
return success ? 0 : 1;

public KafkaCollector(Properties props, String jobName, String topicName) throws IOException {
  this.jobName = jobName;
  this.props = props;
  this.topicName = topicName;
  this.targetFileSize = props.containsKey(TARGET_FILE_SIZE)
      ? Long.parseLong(props.getProperty(TARGET_FILE_SIZE))
      : TARGET_FILE_SIZE_DEFAULT;
  job = new Job(getConf());
  job.setJarByClass(CamusSweeper.class);
  job.setJobName(jobName);
  for (Entry<Object, Object> pair : props.entrySet()) {
    String key = (String) pair.getKey();
    job.getConfiguration().set(key, (String) pair.getValue());
  }
  this.fs = FileSystem.get(job.getConfiguration());
  this.inputPaths = getInputPaths();
  this.tmpPath = new Path(job.getConfiguration().get(TMP_PATH));
  this.outputPath = new Path(job.getConfiguration().get(DEST_PATH));
  addInputAndOutputPathsToFileInputFormat();
}

private Job getVertexJobWithDefaultMapper(org.apache.hadoop.conf.Configuration c) throws IOException {
  Job job = Job.getInstance(c);
  job.setJarByClass(HadoopScanMapper.class);
  job.setJobName("testPartitionedVertexScan");
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(NullWritable.class);
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setInputFormatClass(CassandraInputFormat.class);
  return job;
}

public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  Job job = Job.getInstance(conf);
  job.setJarByClass(getClass());
  job.setJobName(getClass().getSimpleName());
  job.setOutputKeyClass(LongWritable.class);
  FileInputFormat.addInputPath(job, new Path(args[0]));
  JSONMapReduceUtil.initMapperJob(TestInputMapper.class, job);
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  JSONMapReduceUtil.initReducerJob(TestInputReducer.class, job);
  return job.waitForCompletion(true) ? 0 : 1;
}

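A run(String[]) method like this one is normally the Tool half of the ToolRunner pattern, so the driver's main simply delegates to ToolRunner, which strips generic options (-D, -files, ...) before calling run. A minimal sketch, where TestInputTool is a hypothetical name for the enclosing Tool subclass:

// Hypothetical driver; assumes the class containing run(String[]) above
// implements org.apache.hadoop.util.Tool and is called TestInputTool.
public static void main(String[] args) throws Exception {
  int exitCode = ToolRunner.run(new Configuration(), new TestInputTool(), args);
  System.exit(exitCode);
}
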
private Job createJob(Properties props) throws IOException {
  Job job;
  if (getConf() == null) {
    setConf(new Configuration());
  }
  populateConf(props, getConf(), log);
  job = new Job(getConf());
  job.setJarByClass(CamusJob.class);
  if (job.getConfiguration().get("camus.job.name") != null) {
    job.setJobName(job.getConfiguration().get("camus.job.name"));
  } else {
    job.setJobName("Camus Job");
  }
  if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
    job.getConfiguration().set("mapreduce.job.credentials.binary",
        System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
  }
  this.hadoopJob = job;
  return job;
}

job.setJobName("CompactionTool"); job.setJarByClass(CompactionTool.class); job.setMapperClass(CompactionMapper.class); job.setInputFormatClass(CompactionInputFormat.class); job.setOutputFormatClass(NullOutputFormat.class); job.setMapSpeculativeExecution(false); job.setNumReduceTasks(0); return job.waitForCompletion(true) ? 0 : 1; } finally { fs.delete(stagingDir, true);
@Override
public void setupJob(final Job job, final SampleDataForSplitPoints operation,
    final String mapperGeneratorClassName, final Store store) throws IOException {
  job.setJarByClass(getClass());
  job.setJobName(getJobName(mapperGeneratorClassName, new Path(operation.getOutputPath())));
  setupMapper(job);
  setupReducer(job);
  setupOutput(job, operation, store);
}

job.setJobName(jobname);
job.setJarByClass(ExportSnapshot.class);
TableMapReduceUtil.addDependencyJars(job);
job.setMapperClass(ExportMapper.class);
job.setInputFormatClass(ExportSnapshotInputFormat.class);
job.setOutputFormatClass(NullOutputFormat.class);
job.setMapSpeculativeExecution(false);
job.setNumReduceTasks(0);
if (!job.waitForCompletion(true)) {
  throw new ExportSnapshotException(job.getStatus().getFailureInfo());

/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
  Triple<TableName, Scan, Path> arguments = ExportUtils.getArgumentsFromCommandLine(conf, args);
  String tableName = arguments.getFirst().getNameAsString();
  Path outputDir = arguments.getThird();
  Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
  job.setJobName(NAME + "_" + tableName);
  job.setJarByClass(Export.class);
  // Set optional scan parameters
  Scan s = arguments.getSecond();
  IdentityTableMapper.initJob(tableName, s, IdentityTableMapper.class, job);
  // No reducers. Just write straight to output files.
  job.setNumReduceTasks(0);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Result.class);
  // job conf doesn't contain the conf so doesn't have a default fs.
  FileOutputFormat.setOutputPath(job, outputDir);
  return job;
}

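createSubmittableJob only configures the export; the caller still has to submit it. A minimal driver sketch, assuming an HBase client configuration; this is illustrative, not the snippet's own main:

// Hypothetical driver: build the export job and block until it finishes.
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  Job job = createSubmittableJob(conf, args);
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
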
job.setJarByClass(SegmentCreationJob.class);
job.setJobName(_jobName);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.waitForCompletion(true);
if (!job.isSuccessful()) {
  throw new RuntimeException("Job failed : " + job);

conf.set("user.name", user); job = new Job(conf); job.setJarByClass(LaunchMapper.class); job.setJobName(TempletonControllerJob.class.getSimpleName()); job.setMapperClass(LaunchMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setInputFormatClass(SingleInputFormat.class); job.setOutputFormatClass(of.getClass()); job.setNumReduceTasks(0);
public int runRandomInputGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMultiplier, Integer numWalkers) throws Exception {
  LOG.info("Running RandomInputGenerator with numMappers=" + numMappers
      + ", numNodes=" + numNodes);
  Job job = Job.getInstance(getConf());
  job.setJobName("Random Input Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());
  job.setInputFormatClass(GeneratorInputFormat.class);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(NullWritable.class);
  setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);
  job.setMapperClass(Mapper.class); // identity mapper
  FileOutputFormat.setOutputPath(job, tmpOutput);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), Random64.class);
  boolean success = jobCompletion(job);
  return success ? 0 : 1;
}

private void doMapReduce(final Class<? extends Test> cmd)
    throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());
  Job job = Job.getInstance(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");
  job.setInputFormatClass(PeInputFormat.class);
  PeInputFormat.setInputPaths(job, inputDir);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}

conf.set(EvaluationMapTask.PE_KEY, PerformanceEvaluation.class.getName());
Job job = Job.getInstance(conf);
job.setJarByClass(PerformanceEvaluation.class);
job.setJobName("HBase Performance Evaluation - " + opts.cmdName);
job.setInputFormatClass(NLineInputFormat.class);
NLineInputFormat.setInputPaths(job, inputDir);
job.setOutputValueClass(LongWritable.class);
job.setMapperClass(EvaluationMapTask.class);
job.setReducerClass(LongSumReducer.class);
job.setOutputFormatClass(TextOutputFormat.class);
TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));
job.waitForCompletion(true);
return job;

/**
 * Configures the Hadoop MapReduce job.
 *
 * @return Instance of the Hadoop MapReduce job.
 * @throws IOException If failed.
 */
@SuppressWarnings("deprecation")
private Job createConfigBasedHadoopJob() throws IOException {
  Job jobCfg = new Job();
  Configuration cfg = jobCfg.getConfiguration();
  // Use explicit configuration of distributed file system, if provided.
  cfg.addResource(U.resolveIgniteUrl(DFS_CFG));
  jobCfg.setJobName("HadoopPopularWordExample");
  jobCfg.setJarByClass(HadoopPopularWords.class);
  jobCfg.setInputFormatClass(TextInputFormat.class);
  jobCfg.setOutputKeyClass(Text.class);
  jobCfg.setOutputValueClass(IntWritable.class);
  jobCfg.setMapperClass(TokenizingMapper.class);
  jobCfg.setReducerClass(TopNWordsReducer.class);
  FileInputFormat.setInputPaths(jobCfg, BOOKS_DFS_DIR);
  FileOutputFormat.setOutputPath(jobCfg, RESULT_DFS_DIR);
  // The local job tracker allows only one task per wave, but the text input format
  // replaces that with a value calculated from the input split size option.
  if ("local".equals(cfg.get("mapred.job.tracker", "local"))) {
    // Split the job into tasks using a 32MB split size.
    FileInputFormat.setMinInputSplitSize(jobCfg, 32L * 1024 * 1024);
    FileInputFormat.setMaxInputSplitSize(jobCfg, Long.MAX_VALUE);
  }
  return jobCfg;
}

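The method only builds the job configuration; a caller still has to submit it. A minimal sketch of such a caller, assuming it simply blocks until completion (the real example may drive the job differently):

// Hypothetical caller: build the job and wait for it to finish.
private void runPopularWordsJob() throws Exception {
  Job job = createConfigBasedHadoopJob();
  // true = report progress to the console while the job runs
  if (!job.waitForCompletion(true))
    throw new RuntimeException("HadoopPopularWordExample failed.");
}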