public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); Job job = new Job(conf); job.setJobName("Convert Text"); job.setJarByClass(Mapper.class); job.setMapperClass(Mapper.class); job.setReducerClass(Reducer.class); // increase if you need sorting or a special number of files job.setNumReduceTasks(0); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(Text.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setInputFormatClass(TextInputFormat.class); TextInputFormat.addInputPath(job, new Path("/lol")); SequenceFileOutputFormat.setOutputPath(job, new Path("/lolz")); // submit and wait for completion job.waitForCompletion(true); }
// Excerpt of a job driver body (enclosing method header not visible here).
// Creates a Job, sets MyMapper as the mapper, takes the input path from args[0] and the
// output path from args[1], then blocks in waitForCompletion(true).
// NOTE(review): the boolean returned by waitForCompletion is discarded and 0 is returned
// unconditionally — confirm that a failed job is really meant to report 0.
Job job = new Job(); job.setMapperClass(MyMapper.class); FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); return 0;
// Excerpt (truncated: `job` is created outside this view and the final `if` block is not closed).
// Wraps the configured input, staging and output directory strings in Path objects, sets the
// jar class and job name, blocks until the job finishes, and throws a RuntimeException whose
// message includes the job when job.isSuccessful() is false.
Path inputPathPattern = new Path(_inputSegmentDir); Path stagingDir = new Path(_stagingDir); Path outputDir = new Path(_outputDir); job.setJarByClass(SegmentCreationJob.class); job.setJobName(_jobName); job.waitForCompletion(true); if (!job.isSuccessful()) { throw new RuntimeException("Job failed : " + job);
// Excerpt of a test body (truncated: the `try` is not closed, and the call that receives the
// table name, IdentityTableReducer.class and the job has lost its opening in this view).
// Creates a job named "process column contents" from the table's configuration with a single
// reduce task, writes output to the relative path "test", and asserts that the job succeeds.
try { LOG.info("Before map/reduce startup"); job = new Job(table.getConfiguration(), "process column contents"); job.setNumReduceTasks(1); Scan scan = new Scan(); table.getName().getNameAsString(), IdentityTableReducer.class, job); FileOutputFormat.setOutputPath(job, new Path("test")); LOG.info("Started " + table.getName()); assertTrue(job.waitForCompletion(true)); LOG.info("After map/reduce completion");
// Excerpt (truncated: `job`, `jobConf`, `locatedFileStatus` and `goodPaths` are defined outside
// this view and the trailing `if` blocks are not closed).
// Sets the job working directory from the converter config, uses ConvertingMapper with Text
// map-output key/value, submits the job explicitly and logs its tracking URL, then waits for
// completion; when the job did not succeed the MAP task reports are fetched.
// The trailing statements collect, for a path named DATA_SUCCESS_KEY, the sibling
// DATA_FILE_KEY path into goodPaths.
jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache())); job.setMapperClass(ConvertingMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.submit(); log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL()); final boolean success = job.waitForCompletion(true); if (!success) { final TaskReport[] reports = job.getTaskReports(TaskType.MAP); final Path myPath = locatedFileStatus.getPath(); if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) { goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
// Excerpt of a scan test body (`scan`, `job` and `jobName` are defined outside this view).
// Wires ScanMapper (ImmutableBytesWritable key and value) over TABLE_NAME, uses ScanReducer
// with one reduce task, writes output to a path named after the job, and asserts success.
// NOTE(review): in this single-line form the inline `//` comment swallows every statement
// that follows it on the line; the statements need their own lines to take effect.
TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, ScanMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job); job.setReducerClass(ScanReducer.class); job.setNumReduceTasks(1); // one to get final "first" and "last" key FileOutputFormat.setOutputPath(job, new Path(job.getJobName())); LOG.info("Started " + job.getJobName()); assertTrue(job.waitForCompletion(true)); LOG.info("After map/reduce completion - job " + jobName);
/**
 * Configures and runs the "Link Verifier" MapReduce job over the table.
 *
 * <p>The scan reads only {@code FAMILY_NAME:COLUMN_PREV}, with a caching value of 10000 and
 * block caching off, and carries the two authorization labels found at positions
 * {@code labelIndex * 2} and {@code labelIndex * 2 + 1} of the comma-split {@code labels}
 * string. Map speculation is disabled and the result is written as text to
 * {@code outputDir}.
 *
 * @param outputDir directory the job writes its text output to
 * @param numReducers number of reduce tasks to configure
 * @return 0 when the job succeeds, 1 otherwise
 * @throws IOException if job setup or submission fails
 * @throws InterruptedException if the wait for job completion is interrupted
 * @throws ClassNotFoundException if a class configured on the job cannot be loaded
 */
private int doVerify(Path outputDir, int numReducers)
    throws IOException, InterruptedException, ClassNotFoundException {
  job = new Job(getConf());
  job.setJobName("Link Verifier");
  job.setNumReduceTasks(numReducers);
  job.setJarByClass(getClass());
  setJobScannerConf(job);

  Scan verifyScan = new Scan();
  verifyScan.addColumn(FAMILY_NAME, COLUMN_PREV);
  verifyScan.setCaching(10000);
  verifyScan.setCacheBlocks(false);

  // Two consecutive entries of the split labels are used for this labelIndex.
  String[] labelParts = labels.split(COMMA);
  int firstLabelPos = this.labelIndex * 2;
  Authorizations scanAuths =
      new Authorizations(labelParts[firstLabelPos], labelParts[firstLabelPos + 1]);
  verifyScan.setAuthorizations(scanAuths);

  TableMapReduceUtil.initTableMapperJob(
      tableName.getName(), verifyScan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);

  job.setReducerClass(VerifyReducer.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputDir);

  if (job.waitForCompletion(true)) {
    return 0;
  }
  return 1;
}
// Start of runCopier (truncated: the method's closing brace is not visible in this view).
// Creates a job named "Data copier", stores labelIndex under "INDEX" and labels under
// "LABELS" in the job configuration, creates a Scan with block caching disabled, initialises
// credentials, configures zero reduce tasks, and returns 0 when the job succeeds, 1 otherwise.
// NOTE(review): in the visible portion neither `outputDir` nor `scan` is handed to the job —
// confirm against the full method.
public int runCopier(String outputDir) throws Exception { Job job = null; Scan scan = null; job = new Job(getConf()); job.setJobName("Data copier"); job.getConfiguration().setInt("INDEX", labelIndex); job.getConfiguration().set("LABELS", labels); job.setJarByClass(getClass()); scan = new Scan(); scan.setCacheBlocks(false); TableMapReduceUtil.initCredentials(job); job.setNumReduceTasks(0); boolean success = job.waitForCompletion(true); return success ? 0 : 1;
/**
 * Runs a map-only job that uses the given input format together with the ExampleVerifier
 * mapper, then checks the counters the job reported.
 *
 * <p>All counters live in groups named after {@code TestTableInputFormat}'s class name plus a
 * {@code :row}, {@code :family} or {@code :value} suffix.
 *
 * @param clazz input format class under test
 * @throws IOException if job setup, submission or counter retrieval fails
 * @throws InterruptedException if the wait for job completion is interrupted
 * @throws ClassNotFoundException if a class configured on the job cannot be loaded
 */
void testInputFormat(Class<? extends InputFormat> clazz)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
  job.setInputFormatClass(clazz);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);

  LOG.debug("submitting job.");
  final boolean completed = job.waitForCompletion(true);
  assertTrue("job failed!", completed);

  // Counter group names share the TestTableInputFormat class name as prefix.
  final String groupPrefix = TestTableInputFormat.class.getName();
  final String rowGroup = groupPrefix + ":row";
  final String familyGroup = groupPrefix + ":family";
  final String valueGroup = groupPrefix + ":value";

  final long filteredForRows = job.getCounters().findCounter(rowGroup, "aaa").getValue();
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2,
      filteredForRows);

  final long filteredOutRows = job.getCounters().findCounter(rowGroup, "bbb").getValue();
  assertEquals("Saw any instances of the filtered out row.", 0, filteredOutRows);

  final long columnACount = job.getCounters().findCounter(familyGroup, "columnA").getValue();
  assertEquals("Saw the wrong number of instances of columnA.", 1, columnACount);

  final long columnBCount = job.getCounters().findCounter(familyGroup, "columnB").getValue();
  assertEquals("Saw the wrong number of instances of columnB.", 1, columnBCount);

  final long filteredForValues =
      job.getCounters().findCounter(valueGroup, "value aaa").getValue();
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2,
      filteredForValues);

  final long filteredOutValues =
      job.getCounters().findCounter(valueGroup, "value bbb").getValue();
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0,
      filteredOutValues);
}
// Excerpt of a job driver body (`conf` and `fs` are defined outside this view).
// Configures a job with Mapper/Reducer and Text output key/value, adds "files/toMap/" as
// input, recursively deletes "files/out/processed/" and then waits for the job to finish.
// NOTE(review): in the visible statements `out` is deleted but never registered as the job's
// output path, and the result of waitForCompletion is discarded — confirm against the full method.
Job job = new Job(conf); job.setMapperClass(Mapper.class); job.setReducerClass(Reducer.class); job.setJarByClass(Mapper.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); SequenceFileInputFormat.addInputPath(job, new Path("files/toMap/")); Path out = new Path("files/out/processed/"); fs.delete(out, true); job.waitForCompletion(true);
// Excerpt (truncated: `conf`, `major`, `fs`, `stagingDir` and `toCompactDirs` come from outside
// this view and the `finally` block is not closed).
// Records the `major` flag under CONF_COMPACT_MAJOR, configures the "CompactionTool" job with
// CompactionMapper, CompactionInputFormat and NullOutputFormat, creates an input file named
// "compact-" plus the current time under stagingDir, and returns 0 when the job succeeds,
// 1 otherwise. The finally block deletes stagingDir recursively whatever the outcome.
conf.setBoolean(CONF_COMPACT_MAJOR, major); Job job = new Job(conf); job.setJobName("CompactionTool"); job.setJarByClass(CompactionTool.class); job.setMapperClass(CompactionMapper.class); job.setInputFormatClass(CompactionInputFormat.class); job.setOutputFormatClass(NullOutputFormat.class); try { Path inputPath = new Path(stagingDir, "compact-"+ EnvironmentEdgeManager.currentTime()); CompactionInputFormat.createInputFile(fs, inputPath, toCompactDirs); CompactionInputFormat.addInputPath(job, inputPath); return job.waitForCompletion(true) ? 0 : 1; } finally { fs.delete(stagingDir, true);
// Excerpt of a test body (truncated: the `try` is not closed, and the call that receives the
// table name, IdentityTableReducer.class and the job has lost its opening in this view).
// Creates a job named "process column contents" from the table's configuration with a single
// reduce task, writes output to the relative path "test", logs the table name as a string,
// and asserts that the job succeeds.
try { LOG.info("Before map/reduce startup"); job = new Job(table.getConfiguration(), "process column contents"); job.setNumReduceTasks(1); Scan scan = new Scan(); table.getName().getNameAsString(), IdentityTableReducer.class, job); FileOutputFormat.setOutputPath(job, new Path("test")); LOG.info("Started " + table.getName().getNameAsString()); assertTrue(job.waitForCompletion(true)); LOG.info("After map/reduce completion");
// Excerpt of a snapshot input format test (truncated: the enclosing `try` opens before this
// view and the `finally`/`if` blocks are not closed).
// Builds a job from the test utility's configuration with a Scan bounded by startRow/endRow,
// adds dependency jars for TestTableSnapshotInputFormat, uses TestTableSnapshotReducer with
// one reduce task and NullOutputFormat, and asserts that the job succeeds.
// NOTE(review): in this single-line form the inline `//` comment swallows every statement
// that follows it on the line; the statements need their own lines to take effect.
Job job = new Job(util.getConfiguration()); Scan scan = new Scan(startRow, endRow); // limit the scan job.setJarByClass(util.getClass()); TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), TestTableSnapshotInputFormat.class); job.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class); job.setNumReduceTasks(1); job.setOutputFormatClass(NullOutputFormat.class); Assert.assertTrue(job.waitForCompletion(true)); } finally { if (!shutdownCluster) {
/**
 * Runs the map-only verification job against the table described by {@code htd} and asserts
 * that it completes successfully.
 *
 * <p>The scan carries the given authorizations; scanner caching is read from
 * {@code verify.scannercaching} in {@code conf}, defaulting to {@code SCANNER_CACHING}. Output
 * goes to the "verify-output" test directory obtained from {@code getTestDir}.
 *
 * @param conf configuration the job is created from
 * @param htd descriptor of the table to verify
 * @param auths authorization labels applied to the scan
 * @return the completed job
 * @throws IOException if job setup or submission fails
 * @throws InterruptedException if the wait for job completion is interrupted
 * @throws ClassNotFoundException if a class configured on the job cannot be loaded
 */
private Job doVerify(Configuration conf, HTableDescriptor htd, String... auths)
    throws IOException, InterruptedException, ClassNotFoundException {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");

  Job verifyJob = new Job(conf);
  verifyJob.setJarByClass(this.getClass());
  verifyJob.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(verifyJob);

  Scan authorizedScan = new Scan();
  authorizedScan.setAuthorizations(new Authorizations(auths));

  TableMapReduceUtil.initTableMapperJob(
      htd.getTableName().getNameAsString(),
      authorizedScan,
      VerifyMapper.class,
      NullWritable.class,
      NullWritable.class,
      verifyJob);
  TableMapReduceUtil.addDependencyJarsForClasses(
      verifyJob.getConfiguration(), AbstractHBaseTool.class);
  TableMapReduceUtil.setScannerCaching(
      verifyJob, conf.getInt("verify.scannercaching", SCANNER_CACHING));

  verifyJob.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(verifyJob, outputDir);

  boolean succeeded = verifyJob.waitForCompletion(true);
  assertTrue(succeeded);
  return verifyJob;
}
/**
 * Entry point of the throttling stress test: translates command-line options into job
 * configuration, runs the "ThrottlingStressTest" MapReduce job and exits the JVM with
 * status 0 when the job succeeds, 1 otherwise.
 *
 * <p>When the throttling-server URI option is present, use of the throttling server is
 * switched on, the limited resource id is set (default "MRStressTest") and the server URI is
 * stored under the broker-generated key. When the local-QPS option is present its value is
 * stored under {@code LOCALLY_ENFORCED_QPS}.
 *
 * @param args command-line arguments parsed against {@code OPTIONS}
 * @throws Exception if option parsing, job setup or job execution fails
 */
public static void main(String[] args) throws Exception {
    CommandLine cli = StressTestUtils.parseCommandLine(OPTIONS, args);

    Configuration configuration = new Configuration();

    final String serverUriOpt = THROTTLING_SERVER_URI.getOpt();
    if (cli.hasOption(serverUriOpt)) {
        configuration.setBoolean(USE_THROTTLING_SERVER, true);
        configuration.set(RESOURCE_ID, cli.getOptionValue(RESOURCE_ID_OPT.getOpt(), "MRStressTest"));

        String serverUriKey = BrokerConfigurationKeyGenerator.generateKey(
            new SharedRestClientFactory(),
            new SharedRestClientKey(RestliLimiterFactory.RESTLI_SERVICE_NAME),
            null,
            SharedRestClientFactory.SERVER_URI_KEY);
        configuration.set(serverUriKey, cli.getOptionValue(serverUriOpt));
    }

    final String localQpsOpt = LOCAL_QPS_OPT.getOpt();
    if (cli.hasOption(localQpsOpt)) {
        configuration.set(LOCALLY_ENFORCED_QPS, cli.getOptionValue(localQpsOpt));
    }

    Job job = Job.getInstance(configuration, "ThrottlingStressTest");

    Configuration jobConf = job.getConfiguration();
    jobConf.setBoolean("mapreduce.job.user.classpath.first", true);
    jobConf.setBoolean("mapreduce.map.speculative", false);
    jobConf.set(NUM_MAPPERS, cli.getOptionValue(NUM_MAPPERS_OPT.getOpt(), DEFAULT_MAPPERS));
    StressTestUtils.populateConfigFromCli(jobConf, cli);

    job.setJarByClass(MRStressTest.class);
    job.setMapperClass(StresserMapper.class);
    job.setReducerClass(AggregatorReducer.class);
    job.setInputFormatClass(MyInputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(DoubleWritable.class);

    // Timestamp suffix on the output directory name.
    Path outputPath = new Path("/tmp/MRStressTest" + System.currentTimeMillis());
    FileOutputFormat.setOutputPath(job, outputPath);

    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
// Excerpt (truncated: `conf` and `jobname` come from outside this view and the `if` block is
// not closed).
// Configures the export job with ExportMapper, ExportSnapshotInputFormat and NullOutputFormat,
// adds dependency jars, and throws an ExportSnapshotException carrying the job status'
// failure info when the job does not complete successfully.
Job job = new Job(conf); job.setJobName(jobname); job.setJarByClass(ExportSnapshot.class); TableMapReduceUtil.addDependencyJars(job); job.setMapperClass(ExportMapper.class); job.setInputFormatClass(ExportSnapshotInputFormat.class); job.setOutputFormatClass(NullOutputFormat.class); if (!job.waitForCompletion(true)) { throw new ExportSnapshotException(job.getStatus().getFailureInfo());
// Excerpt (`job`, `mapperClass` and `inputFormat` are defined outside this view).
// Configures the job with the supplied mapper and input format, NullWritable for both the map
// output and the final output key/value, NullOutputFormat and zero reduce tasks, then waits
// for completion and keeps the outcome in `success`.
job.setJarByClass(mapperClass); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(NullWritable.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(NullWritable.class); job.setNumReduceTasks(0); job.setMapperClass(mapperClass); job.setOutputFormatClass(NullOutputFormat.class); job.setInputFormatClass(inputFormat); boolean success = job.waitForCompletion(true);
/**
 * Runs a map-only job that reads the HCatalog table {@code default.test_bad_records} with the
 * given bad-record threshold and writes text output under {@code TEST_DATA_DIR}.
 *
 * <p>An existing output directory is deleted recursively before the job starts.
 *
 * @param badRecordThreshold value stored under
 *     {@code HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY}
 * @return the result of {@code waitForCompletion(true)}
 * @throws Exception if job setup, output cleanup or job execution fails
 */
private boolean runJob(float badRecordThreshold) throws Exception {
    Configuration conf = new Configuration();
    conf.setFloat(HCatConstants.HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY, badRecordThreshold);

    Job badRecordJob = new Job(conf);
    badRecordJob.setJarByClass(this.getClass());
    badRecordJob.setMapperClass(MyMapper.class);

    badRecordJob.setInputFormatClass(HCatInputFormat.class);
    badRecordJob.setOutputFormatClass(TextOutputFormat.class);

    HCatInputFormat.setInput(badRecordJob, "default", "test_bad_records");

    badRecordJob.setMapOutputKeyClass(HCatRecord.class);
    badRecordJob.setMapOutputValueClass(HCatRecord.class);

    badRecordJob.setNumReduceTasks(0);

    // Clear any existing output directory before setting it as the job's output path.
    Path outputPath = new Path(TEST_DATA_DIR, "test_bad_record_handling_output");
    boolean outputExists = outputPath.getFileSystem(conf).exists(outputPath);
    if (outputExists) {
        outputPath.getFileSystem(conf).delete(outputPath, true);
    }
    TextOutputFormat.setOutputPath(badRecordJob, outputPath);

    boolean succeeded = badRecordJob.waitForCompletion(true);
    return succeeded;
}
// Excerpt (truncated: `job` and `mOutputFilePath` come from outside this view and the `if`
// block is not closed).
// Configures the checker job with CheckerMapper, CheckerReducer as both combiner and reducer,
// Text output key/value and EmptyInputFormat, writes to mOutputFilePath, and returns 1 when
// the job does not complete successfully.
job.setJarByClass(MapReduceIntegrationChecker.class); job.setMapperClass(CheckerMapper.class); job.setCombinerClass(CheckerReducer.class); job.setReducerClass(CheckerReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(EmptyInputFormat.class); FileOutputFormat.setOutputPath(job, mOutputFilePath); if (!job.waitForCompletion(true)) { return 1;
// Excerpt of a read test (truncated: `conf`, `fs` and `readCount` come from outside this view
// and the brace opened by `if (fs.exists(path))` is not closed here).
// Creates the "hcat mapreduce read test" job with the MapRead mapper, recursively deletes the
// "mapred/testHCatMapReduceOutput" path under the working directory when it exists, waits for
// the job, and asserts that MapRead.readCount equals readCount.
// NOTE(review): the result of waitForCompletion is discarded, so a failed job is only noticed
// through the read-count assertion — confirm this is intended.
Job job = new Job(conf, "hcat mapreduce read test"); job.setJarByClass(this.getClass()); job.setMapperClass(HCatMapReduceTest.MapRead.class); Path path = new Path(fs.getWorkingDirectory(), "mapred/testHCatMapReduceOutput"); if (fs.exists(path)) { fs.delete(path, true); job.waitForCompletion(true); Assert.assertEquals(readCount, MapRead.readCount);