private void setupReducer(Path output, CubeSegment cubeSeg) throws IOException { int hllShardBase = MapReduceUtil.getCuboidHLLCounterReducerNum(cubeSeg.getCubeInstance()); job.setReducerClass(CalculateStatsFromBaseCuboidReducer.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(hllShardBase); FileOutputFormat.setOutputPath(job, output); job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString()); deletePath(job.getConfiguration(), output); } }
Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + EnvironmentEdgeManager.currentTime())); job.setJarByClass(MapReduceHFileSplitterJob.class); job.setInputFormatClass(HFileInputFormat.class); job.setMapOutputKeyClass(ImmutableBytesWritable.class); String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY); LOG.debug("add incremental job :" + hfileOutPath + " from " + inputDirs); TableName tableName = TableName.valueOf(tabName); job.setMapperClass(HFileCellMapper.class); job.setReducerClass(CellSortReducer.class); Path outputDir = new Path(hfileOutPath); FileOutputFormat.setOutputPath(job, outputDir); job.setMapOutputValueClass(MapReduceExtendedCell.class); try (Connection conn = ConnectionFactory.createConnection(conf);
protected void runJob(String jobName, Configuration c, List<Scan> scans) throws IOException, InterruptedException, ClassNotFoundException { Job job = new Job(c, jobName); initJob(scans, job); job.setReducerClass(ScanReducer.class); job.setNumReduceTasks(1); // one to get final "first" and "last" key FileOutputFormat.setOutputPath(job, new Path(job.getJobName())); LOG.info("Started " + job.getJobName()); job.waitForCompletion(true); assertTrue(job.isSuccessful()); LOG.info("After map/reduce completion - job " + jobName); }
private Job configureSubmittableJob(Job job, Path outputPath) throws Exception { Configuration conf = job.getConfiguration(); conf.setBoolean("mapreduce.job.user.classpath.first", true); HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf)); job.setJarByClass(IndexScrutinyTool.class); job.setOutputFormatClass(NullOutputFormat.class); if (outputInvalidRows && OutputFormat.FILE.equals(outputFormat)) { job.setOutputFormatClass(TextOutputFormat.class); FileOutputFormat.setOutputPath(job, outputPath); } job.setMapperClass(IndexScrutinyMapper.class); job.setNumReduceTasks(0); // Set the Output classes job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); TableMapReduceUtil.addDependencyJars(job); return job; }
public static void main(String[] args) throws Exception { CommandLine cli = StressTestUtils.parseCommandLine(OPTIONS, args); Configuration configuration = new Configuration(); if (cli.hasOption(THROTTLING_SERVER_URI.getOpt())) { configuration.setBoolean(USE_THROTTLING_SERVER, true); String resourceLimited = cli.getOptionValue(RESOURCE_ID_OPT.getOpt(), "MRStressTest"); configuration.set(RESOURCE_ID, resourceLimited); configuration.set( BrokerConfigurationKeyGenerator.generateKey(new SharedRestClientFactory(), new SharedRestClientKey(RestliLimiterFactory.RESTLI_SERVICE_NAME), null, SharedRestClientFactory.SERVER_URI_KEY), cli.getOptionValue(THROTTLING_SERVER_URI.getOpt())); } if (cli.hasOption(LOCAL_QPS_OPT.getOpt())) { configuration .set(LOCALLY_ENFORCED_QPS, cli.getOptionValue(LOCAL_QPS_OPT.getOpt())); } Job job = Job.getInstance(configuration, "ThrottlingStressTest"); job.getConfiguration().setBoolean("mapreduce.job.user.classpath.first", true); job.getConfiguration().setBoolean("mapreduce.map.speculative", false); job.getConfiguration().set(NUM_MAPPERS, cli.getOptionValue(NUM_MAPPERS_OPT.getOpt(), DEFAULT_MAPPERS)); StressTestUtils.populateConfigFromCli(job.getConfiguration(), cli); job.setJarByClass(MRStressTest.class); job.setMapperClass(StresserMapper.class); job.setReducerClass(AggregatorReducer.class); job.setInputFormatClass(MyInputFormat.class); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(DoubleWritable.class); FileOutputFormat.setOutputPath(job, new Path("/tmp/MRStressTest" + System.currentTimeMillis())); System.exit(job.waitForCompletion(true) ? 0 : 1); }
Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + System.currentTimeMillis())); job.setJarByClass(WALPlayer.class); job.setInputFormatClass(WALInputFormat.class); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapperClass(WALKeyValueMapper.class); job.setReducerClass(CellSortReducer.class); Path outputDir = new Path(hfileOutPath); FileOutputFormat.setOutputPath(job, outputDir); job.setMapOutputValueClass(MapReduceExtendedCell.class); try (Connection conn = ConnectionFactory.createConnection(conf);) { } else { job.setMapperClass(WALMapper.class); job.setOutputFormatClass(MultiTableOutputFormat.class); TableMapReduceUtil.addDependencyJars(job);
TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, ScanMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job); job.setReducerClass(ScanReducer.class); job.setNumReduceTasks(1); // one to get final "first" and "last" key FileOutputFormat.setOutputPath(job, new Path(job.getJobName())); LOG.info("Started " + job.getJobName()); assertTrue(job.waitForCompletion(true));
/** * Sets up the actual job. * * @param conf The current configuration. * @param args The command line parameters. * @return The newly created job. * @throws IOException When setting up the job fails. */ public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException { String tableName = args[0]; Path outputDir = new Path(args[1]); String reportSeparatorString = (args.length > 2) ? args[2]: ":"; conf.set("ReportSeparator", reportSeparatorString); Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName)); job.setJarByClass(CellCounter.class); Scan scan = getConfiguredScanForJob(conf, args); TableMapReduceUtil.initTableMapperJob(tableName, scan, CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job); job.setNumReduceTasks(1); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileOutputFormat.setOutputPath(job, outputDir); job.setReducerClass(IntSumReducer.class); return job; }
this.job.setJarByClass(MRJobLauncher.class); this.job.setMapperClass(TaskRunner.class); this.job.setInputFormatClass(GobblinWorkUnitsInputFormat.class); this.job.setOutputFormatClass(GobblinOutputFormat.class); this.job.setMapOutputKeyClass(NullWritable.class); FileOutputFormat.setOutputPath(this.job, this.jobOutputPath);
job.setJarByClass(MapReduceIntegrationChecker.class); job.setMapperClass(CheckerMapper.class); job.setCombinerClass(CheckerReducer.class); job.setReducerClass(CheckerReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(EmptyInputFormat.class); FileOutputFormat.setOutputPath(job, mOutputFilePath);
RegionLocator regionLocator = conn.getRegionLocator(tableName)) { HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator); job.setMapperClass(CellSortImporter.class); job.setReducerClass(CellReducer.class); Path outputDir = new Path(hfileOutPath); FileOutputFormat.setOutputPath(job, outputDir); job.setMapOutputKeyClass(CellWritableComparable.class); job.setMapOutputValueClass(MapReduceExtendedCell.class); RawComparator.class); Path partitionsPath = new Path(TotalOrderPartitioner.getPartitionFile(job.getConfiguration())); FileSystem fs = FileSystem.get(job.getConfiguration()); fs.deleteOnExit(partitionsPath); job.setMapperClass(CellImporter.class); try (Connection conn = ConnectionFactory.createConnection(conf); Table table = conn.getTable(tableName); RegionLocator regionLocator = conn.getRegionLocator(tableName)){ job.setReducerClass(CellSortReducer.class); Path outputDir = new Path(hfileOutPath); FileOutputFormat.setOutputPath(job, outputDir); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(MapReduceExtendedCell.class); job.setMapperClass(Importer.class); TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job); job.setNumReduceTasks(0);
public Job createSubmittableJob(String[] args) throws IOException { Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME); generatePartitions(partitionsPath); Job job = Job.getInstance(getConf(), getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName)); Configuration jobConf = job.getConfiguration(); jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize); job.setJarByClass(HashTable.class); TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(), HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job); // use a TotalOrderPartitioner and reducers to group region output into hash files job.setPartitionerClass(TotalOrderPartitioner.class); TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath); job.setReducerClass(Reducer.class); // identity reducer job.setNumReduceTasks(tableHash.numHashFiles); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(ImmutableBytesWritable.class); job.setOutputFormatClass(MapFileOutputFormat.class); FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR)); return job; }
Path inputDir = new Path(args[1]); String jobName = conf.get(JOB_NAME_CONF_KEY,NAME + "_" + tableName.getNameAsString()); job = Job.getInstance(conf, jobName); job.setJarByClass(mapperClass); FileInputFormat.setInputPaths(job, inputDir); job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(mapperClass); job.setMapOutputKeyClass(ImmutableBytesWritable.class); String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY); job.setReducerClass(TextSortReducer.class); } else { job.setMapOutputValueClass(Put.class); job.setCombinerClass(PutCombiner.class); job.setReducerClass(PutSortReducer.class); Path outputDir = new Path(hfileOutPath); FileOutputFormat.setOutputPath(job, outputDir); HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
/** * Sets up the actual job. * * @param conf The current configuration. * @param args The command line parameters. * @return The newly created job. * @throws IOException When setting up the job fails. */ public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException { Triple<TableName, Scan, Path> arguments = ExportUtils.getArgumentsFromCommandLine(conf, args); String tableName = arguments.getFirst().getNameAsString(); Path outputDir = arguments.getThird(); Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName)); job.setJobName(NAME + "_" + tableName); job.setJarByClass(Export.class); // Set optional scan parameters Scan s = arguments.getSecond(); IdentityTableMapper.initJob(tableName, s, IdentityTableMapper.class, job); // No reducers. Just write straight to output files. job.setNumReduceTasks(0); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(Result.class); FileOutputFormat.setOutputPath(job, outputDir); // job conf doesn't contain the conf so doesn't have a default fs. return job; }
parseOptions(options, args); Path partitionFilePath = new Path(getOptionValue(OPTION_PARTITION_FILE_PATH)); Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH)); String cubeName = getOptionValue(OPTION_CUBE_NAME); FileOutputFormat.setOutputPath(job, output); reconfigurePartitions(configuration, partitionFilePath); job.setInputFormatClass(SequenceFileInputFormat.class); job.setMapperClass(CubeHFileMapper.class); job.setReducerClass(KeyValueReducer.class); job.setMapOutputKeyClass(RowKeyWritable.class); job.setMapOutputValueClass(KeyValue.class);
conf.addResource(new Path(jobEngineConfig.getHadoopJobConfFilePath(null))); job.getConfiguration().set(BatchConstants.CFG_TABLE_NAME, table); Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH)); FileOutputFormat.setOutputPath(job, output); job.getConfiguration().set("dfs.blocksize", "67108864"); job.getConfiguration().set("mapreduce.output.fileoutputformat.compress", "false"); tableInputFormat.configureJob(job); job.setMapperClass(ColumnCardinalityMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(BytesWritable.class); job.setReducerClass(ColumnCardinalityReducer.class); job.setOutputFormatClass(TextOutputFormat.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(LongWritable.class); job.setNumReduceTasks(1);
/** * Tests an MR Scan initialized from properties set in the Configuration. */ protected void testScanFromConfiguration(String start, String stop, String last) throws IOException, InterruptedException, ClassNotFoundException { String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") + "To" + (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty"); Configuration c = new Configuration(TEST_UTIL.getConfiguration()); c.set(TableInputFormat.INPUT_TABLE, TABLE_NAME.getNameAsString()); c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILYS[0]) + ", " + Bytes.toString(INPUT_FAMILYS[1])); c.set(KEY_STARTROW, start != null ? start : ""); c.set(KEY_LASTROW, last != null ? last : ""); if (start != null) { c.set(TableInputFormat.SCAN_ROW_START, start); } if (stop != null) { c.set(TableInputFormat.SCAN_ROW_STOP, stop); } Job job = Job.getInstance(c, jobName); job.setMapperClass(ScanMapper.class); job.setReducerClass(ScanReducer.class); job.setMapOutputKeyClass(ImmutableBytesWritable.class); job.setMapOutputValueClass(ImmutableBytesWritable.class); job.setInputFormatClass(TableInputFormat.class); job.setNumReduceTasks(1); FileOutputFormat.setOutputPath(job, new Path(job.getJobName())); TableMapReduceUtil.addDependencyJars(job); assertTrue(job.waitForCompletion(true)); }
/** * Prepare job with mappers to cancel. * @return Fully configured job. * @throws Exception If fails. */ private Configuration prepareJobForCancelling() throws Exception { prepareFile("/testFile", 1500); executedTasks.set(0); cancelledTasks.set(0); failMapperId.set(0); splitsCount.set(0); Configuration cfg = new Configuration(); setupFileSystems(cfg); Job job = Job.getInstance(cfg); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(CancellingTestMapper.class); job.setNumReduceTasks(0); job.setInputFormatClass(InFormat.class); FileInputFormat.setInputPaths(job, new Path("igfs://" + igfsName + "@/")); FileOutputFormat.setOutputPath(job, new Path("igfs://" + igfsName + "@/output/")); job.setJarByClass(getClass()); return job.getConfiguration(); }
parseOptions(options, args); Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH)); String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT); String tableName = getOptionValue(OPTION_TABLE_NAME); FileOutputFormat.setOutputPath(job, output); job.setMapperClass(LookupTableToHFileMapper.class); job.setReducerClass(KVSortReducerWithDupKeyCheck.class);
/** * Gets fully configured Job instance. * * @param input Input file name. * @param output Output directory name. * @return Job instance. * @throws IOException If fails. */ public static Job getJob(String input, String output) throws IOException { Job job = Job.getInstance(); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); setTasksClasses(job, true, true, true, false); FileInputFormat.setInputPaths(job, new Path(input)); FileOutputFormat.setOutputPath(job, new Path(output)); job.setJarByClass(HadoopWordCount2.class); return job; }