@Override
public void configure(Job job) throws IOException {
    job.getConfiguration().set(Hadoop1Compat.CFG_JOB_JAR, mapredJar);
}
}
/**
 * Sets the job output value schema.
 *
 * @param job The job to configure.
 * @param schema The job output value schema.
 */
public static void setOutputValueSchema(Job job, Schema schema) {
    job.setOutputValueClass(AvroValue.class);
    job.getConfiguration().set(CONF_OUTPUT_VALUE_SCHEMA, schema.toString());
}
/**
 * Sets the job output key schema.
 *
 * @param job The job to configure.
 * @param schema The job output key schema.
 */
public static void setOutputKeySchema(Job job, Schema schema) {
    job.setOutputKeyClass(AvroKey.class);
    job.getConfiguration().set(CONF_OUTPUT_KEY_SCHEMA, schema.toString());
}
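A minimal driver-side sketch of how the two setters above might be used together; the enclosing helper class name (assumed here to be AvroJob) and the concrete schemas are illustrative assumptions, not taken from the snippet.

// Hypothetical driver wiring, assuming the setters above are exposed on a helper class named AvroJob.
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "avro-output-example");
Schema keySchema = Schema.create(Schema.Type.LONG);      // placeholder key schema
Schema valueSchema = Schema.create(Schema.Type.STRING);  // placeholder value schema
AvroJob.setOutputKeySchema(job, keySchema);              // stores the key schema string in the job conf
AvroJob.setOutputValueSchema(job, valueSchema);          // stores the value schema string in the job conf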
private void setupMapper(CubeSegment cubeSeg) throws IOException {
    // write the segment's Kafka partition offset range into the job conf
    Map<Integer, Long> offsetStart = cubeSeg.getSourcePartitionOffsetStart();
    Map<Integer, Long> offsetEnd = cubeSeg.getSourcePartitionOffsetEnd();

    Integer minPartition = Collections.min(offsetStart.keySet());
    Integer maxPartition = Collections.max(offsetStart.keySet());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_MIN, minPartition.toString());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_MAX, maxPartition.toString());

    // one start/end offset entry per partition
    for (Integer partition : offsetStart.keySet()) {
        job.getConfiguration().set(CONFIG_KAFKA_PARITION_START + partition, offsetStart.get(partition).toString());
        job.getConfiguration().set(CONFIG_KAFKA_PARITION_END + partition, offsetEnd.get(partition).toString());
    }

    job.setMapperClass(KafkaFlatTableMapper.class);
    job.setInputFormatClass(KafkaInputFormat.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(0);
}
addDependencies(this.job.getConfiguration());

this.job.setMapperClass(TaskRunner.class);
this.job.setNumReduceTasks(0);
this.job.setInputFormatClass(GobblinWorkUnitsInputFormat.class);
this.job.setOutputFormatClass(GobblinOutputFormat.class);
this.job.setMapOutputKeyClass(NullWritable.class);
this.job.setMapOutputValueClass(NullWritable.class);
this.job.getConfiguration().set("mapreduce.job.user.classpath.first", "true");
public static void main(String[] args) throws Exception {
    CommandLine cli = StressTestUtils.parseCommandLine(OPTIONS, args);

    Configuration configuration = new Configuration();
    if (cli.hasOption(THROTTLING_SERVER_URI.getOpt())) {
        configuration.setBoolean(USE_THROTTLING_SERVER, true);
        String resourceLimited = cli.getOptionValue(RESOURCE_ID_OPT.getOpt(), "MRStressTest");
        configuration.set(RESOURCE_ID, resourceLimited);
        configuration.set(BrokerConfigurationKeyGenerator.generateKey(new SharedRestClientFactory(),
            new SharedRestClientKey(RestliLimiterFactory.RESTLI_SERVICE_NAME), null,
            SharedRestClientFactory.SERVER_URI_KEY), cli.getOptionValue(THROTTLING_SERVER_URI.getOpt()));
    }
    if (cli.hasOption(LOCAL_QPS_OPT.getOpt())) {
        configuration.set(LOCALLY_ENFORCED_QPS, cli.getOptionValue(LOCAL_QPS_OPT.getOpt()));
    }

    Job job = Job.getInstance(configuration, "ThrottlingStressTest");
    job.getConfiguration().setBoolean("mapreduce.job.user.classpath.first", true);
    job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
    job.getConfiguration().set(NUM_MAPPERS, cli.getOptionValue(NUM_MAPPERS_OPT.getOpt(), DEFAULT_MAPPERS));
    StressTestUtils.populateConfigFromCli(job.getConfiguration(), cli);

    job.setJarByClass(MRStressTest.class);
    job.setMapperClass(StresserMapper.class);
    job.setReducerClass(AggregatorReducer.class);
    job.setInputFormatClass(MyInputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/MRStressTest" + System.currentTimeMillis()));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
@VisibleForTesting
static void serializeJobState(FileSystem fs, Path mrJobDir, Configuration conf, JobState jobState, Job job)
        throws IOException {
    Path jobStateFilePath = new Path(mrJobDir, JOB_STATE_FILE_NAME);
    // Write the job state with an empty task set (work units are read by the mapper from a different file)
    try (DataOutputStream dataOutputStream = new DataOutputStream(fs.create(jobStateFilePath))) {
        jobState.write(dataOutputStream, false,
            conf.getBoolean(SERIALIZE_PREVIOUS_WORKUNIT_STATES_KEY, DEFAULT_SERIALIZE_PREVIOUS_WORKUNIT_STATES));
    }

    job.getConfiguration().set(ConfigurationKeys.JOB_STATE_FILE_PATH_KEY, jobStateFilePath.toString());

    DistributedCache.addCacheFile(jobStateFilePath.toUri(), job.getConfiguration());
    job.getConfiguration().set(ConfigurationKeys.JOB_STATE_DISTRIBUTED_CACHE_NAME, jobStateFilePath.getName());
}
@Override
public void configureJobOutput(Job job, String output, CubeSegment segment) throws Exception {
    int reducerNum = MapReduceUtil.getLayeredCubingReduceTaskNum(segment, segment.getCuboidScheduler(),
        AbstractHadoopJob.getTotalMapInputMB(job), -1);
    job.setNumReduceTasks(reducerNum);

    Path outputPath = new Path(output);
    HadoopUtil.deletePath(job.getConfiguration(), outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
}
String tabName = args[1];
conf.setStrings(TABLES_KEY, tabName);
conf.set(FileInputFormat.INPUT_DIR, inputDirs);
Job job = Job.getInstance(conf,
    conf.get(JOB_NAME_CONF_KEY, NAME + "_" + EnvironmentEdgeManager.currentTime()));
job.setJarByClass(MapReduceHFileSplitterJob.class);
job.setInputFormatClass(HFileInputFormat.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
LOG.debug("add incremental job :" + hfileOutPath + " from " + inputDirs);
TableName tableName = TableName.valueOf(tabName);
job.setMapperClass(HFileCellMapper.class);
job.setReducerClass(CellSortReducer.class);
Path outputDir = new Path(hfileOutPath);
FileOutputFormat.setOutputPath(job, outputDir);
job.setMapOutputValueClass(MapReduceExtendedCell.class);
TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
    org.apache.hbase.thirdparty.com.google.common.base.Preconditions.class);
} else {
Path betaInput = new Path(inputPath);
FileSystem fs = betaInput.getFileSystem(job.getConfiguration());
Set<String> paths = new TreeSet<>();
Pattern fileMatcher = Pattern.compile(filePattern);

Path granularPath = new Path(betaInput, intervalPath);
log.info("Checking path[%s]", granularPath);
for (FileStatus status : FSSpideringIterator.spiderIterable(fs, granularPath)) {
public Job createSubmittableJob(String[] args) throws IOException {
    Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
    generatePartitions(partitionsPath);

    Job job = Job.getInstance(getConf(),
        getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
    Configuration jobConf = job.getConfiguration();
    jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
    job.setJarByClass(HashTable.class);

    TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
        HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);

    // use a TotalOrderPartitioner and reducers to group region output into hash files
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
    job.setReducerClass(Reducer.class); // identity reducer
    job.setNumReduceTasks(tableHash.numHashFiles);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(ImmutableBytesWritable.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));

    return job;
}
Job job = Job.getInstance(conf);

Path inputDir = new Path(generateOutDir);
Path outputDir = new Path(sortOutDir);

job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(TeraInputFormat.class);
job.setOutputFormatClass(TeraOutputFormat.class);

long start = System.currentTimeMillis();
Path partFile = new Path(outputDir, PARTITION_FILENAME);
job.getConfiguration().setInt("dfs.replication", TeraSort.getOutputReplication(job));
job.setInputFormatClass(CustomV2InputFormat.class);
job.setOutputFormatClass(CustomV2OutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);

FileInputFormat.setInputPaths(job, new Path(igfsScheme() + inFile.toString()));
FileOutputFormat.setOutputPath(job, new Path(igfsScheme() + PATH_OUTPUT));

job.setNumReduceTasks(3);

createJobInfo(job.getConfiguration(), null);
public int runGenerator(int numMappers, long numNodes, Path tmpOutput, Integer width,
        Integer wrapMultiplier, Integer numWalkers) throws Exception {
    LOG.info("Running Generator with numMappers=" + numMappers + ", numNodes=" + numNodes);
    createSchema();

    job = Job.getInstance(getConf());
    job.setJobName("Link Generator");
    job.setNumReduceTasks(0);
    job.setJarByClass(getClass());

    FileInputFormat.setInputPaths(job, tmpOutput);
    job.setInputFormatClass(OneFilePerMapperSFIF.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);
    setMapperForGenerator(job);

    job.setOutputFormatClass(NullOutputFormat.class);
    job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
    TableMapReduceUtil.initCredentials(job);

    boolean success = jobCompletion(job);

    return success ? 0 : 1;
}
@Override
public void configureJob(Job job) {
    job.setInputFormatClass(SequenceFileInputFormat.class);

    String jobId = job.getConfiguration().get(BatchConstants.ARG_CUBING_JOB_ID);
    IJoinedFlatTableDesc flatHiveTableDesc = new CubeJoinedFlatTableDesc(cubeSegment);
    String inputPath = JoinedFlatTable.getTableDir(flatHiveTableDesc,
        JobBuilderSupport.getJobWorkingDir(conf, jobId));
    try {
        FileInputFormat.addInputPath(job, new Path(inputPath));
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}
/**
 * Creates a WordCount Hadoop job for API v2.
 *
 * @param inFile Input file name for the job.
 * @param outFile Output file name for the job.
 * @return Hadoop job.
 * @throws Exception If the job could not be created.
 */
@Override
public HadoopJobEx getHadoopJob(String inFile, String outFile) throws Exception {
    Job job = Job.getInstance();

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    HadoopWordCount2.setTasksClasses(job, true, true, true, false);

    Configuration conf = job.getConfiguration();
    setupFileSystems(conf);

    FileInputFormat.setInputPaths(job, new Path(inFile));
    FileOutputFormat.setOutputPath(job, new Path(outFile));

    job.setJarByClass(HadoopWordCount2.class);

    Job hadoopJob = HadoopWordCount2.getJob(inFile, outFile);
    HadoopDefaultJobInfo jobInfo = createJobInfo(hadoopJob.getConfiguration(), null);

    UUID uuid = new UUID(0, 0);
    HadoopJobId jobId = new HadoopJobId(uuid, 0);

    return jobInfo.createJob(HadoopV2Job.class, jobId, log, null, new HadoopHelperImpl());
}
private void setupReducer(Path output, int numberOfReducers) throws IOException {
    job.setReducerClass(UHCDictionaryReducer.class);
    job.setPartitionerClass(UHCDictionaryPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);

    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
        NullWritable.class, ArrayPrimitiveWritable.class);
    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // prevent creation of a zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
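As a companion to the named-output registration above, a hedged sketch of the reducer side writing through MultipleOutputs so that, combined with LazyOutputFormat, only the named output is materialized; the class name, key handling, and reduce body are assumptions for illustration, not Kylin's actual UHCDictionaryReducer.

// Illustrative reducer sketch (hypothetical types and body), showing the MultipleOutputs consumer side.
public class NamedOutputReducerSketch extends Reducer<Text, NullWritable, NullWritable, ArrayPrimitiveWritable> {
    private MultipleOutputs<NullWritable, ArrayPrimitiveWritable> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        // write to the named output registered via MultipleOutputs.addNamedOutput(...);
        // with LazyOutputFormat the default (unnamed) output file is never created
        mos.write(BatchConstants.CFG_OUTPUT_DICT, NullWritable.get(),
            new ArrayPrimitiveWritable(key.copyBytes()));
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        mos.close();
    }
}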
protected Job doLoad(Configuration conf, HTableDescriptor htd) throws Exception {
    Path outputDir = getTestDir(TEST_NAME, "load-output");
    LOG.info("Load output dir: " + outputDir);

    NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
    conf.set(TABLE_NAME_KEY, htd.getTableName().getNameAsString());

    Job job = Job.getInstance(conf);
    job.setJobName(TEST_NAME + " Load for " + htd.getTableName());
    job.setJarByClass(this.getClass());
    setMapperClass(job);
    job.setInputFormatClass(NMapInputFormat.class);
    job.setNumReduceTasks(0);
    setJobScannerConf(job);
    FileOutputFormat.setOutputPath(job, outputDir);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
    TableMapReduceUtil.initCredentials(job);
    assertTrue(job.waitForCompletion(true));
    return job;
}
LOG.info("Before map/reduce startup");
job = new Job(table.getConfiguration(), "process column contents");
job.setNumReduceTasks(1);

Scan scan = new Scan();
scan.addFamily(INPUT_FAMILY);

TableMapReduceUtil.initTableReducerJob(
    table.getName().getNameAsString(), IdentityTableReducer.class, job);
FileOutputFormat.setOutputPath(job, new Path("test"));
LOG.info("Started " + table.getName());
assertTrue(job.waitForCompletion(true));

if (job != null) {
    FileUtil.fullyDelete(new File(job.getConfiguration().get("hadoop.tmp.dir")));
}
SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);

job.setReducerClass(CellSortReducer.class);
job.setOutputFormatClass(HFileOutputFormat2.class);
job.setNumReduceTasks(4);
job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
    MutationSerialization.class.getName(), ResultSerialization.class.getName(),
    CellSerialization.class.getName());