@Override
public void configure(Job job) throws IOException {
    job.getConfiguration().set(Hadoop1Compat.CFG_JOB_JAR, mapredJar);
}
}
/**
 * Sets the job output value schema.
 *
 * @param job The job to configure.
 * @param schema The job output value schema.
 */
public static void setOutputValueSchema(Job job, Schema schema) {
    job.setOutputValueClass(AvroValue.class);
    job.getConfiguration().set(CONF_OUTPUT_VALUE_SCHEMA, schema.toString());
}
/**
 * Sets the job output key schema.
 *
 * @param job The job to configure.
 * @param schema The job output key schema.
 */
public static void setOutputKeySchema(Job job, Schema schema) {
    job.setOutputKeyClass(AvroKey.class);
    job.getConfiguration().set(CONF_OUTPUT_KEY_SCHEMA, schema.toString());
}
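A minimal driver-side sketch of how the two setters above might be used together; the enclosing helper class name (assumed here to be AvroJob) and the concrete schemas are illustrative assumptions, not taken from the snippet.

// Hypothetical driver wiring, assuming the setters above are exposed on a helper class named AvroJob.
Configuration conf = new Configuration();
Job job = Job.getInstance(conf, "avro-output-example");
Schema keySchema = Schema.create(Schema.Type.LONG);      // placeholder key schema
Schema valueSchema = Schema.create(Schema.Type.STRING);  // placeholder value schema
AvroJob.setOutputKeySchema(job, keySchema);              // stores the key schema string in the job conf
AvroJob.setOutputValueSchema(job, valueSchema);          // stores the value schema string in the job conf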
private void setupMapper(CubeSegment cubeSeg) throws IOException {
    // write the segment's Kafka partition offset range into the job conf
    Map<Integer, Long> offsetStart = cubeSeg.getSourcePartitionOffsetStart();
    Map<Integer, Long> offsetEnd = cubeSeg.getSourcePartitionOffsetEnd();

    Integer minPartition = Collections.min(offsetStart.keySet());
    Integer maxPartition = Collections.max(offsetStart.keySet());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_MIN, minPartition.toString());
    job.getConfiguration().set(CONFIG_KAFKA_PARITION_MAX, maxPartition.toString());

    // one start/end offset entry per partition
    for (Integer partition : offsetStart.keySet()) {
        job.getConfiguration().set(CONFIG_KAFKA_PARITION_START + partition, offsetStart.get(partition).toString());
        job.getConfiguration().set(CONFIG_KAFKA_PARITION_END + partition, offsetEnd.get(partition).toString());
    }

    job.setMapperClass(KafkaFlatTableMapper.class);
    job.setInputFormatClass(KafkaInputFormat.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(0);
}
addDependencies(this.job.getConfiguration());

this.job.setMapperClass(TaskRunner.class);
this.job.setNumReduceTasks(0);
this.job.setInputFormatClass(GobblinWorkUnitsInputFormat.class);
this.job.setOutputFormatClass(GobblinOutputFormat.class);
this.job.setMapOutputKeyClass(NullWritable.class);
this.job.setMapOutputValueClass(NullWritable.class);
this.job.getConfiguration().set("mapreduce.job.user.classpath.first", "true");
public static void main(String[] args) throws Exception {
    CommandLine cli = StressTestUtils.parseCommandLine(OPTIONS, args);

    Configuration configuration = new Configuration();
    if (cli.hasOption(THROTTLING_SERVER_URI.getOpt())) {
        configuration.setBoolean(USE_THROTTLING_SERVER, true);
        String resourceLimited = cli.getOptionValue(RESOURCE_ID_OPT.getOpt(), "MRStressTest");
        configuration.set(RESOURCE_ID, resourceLimited);
        configuration.set(BrokerConfigurationKeyGenerator.generateKey(new SharedRestClientFactory(),
            new SharedRestClientKey(RestliLimiterFactory.RESTLI_SERVICE_NAME), null,
            SharedRestClientFactory.SERVER_URI_KEY), cli.getOptionValue(THROTTLING_SERVER_URI.getOpt()));
    }
    if (cli.hasOption(LOCAL_QPS_OPT.getOpt())) {
        configuration.set(LOCALLY_ENFORCED_QPS, cli.getOptionValue(LOCAL_QPS_OPT.getOpt()));
    }

    Job job = Job.getInstance(configuration, "ThrottlingStressTest");
    job.getConfiguration().setBoolean("mapreduce.job.user.classpath.first", true);
    job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
    job.getConfiguration().set(NUM_MAPPERS, cli.getOptionValue(NUM_MAPPERS_OPT.getOpt(), DEFAULT_MAPPERS));
    StressTestUtils.populateConfigFromCli(job.getConfiguration(), cli);

    job.setJarByClass(MRStressTest.class);
    job.setMapperClass(StresserMapper.class);
    job.setReducerClass(AggregatorReducer.class);
    job.setInputFormatClass(MyInputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(DoubleWritable.class);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/MRStressTest" + System.currentTimeMillis()));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
@VisibleForTesting
static void serializeJobState(FileSystem fs, Path mrJobDir, Configuration conf, JobState jobState, Job job)
        throws IOException {
    Path jobStateFilePath = new Path(mrJobDir, JOB_STATE_FILE_NAME);
    // Write the job state with an empty task set (work units are read by the mapper from a different file)
    try (DataOutputStream dataOutputStream = new DataOutputStream(fs.create(jobStateFilePath))) {
        jobState.write(dataOutputStream, false,
            conf.getBoolean(SERIALIZE_PREVIOUS_WORKUNIT_STATES_KEY, DEFAULT_SERIALIZE_PREVIOUS_WORKUNIT_STATES));
    }

    job.getConfiguration().set(ConfigurationKeys.JOB_STATE_FILE_PATH_KEY, jobStateFilePath.toString());

    DistributedCache.addCacheFile(jobStateFilePath.toUri(), job.getConfiguration());
    job.getConfiguration().set(ConfigurationKeys.JOB_STATE_DISTRIBUTED_CACHE_NAME, jobStateFilePath.getName());
}
@Override
public void configureJobOutput(Job job, String output, CubeSegment segment) throws Exception {
    int reducerNum = MapReduceUtil.getLayeredCubingReduceTaskNum(segment, segment.getCuboidScheduler(),
        AbstractHadoopJob.getTotalMapInputMB(job), -1);
    job.setNumReduceTasks(reducerNum);

    Path outputPath = new Path(output);
    HadoopUtil.deletePath(job.getConfiguration(), outputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
}
String tabName = args[1];
conf.setStrings(TABLES_KEY, tabName);
conf.set(FileInputFormat.INPUT_DIR, inputDirs);
Job job = Job.getInstance(conf,
    conf.get(JOB_NAME_CONF_KEY, NAME + "_" + EnvironmentEdgeManager.currentTime()));
job.setJarByClass(MapReduceHFileSplitterJob.class);
job.setInputFormatClass(HFileInputFormat.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
LOG.debug("add incremental job :" + hfileOutPath + " from " + inputDirs);
TableName tableName = TableName.valueOf(tabName);
job.setMapperClass(HFileCellMapper.class);
job.setReducerClass(CellSortReducer.class);
Path outputDir = new Path(hfileOutPath);
FileOutputFormat.setOutputPath(job, outputDir);
job.setMapOutputValueClass(MapReduceExtendedCell.class);
TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
    org.apache.hbase.thirdparty.com.google.common.base.Preconditions.class);
} else {
Path betaInput = new Path(inputPath);
FileSystem fs = betaInput.getFileSystem(job.getConfiguration());
Set<String> paths = new TreeSet<>();
Pattern fileMatcher = Pattern.compile(filePattern);

Path granularPath = new Path(betaInput, intervalPath);
log.info("Checking path[%s]", granularPath);
for (FileStatus status : FSSpideringIterator.spiderIterable(fs, granularPath)) {
public Job createSubmittableJob(String[] args) throws IOException {
    Path partitionsPath = new Path(destPath, PARTITIONS_FILE_NAME);
    generatePartitions(partitionsPath);

    Job job = Job.getInstance(getConf(),
        getConf().get("mapreduce.job.name", "hashTable_" + tableHash.tableName));
    Configuration jobConf = job.getConfiguration();
    jobConf.setLong(HASH_BATCH_SIZE_CONF_KEY, tableHash.batchSize);
    job.setJarByClass(HashTable.class);

    TableMapReduceUtil.initTableMapperJob(tableHash.tableName, tableHash.initScan(),
        HashMapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job);

    // use a TotalOrderPartitioner and reducers to group region output into hash files
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(jobConf, partitionsPath);
    job.setReducerClass(Reducer.class); // identity reducer
    job.setNumReduceTasks(tableHash.numHashFiles);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(ImmutableBytesWritable.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(destPath, HASH_DATA_DIR));

    return job;
}
Job job = Job.getInstance(conf);

Path inputDir = new Path(generateOutDir);
Path outputDir = new Path(sortOutDir);

job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(TeraInputFormat.class);
job.setOutputFormatClass(TeraOutputFormat.class);

long start = System.currentTimeMillis();
Path partFile = new Path(outputDir, PARTITION_FILENAME);
job.getConfiguration().setInt("dfs.replication", TeraSort.getOutputReplication(job));
job.setInputFormatClass(CustomV2InputFormat.class);
job.setOutputFormatClass(CustomV2OutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);

FileInputFormat.setInputPaths(job, new Path(igfsScheme() + inFile.toString()));
FileOutputFormat.setOutputPath(job, new Path(igfsScheme() + PATH_OUTPUT));

job.setNumReduceTasks(3);

createJobInfo(job.getConfiguration(), null);
public int runGenerator(int numMappers, long numNodes, Path tmpOutput, Integer width,
        Integer wrapMultiplier, Integer numWalkers) throws Exception {
    LOG.info("Running Generator with numMappers=" + numMappers + ", numNodes=" + numNodes);
    createSchema();

    job = Job.getInstance(getConf());
    job.setJobName("Link Generator");
    job.setNumReduceTasks(0);
    job.setJarByClass(getClass());

    FileInputFormat.setInputPaths(job, tmpOutput);
    job.setInputFormatClass(OneFilePerMapperSFIF.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    setJobConf(job, numMappers, numNodes, width, wrapMultiplier, numWalkers);
    setMapperForGenerator(job);

    job.setOutputFormatClass(NullOutputFormat.class);
    job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
    TableMapReduceUtil.initCredentials(job);

    boolean success = jobCompletion(job);

    return success ? 0 : 1;
}
@Override
public void configureJob(Job job) {
    job.setInputFormatClass(SequenceFileInputFormat.class);

    String jobId = job.getConfiguration().get(BatchConstants.ARG_CUBING_JOB_ID);
    IJoinedFlatTableDesc flatHiveTableDesc = new CubeJoinedFlatTableDesc(cubeSegment);
    String inputPath = JoinedFlatTable.getTableDir(flatHiveTableDesc,
        JobBuilderSupport.getJobWorkingDir(conf, jobId));
    try {
        FileInputFormat.addInputPath(job, new Path(inputPath));
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}
/**
 * Creates a WordCount Hadoop job for API v2.
 *
 * @param inFile Input file name for the job.
 * @param outFile Output file name for the job.
 * @return Hadoop job.
 * @throws Exception If the job could not be created.
 */
@Override
public HadoopJobEx getHadoopJob(String inFile, String outFile) throws Exception {
    Job job = Job.getInstance();

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    HadoopWordCount2.setTasksClasses(job, true, true, true, false);

    Configuration conf = job.getConfiguration();
    setupFileSystems(conf);

    FileInputFormat.setInputPaths(job, new Path(inFile));
    FileOutputFormat.setOutputPath(job, new Path(outFile));

    job.setJarByClass(HadoopWordCount2.class);

    Job hadoopJob = HadoopWordCount2.getJob(inFile, outFile);
    HadoopDefaultJobInfo jobInfo = createJobInfo(hadoopJob.getConfiguration(), null);

    UUID uuid = new UUID(0, 0);
    HadoopJobId jobId = new HadoopJobId(uuid, 0);

    return jobInfo.createJob(HadoopV2Job.class, jobId, log, null, new HadoopHelperImpl());
}
private void setupReducer(Path output, int numberOfReducers) throws IOException {
    job.setReducerClass(UHCDictionaryReducer.class);
    job.setPartitionerClass(UHCDictionaryPartitioner.class);
    job.setNumReduceTasks(numberOfReducers);

    MultipleOutputs.addNamedOutput(job, BatchConstants.CFG_OUTPUT_DICT, SequenceFileOutputFormat.class,
        NullWritable.class, ArrayPrimitiveWritable.class);
    FileOutputFormat.setOutputPath(job, output);
    job.getConfiguration().set(BatchConstants.CFG_OUTPUT_PATH, output.toString());

    // prevent creation of a zero-sized default output
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    deletePath(job.getConfiguration(), output);
}
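As a companion to the named-output registration above, a hedged sketch of the reducer side writing through MultipleOutputs so that, combined with LazyOutputFormat, only the named output is materialized; the class name, key handling, and reduce body are assumptions for illustration, not Kylin's actual UHCDictionaryReducer.

// Illustrative reducer sketch (hypothetical types and body), showing the MultipleOutputs consumer side.
public class NamedOutputReducerSketch extends Reducer<Text, NullWritable, NullWritable, ArrayPrimitiveWritable> {
    private MultipleOutputs<NullWritable, ArrayPrimitiveWritable> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<>(context);
    }

    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        // write to the named output registered via MultipleOutputs.addNamedOutput(...);
        // with LazyOutputFormat the default (unnamed) output file is never created
        mos.write(BatchConstants.CFG_OUTPUT_DICT, NullWritable.get(),
            new ArrayPrimitiveWritable(key.copyBytes()));
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        mos.close();
    }
}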
protected Job doLoad(Configuration conf, HTableDescriptor htd) throws Exception {
    Path outputDir = getTestDir(TEST_NAME, "load-output");
    LOG.info("Load output dir: " + outputDir);

    NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
    conf.set(TABLE_NAME_KEY, htd.getTableName().getNameAsString());

    Job job = Job.getInstance(conf);
    job.setJobName(TEST_NAME + " Load for " + htd.getTableName());
    job.setJarByClass(this.getClass());
    setMapperClass(job);
    job.setInputFormatClass(NMapInputFormat.class);
    job.setNumReduceTasks(0);
    setJobScannerConf(job);
    FileOutputFormat.setOutputPath(job, outputDir);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
    TableMapReduceUtil.initCredentials(job);
    assertTrue(job.waitForCompletion(true));
    return job;
}
LOG.info("Before map/reduce startup");
job = new Job(table.getConfiguration(), "process column contents");
job.setNumReduceTasks(1);

Scan scan = new Scan();
scan.addFamily(INPUT_FAMILY);

TableMapReduceUtil.initTableReducerJob(
    table.getName().getNameAsString(), IdentityTableReducer.class, job);
FileOutputFormat.setOutputPath(job, new Path("test"));
LOG.info("Started " + table.getName());
assertTrue(job.waitForCompletion(true));

if (job != null) {
    FileUtil.fullyDelete(new File(job.getConfiguration().get("hadoop.tmp.dir")));
}
SimpleTotalOrderPartitioner.setStartKey(job.getConfiguration(), startKey);
SimpleTotalOrderPartitioner.setEndKey(job.getConfiguration(), endKey);

job.setReducerClass(CellSortReducer.class);
job.setOutputFormatClass(HFileOutputFormat2.class);
job.setNumReduceTasks(4);
job.getConfiguration().setStrings("io.serializations", conf.get("io.serializations"),
    MutationSerialization.class.getName(), ResultSerialization.class.getName(),
    CellSerialization.class.getName());