JobConf conf = prepareJobConf(baseJobConf);
FileSystem fs = outputDir.getFileSystem(conf);
if (fs.exists(outputDir)) {
    info("Deleting previous output in " + outputDir + " for building store "
        + this.storeDef.getName());
    fs.delete(outputDir, true);
}
FileSystem outputFs = outputDir.getFileSystem(conf);
if (outputFs.exists(outputDir)) {
    throw new IOException("Final output directory already exists.");
}
JobClient jc = new JobClient(conf);
RunningJob runningJob = jc.submitJob(conf);
Counters counters;
// monitorAndPrintJob() returns true on success, so the success branch comes first
if (jc.monitorAndPrintJob(conf, runningJob)) {
    counters = runningJob.getCounters();
    // (a logging statement ending in "=" + suggestedTargetChunkSize is truncated here)
} else {
    logger.error("Job Failed: " + runningJob.getFailureInfo());
    counters = runningJob.getCounters();
}
long numberOfRecords = counters.getCounter(Task.Counter.REDUCE_INPUT_GROUPS);
@Test
public void testNonAvroMapOnly() throws Exception {
    JobConf job = new JobConf();
    Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
    outputPath.getFileSystem(job).delete(outputPath);
    // configure input for a non-Avro sequence file
    job.setInputFormat(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, file().toURI().toString());
    // use a plain Hadoop mapper that emits Avro output
    job.setMapperClass(NonAvroOnlyMapper.class);
    // configure output for Avro; map-only, so no reducers
    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, outputPath);
    AvroJob.setOutputSchema(job, SCHEMA);
    JobClient.runJob(job);
    checkFile(new DataFileReader<>(
        new File(outputPath.toString() + "/part-00000.avro"),
        new SpecificDatumReader<>()));
}
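A hypothetical sketch of the mapper this test assumes: a plain old-API Hadoop mapper that wraps each output datum in AvroWrapper so the Avro output machinery can serialize it. The value type (Utf8, i.e. SCHEMA taken to be a string schema) is an assumption, not taken from the source.

// Hedged sketch; NonAvroOnlyMapper's real key/value types are not shown in the
// original, so a string output schema is assumed here.
static class NonAvroOnlyMapper extends MapReduceBase
    implements Mapper<LongWritable, Text, AvroWrapper<Utf8>, NullWritable> {
  @Override
  public void map(LongWritable key, Text value,
                  OutputCollector<AvroWrapper<Utf8>, NullWritable> out,
                  Reporter reporter) throws IOException {
    // wrap the plain Text value as an Avro datum
    out.collect(new AvroWrapper<>(new Utf8(value.toString())), NullWritable.get());
  }
}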
private void logJob(String logDir, String jobID, PrintWriter listWriter) throws IOException {
    RunningJob rj = jobClient.getJob(JobID.forName(jobID));
    String jobURLString = rj.getTrackingURL();
    Path jobDir = new Path(logDir, jobID);
    fs.mkdirs(jobDir);
    try {
        logJobConf(jobID, jobURLString, jobDir.toString());
    } catch (IOException e) {
        System.err.println("Cannot retrieve job.xml.html for " + jobID);
    }
    listWriter.println("job: " + jobID + "(" + "name=" + rj.getJobName() + ","
        + "status=" + JobStatus.getJobRunState(rj.getJobState()) + ")");
    // the enumeration of attempts is truncated in the original fragment;
    // the loop header below is a reconstruction
    for (AttemptInfo attempt : attempts) {
        logAttempt(jobID, attempt, jobDir.toString());
        listWriter.println("  attempt:" + attempt.id + "(" + "type=" + attempt.type + ","
            + "status=" + attempt.status + ")"); // further fields truncated in the original
    }
}
public static Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier>
        getJobTrackerDelegationToken(Configuration conf, String userName) throws Exception {
    JobClient jcl = new JobClient(new JobConf(conf, HCatOutputFormat.class));
    Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> t =
        jcl.getDelegationToken(new Text(userName));
    return t;
}
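A minimal usage sketch, not from the source: a hypothetical caller fetches the JobTracker delegation token for the current user and attaches it to a job's credentials so the submitted job can authenticate. The JobConf variable `job` and the token alias are assumptions.

// Hedged caller sketch; "job" is an assumed in-scope JobConf.
Token<org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier> token =
    getJobTrackerDelegationToken(conf, UserGroupInformation.getCurrentUser().getUserName());
job.getCredentials().addToken(new Text("jobtracker.delegation.token"), token);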
/**
 * Return status information about the Map-Reduce cluster.
 */
public ClusterStatus getClusterStatus() throws Exception {
    ClusterStatus cs;
    try {
        JobConf job = new JobConf(conf);
        JobClient jc = new JobClient(job);
        cs = jc.getClusterStatus();
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    }
    LOG.info("Returning cluster status: " + cs.toString());
    return cs;
}
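A short usage sketch of what ClusterStatus exposes; the caller context is assumed.

// Hedged sketch: print a few standard ClusterStatus fields.
ClusterStatus status = getClusterStatus();
System.out.println("trackers=" + status.getTaskTrackers()
    + " running maps=" + status.getMapTasks()
    + " map capacity=" + status.getMaxMapTasks()
    + " running reduces=" + status.getReduceTasks()
    + " reduce capacity=" + status.getMaxReduceTasks());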
success = true;
HiveFileFormatUtils.prepareJobOutput(job);
job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapperClass(work.getMapperClass());
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(NullWritable.class);
if (work.getNumMapTasks() != null) {
    job.setNumMapTasks(work.getNumMapTasks()); // body reconstructed; truncated in the original
}
Path tempOutPath = Utilities.toTempPath(outputPath);
try {
    FileSystem fs = tempOutPath.getFileSystem(job);
    if (!fs.exists(tempOutPath)) {
        fs.mkdirs(tempOutPath);
    }
    HiveConf.setVar(job, HiveConf.ConfVars.METASTOREPWD, "HIVE");
    JobClient jc = new JobClient(job);
    rj = jc.submitJob(job);
    this.jobID = rj.getJobID();
    returnVal = jobExecHelper.progress(rj, jc, ctx);
    success = (returnVal == 0);
} finally {
    String mesg = rj != null ? ("Ended Job = " + rj.getJobID()) : "Job Submission failed";
    // the original kills the submitted job on failure
    if (!success && rj != null) {
        rj.killJob();
    }
}
DOMConfigurator.configure("log4j.xml");
FileSystem fs = FileSystem.get(job.getConfiguration());
Path execBasePath = new Path(props.getProperty(ETL_EXECUTION_BASE_PATH));
Path execHistory = new Path(props.getProperty(ETL_EXECUTION_HISTORY_PATH));
if (!fs.exists(execBasePath)) {
    log.info("The execution base path does not exist. Creating the directory");
    fs.mkdirs(execBasePath);
}
if (!fs.exists(execHistory)) {
    log.info("The execution history path does not exist. Creating the directory");
    fs.mkdirs(execHistory);
}
// quota cleanup over prior executions; the loop header and delete call are
// reconstructions, since the original fragment is truncated here
for (FileStatus stat : fs.listStatus(execHistory)) {
    log.info("removing old execution: " + stat.getPath().getName());
    ContentSummary execContent = fs.getContentSummary(stat.getPath());
    currentCount -= execContent.getFileCount() + execContent.getDirectoryCount();
    fs.delete(stat.getPath(), true);
}
JobClient client = new JobClient(new JobConf(job.getConfiguration()));
for (TaskReport task : client.getMapTaskReports(tasks[0].getTaskAttemptId().getJobID())) {
    if (task.getCurrentStatus().equals(TIPStatus.FAILED)) {
        for (String s : task.getDiagnostics()) {
            System.err.println("task error: " + s); // body reconstructed; truncated in the original
        }
    }
}
try {
    FileSystem fs = emptyScratchDir.getFileSystem(job);
    fs.mkdirs(emptyScratchDir);
} catch (IOException e) {
    e.printStackTrace();
}
job.setOutputFormat(HiveOutputFormatImpl.class);
job.setMapRunnerClass(ExecMapRunner.class);
job.setMapperClass(ExecMapper.class);
Path hdfsPath = mWork.getTmpHDFSPath();
hdfs.copyFromLocalFile(archivePath, hdfsFilePath);
jc = new JobClient(job);
rj = jc.submitJob(job);
this.jobID = rj.getJobID();
updateStatusInQueryDisplay();
returnVal = jobExecHelper.progress(rj, jc, ctx);
// cleanup path in the original: kill the job, record its ID, close the client
killJob();
jobID = rj.getID().toString();
jc.close();
public int run(String[] args) throws Exception {
    if (args.length != 3)
        Utils.croak("USAGE: GenerateData input-file output-dir value-size");
    JobConf conf = new JobConf(getConf(), GenerateData.class);
    conf.setJobName("generate-data");
    conf.setMapperClass(GenerateDataMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    conf.setNumReduceTasks(0);
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    // the job emits a BytesWritable/BytesWritable sequence file
    conf.setOutputKeyClass(BytesWritable.class);
    conf.setOutputValueClass(BytesWritable.class);
    Path inputPath = new Path(args[0]);
    FileInputFormat.setInputPaths(conf, inputPath);
    Path outputPath = new Path(args[1]);
    // delete the output path if it already exists
    FileSystem fs = outputPath.getFileSystem(conf);
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    FileOutputFormat.setOutputPath(conf, outputPath);
    conf.setInt("value.size", Integer.parseInt(args[2]));
    JobClient.runJob(conf);
    return 0;
}
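A minimal driver sketch, assuming GenerateData implements Tool (as the run(String[]) signature above suggests); the main() itself is not part of the original fragment.

// Hedged sketch: standard ToolRunner entry point for a Tool implementation.
public static void main(String[] args) throws Exception {
    System.exit(ToolRunner.run(new Configuration(), new GenerateData(), args));
}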
/**
 * Run the identity job on a "bytes" Avro file using AvroAsTextInputFormat
 * and AvroTextOutputFormat to produce a sorted "bytes" Avro file.
 */
@Test
public void testSort() throws Exception {
    JobConf job = new JobConf();
    String inputPath = INPUT_DIR.getRoot().getPath();
    Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
    outputPath.getFileSystem(job).delete(outputPath);
    WordCountUtil.writeLinesBytesFile(inputPath);
    job.setInputFormat(AvroAsTextInputFormat.class);
    job.setOutputFormat(AvroTextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, outputPath);
    JobClient.runJob(job);
    WordCountUtil.validateSortedFile(outputPath.toString() + "/part-00000.avro");
}
@Test
public void testJob() throws Exception {
    JobConf job = new JobConf();
    Path inputPath1 = new Path(INPUT_DIR_1.getRoot().getPath());
    Path inputPath2 = new Path(INPUT_DIR_2.getRoot().getPath());
    Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
    outputPath.getFileSystem(job).delete(outputPath);
    writeNamesFiles(new File(inputPath1.toUri().getPath()));
    writeBalancesFiles(new File(inputPath2.toUri().getPath()));
    job.setJobName("multiple-inputs-join");
    AvroMultipleInputs.addInputPath(job, inputPath1, NamesMapImpl.class,
        ReflectData.get().getSchema(NamesRecord.class));
    AvroMultipleInputs.addInputPath(job, inputPath2, BalancesMapImpl.class,
        ReflectData.get().getSchema(BalancesRecord.class));
    Schema keySchema = ReflectData.get().getSchema(KeyRecord.class);
    Schema valueSchema = ReflectData.get().getSchema(JoinableRecord.class);
    AvroJob.setMapOutputSchema(job, Pair.getPairSchema(keySchema, valueSchema));
    AvroJob.setOutputSchema(job, ReflectData.get().getSchema(CompleteRecord.class));
    AvroJob.setReducerClass(job, ReduceImpl.class);
    job.setNumReduceTasks(1);
    FileOutputFormat.setOutputPath(job, outputPath);
    AvroJob.setReflect(job);
    JobClient.runJob(job);
    validateCompleteFile(new File(OUTPUT_DIR.getRoot(), "part-00000.avro"));
}
public LogRetriever(String statusDir, JobType jobType, Configuration conf) throws IOException {
    this.statusDir = statusDir;
    this.jobType = jobType;
    attemptDetailPattern = Pattern.compile(attemptDetailPatternInString);
    attemptLogPattern = Pattern.compile(attemptLogPatternInString);
    attemptIDPattern = Pattern.compile(attemptIDPatternInString);
    attemptStartTimePattern = Pattern.compile(attemptStartTimePatternInString);
    attemptEndTimePattern = Pattern.compile(attemptEndTimePatternInString);
    Path statusPath = new Path(statusDir);
    fs = statusPath.getFileSystem(conf);
    jobClient = new JobClient(new JobConf(conf));
    this.conf = conf;
}
@Override
protected void runJob(String jobName, Configuration c, List<Scan> scans)
        throws IOException, InterruptedException, ClassNotFoundException {
    JobConf job = new JobConf(TEST_UTIL.getConfiguration());
    job.setJobName(jobName);
    job.setMapperClass(Mapper.class);
    TableMapReduceUtil.initMultiTableSnapshotMapperJob(getSnapshotScanMapping(scans),
        Mapper.class, ImmutableBytesWritable.class, ImmutableBytesWritable.class, job, true,
        restoreDir);
    TableMapReduceUtil.addDependencyJars(job);
    job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(1); // one to get final "first" and "last" key
    FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
    LOG.info("Started " + job.getJobName());
    RunningJob runningJob = JobClient.runJob(job);
    runningJob.waitForCompletion();
    assertTrue(runningJob.isSuccessful());
    LOG.info("After map/reduce completion - job " + jobName);
}
public void testInputFormat() throws Exception {
    JobConf job = new JobConf();
    WordCountUtil wordCountUtil = new WordCountUtil("trevniMapredTest");
    Schema subSchema = Schema.parse("{\"type\":\"record\","
        + "\"name\":\"PairValue\","
        + "\"fields\": [ "
        + "{\"name\":\"value\", \"type\":\"long\"}"
        + "]}");
    AvroJob.setInputSchema(job, subSchema);
    AvroJob.setMapperClass(job, Counter.class);
    FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/out/*"));
    job.setInputFormat(AvroTrevniInputFormat.class);
    job.setNumReduceTasks(0);                     // map-only
    job.setOutputFormat(NullOutputFormat.class);  // ignore output
    total = 0;
    JobClient.runJob(job);
    assertEquals(WordCountUtil.TOTAL, total);
}
public void testOutputFormat() throws Exception {
    JobConf job = new JobConf();
    WordCountUtil wordCountUtil = new WordCountUtil("trevniMapredTest");
    wordCountUtil.writeLinesFile();
    AvroJob.setInputSchema(job, STRING);
    AvroJob.setOutputSchema(job, Pair.getPairSchema(STRING, LONG));
    AvroJob.setMapperClass(job, MapImpl.class);
    AvroJob.setCombinerClass(job, ReduceImpl.class);
    AvroJob.setReducerClass(job, ReduceImpl.class);
    FileInputFormat.setInputPaths(job, new Path(wordCountUtil.getDir().toString() + "/in"));
    FileOutputFormat.setOutputPath(job, new Path(wordCountUtil.getDir().toString() + "/out"));
    FileOutputFormat.setCompressOutput(job, true);
    job.setOutputFormat(AvroTrevniOutputFormat.class);
    JobClient.runJob(job);
    wordCountUtil.validateCountsFile();
}
// (the method name and leading parameters are truncated in the original fragment)
        int curDirNumber, int obsoleteDirNumber, HiveConf hiveConf, IMetaStoreClient msc,
        long id, String jobName) throws IOException {
    job.setBoolean(IS_MAJOR, compactionType == CompactionType.MAJOR);
    if (dirsToSearch == null) {
        dirsToSearch = new StringableList();
    }
    if (baseDir != null) {
        job.set(BASE_DIR, baseDir.toString());
    }
    job.set(DELTA_DIRS, deltaDirs.toString());
    job.set(DIRS_TO_SEARCH, dirsToSearch.toString());
    job.setLong(MIN_TXN, minTxn);
    JobClient jc = null;
    try {
        jc = new JobClient(job);
        RunningJob rj = jc.submitJob(job);
        LOG.info("Submitted compaction job '" + job.getJobName()
            + "' with jobID=" + rj.getID() + " compaction ID=" + id);
        try {
            msc.setHadoopJobid(rj.getID().toString(), id);
        } catch (TException e) {
            LOG.warn("Error setting hadoop job, jobId=" + rj.getID().toString()
                + " compactionId=" + id, e);
        }
    } finally {
        if (jc != null) {
            jc.close();
        }
    }
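A hedged follow-up sketch, not part of the original fragment: after submitJob(), a caller can block on the compaction job and surface failure explicitly before the client is closed.

// Hedged sketch using the standard RunningJob API; placement inside the
// try block above is an assumption.
rj.waitForCompletion();
if (!rj.isSuccessful()) {
    throw new IOException("Compaction job " + rj.getID() + " failed");
}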
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceEvaluation() throws Exception {
    Configuration cfg = UTIL.getConfiguration();
    JobConf jobConf = new JobConf(cfg);
    try {
        jobConf.setJobName("process row task");
        jobConf.setNumReduceTasks(1);
        TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
            ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class, jobConf);
        TableMapReduceUtil.initTableReduceJob(TABLE_NAME, ClassificatorRowReduce.class, jobConf);
        RunningJob job = JobClient.runJob(jobConf);
        assertTrue(job.isSuccessful());
    } finally {
        if (jobConf != null) {
            FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
        }
    }
}
void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
    Configuration conf = UTIL.getConfiguration();
    final JobConf job = new JobConf(conf);
    job.setInputFormat(clazz);
    job.setOutputFormat(NullOutputFormat.class);
    job.setMapperClass(ExampleVerifier.class);
    job.setNumReduceTasks(0);
    LOG.debug("submitting job.");
    final RunningJob run = JobClient.runJob(job);
    assertTrue("job failed!", run.isSuccessful());
    assertEquals("Saw the wrong number of instances of the filtered-for row.", 2,
        run.getCounters()
            .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
    assertEquals("Saw any instances of the filtered out row.", 0,
        run.getCounters()
            .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
    assertEquals("Saw the wrong number of instances of columnA.", 1,
        run.getCounters()
            .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
    assertEquals("Saw the wrong number of instances of columnB.", 1,
        run.getCounters()
            .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
    assertEquals("Saw the wrong count of values for the filtered-for row.", 2,
        run.getCounters()
            .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
    assertEquals("Saw the wrong count of values for the filtered-out row.", 0,
        run.getCounters()
            .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
}
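A hedged sketch of how a mapper like ExampleVerifier might bump the counters asserted above, using the old-API Reporter. The map signature and the row-counter group name mirror the assertions; the family and value counters would be incremented analogously.

// Hedged sketch; ExampleVerifier's actual body is not in the original.
public void map(ImmutableBytesWritable key, Result value,
                OutputCollector<NullWritable, NullWritable> out,
                Reporter reporter) throws IOException {
  // one increment per row seen, keyed by the row's string form ("aaa", "bbb", ...)
  reporter.incrCounter(TestTableInputFormat.class.getName() + ":row",
      Bytes.toString(key.get()), 1);
}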
    JobConf jobConf = new JobConf(util.getConfiguration());
    jobConf.setJarByClass(util.getClass());
    org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.addDependencyJarsForClasses(jobConf,
        TestTableSnapshotInputFormat.class);
    jobConf.setReducerClass(TestTableSnapshotInputFormat.TestTableSnapshotReducer.class);
    jobConf.setNumReduceTasks(1);
    jobConf.setOutputFormat(NullOutputFormat.class);
    RunningJob job = JobClient.runJob(jobConf);
    Assert.assertTrue(job.isSuccessful());
} finally {
    if (!shutdownCluster) {
        // (cleanup body truncated in the original fragment)
    }
}