@Override
public void finalizeGlobal(int parallelism) throws IOException {
    try {
        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
        OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();

        // finalize HDFS output format
        outputCommitter.commitJob(jobContext);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
/**
 * Create the temporary output file for the Hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        // left-pad the 1-based task number with zeros to six digits, e.g. "attempt__0000_r_000001_0"
        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(taskNumber + 1)
                + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);
        this.outputCommitter = this.jobConf.getOutputCommitter();

        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
                Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
try {
    new JobConf(currContext.getConfiguration())
            .getOutputCommitter().abortJob(currContext, state);
} catch (Exception e) {
/**
 * Commit the task by moving the output file out from the temporary directory.
 * @throws java.io.IOException
 */
@Override
public void close() throws IOException {
    // enforce sequential close() calls
    synchronized (CLOSE_MUTEX) {
        this.recordWriter.close(new HadoopDummyReporter());

        if (this.outputCommitter.needsTaskCommit(this.context)) {
            this.outputCommitter.commitTask(this.context);
        }
    }
}
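Taken together, the open(), close(), and finalizeGlobal() snippets above follow the standard old-API (org.apache.hadoop.mapred) commit protocol: setupJob, write through a RecordWriter under a task attempt, commitTask when needsTaskCommit() says so, then commitJob once all tasks are done. The sketch below is a minimal single-driver walk through that protocol. It assumes Hadoop 2.x with TextOutputFormat and the default FileOutputCommitter; the class name CommitLifecycleSketch, the output path /tmp/commit-sketch, and the sample record are illustrative, not taken from the snippets above.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContextImpl;
import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.OutputCommitter;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TaskAttemptContextImpl;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TextOutputFormat;

public class CommitLifecycleSketch {

    public static void main(String[] args) throws IOException {
        JobConf jobConf = new JobConf();
        jobConf.setOutputFormat(TextOutputFormat.class);
        FileOutputFormat.setOutputPath(jobConf, new Path("/tmp/commit-sketch"));

        // fixed attempt id for a single task, mirroring the zero-padded scheme used in open() above
        TaskAttemptID attempt = TaskAttemptID.forName("attempt__0000_r_000001_0");
        jobConf.set("mapred.task.id", attempt.toString());
        jobConf.set("mapreduce.task.attempt.id", attempt.toString());

        OutputCommitter committer = jobConf.getOutputCommitter();
        JobContextImpl jobContext = new JobContextImpl(jobConf, new JobID());
        TaskAttemptContextImpl taskContext = new TaskAttemptContextImpl(jobConf, attempt);

        committer.setupJob(jobContext);    // create the job's temporary directory
        committer.setupTask(taskContext);  // per-task setup (a no-op for FileOutputCommitter)

        // write one record into the task's temporary location
        RecordWriter<Text, IntWritable> writer = new TextOutputFormat<Text, IntWritable>()
                .getRecordWriter(null, jobConf, "1", Reporter.NULL);
        writer.write(new Text("key"), new IntWritable(42));
        writer.close(Reporter.NULL);

        // promote the task's output, then finalize the whole job
        if (committer.needsTaskCommit(taskContext)) {
            committer.commitTask(taskContext);
        }
        committer.commitJob(jobContext);
    }
}

On an abort path the driver would instead call abortTask(taskContext) and abortJob(jobContext, ...), as the failure-handling snippets below do.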
/** {@inheritDoc} */
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopV2TaskContext ctx = (HadoopV2TaskContext)taskCtx;

    JobContext jobCtx = ctx.jobContext();

    try {
        OutputCommitter committer = jobCtx.getJobConf().getOutputCommitter();

        if (abort)
            committer.abortJob(jobCtx, JobStatus.State.FAILED);
        else
            committer.commitJob(jobCtx);
    } catch (IOException e) {
        throw new IgniteCheckedException(e);
    }
}
}
@Override
public void abortTask(TaskAttemptContext context) throws IOException {
    for (Map.Entry<String, OutputJobInfo> outputJobInfoEntry : dynamicOutputJobInfo.entrySet()) {
        String dynKey = outputJobInfoEntry.getKey();
        OutputJobInfo outputJobInfo = outputJobInfoEntry.getValue();
        LOG.info("Aborting task-attempt for " + outputJobInfo.getLocation());
        baseDynamicCommitters.get(dynKey).abortTask(dynamicContexts.get(dynKey));
    }
}
/**
 * Setup task.
 *
 * @throws IOException If failed.
 */
public void setup() throws IOException {
    if (writer != null)
        jobConf.getOutputCommitter().setupTask(new TaskAttemptContextImpl(jobConf, attempt));
}
/**
 * This method implements the new interface by calling the old method. Note
 * that the input types are different between the new and old apis and this
 * is a bridge between the two.
 */
@Override
public final boolean needsTaskCommit(org.apache.hadoop.mapreduce.TaskAttemptContext taskContext)
        throws IOException {
    return needsTaskCommit((TaskAttemptContext) taskContext);
}
/**
 * This method implements the new interface by calling the old method. Note
 * that the input types are different between the new and old apis and this
 * is a bridge between the two.
 */
@Override
public final void commitTask(org.apache.hadoop.mapreduce.TaskAttemptContext taskContext)
        throws IOException {
    commitTask((TaskAttemptContext) taskContext);
}
/**
 * For cleaning up the job's output after job failure.
 *
 * @param jobContext Context of the job whose output is being written.
 * @param status Final run state of the job, should be
 *               {@link JobStatus#KILLED} or {@link JobStatus#FAILED}
 * @throws IOException
 */
public void abortJob(JobContext jobContext, int status) throws IOException {
    cleanupJob(jobContext);
}
/**
 * Commit task.
 *
 * @throws IOException If failed.
 */
public void commit() throws IOException {
    if (writer != null) {
        OutputCommitter outputCommitter = jobConf.getOutputCommitter();

        TaskAttemptContext taskCtx = new TaskAttemptContextImpl(jobConf, attempt);

        if (outputCommitter.needsTaskCommit(taskCtx))
            outputCommitter.commitTask(taskCtx);
    }
}
/**
 * Abort task.
 */
public void abort() {
    try {
        if (writer != null)
            jobConf.getOutputCommitter().abortTask(new TaskAttemptContextImpl(jobConf, attempt));
    } catch (IOException ignore) {
        // No-op.
    }
}
}
/**
 * This method implements the new interface by calling the old method. Note
 * that the input types are different between the new and old apis and this
 * is a bridge between the two.
 */
@Override
public final void setupTask(org.apache.hadoop.mapreduce.TaskAttemptContext taskContext)
        throws IOException {
    setupTask((TaskAttemptContext) taskContext);
}
/**
 * For committing job's output after successful job completion. Note that this
 * is invoked for jobs with final runstate as SUCCESSFUL.
 *
 * @param jobContext Context of the job whose output is being written.
 * @throws IOException
 */
public void commitJob(JobContext jobContext) throws IOException {
    cleanupJob(jobContext);
}
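The bridge methods and default commitJob/abortJob implementations shown above come from the old-API base class; a concrete committer only has to provide the five abstract task- and job-level hooks. Below is a minimal sketch of such a subclass, assuming the old org.apache.hadoop.mapred API. The class name NoOpOutputCommitter is illustrative, and the bodies deliberately do nothing, since no output is staged.

import java.io.IOException;

import org.apache.hadoop.mapred.JobContext;
import org.apache.hadoop.mapred.OutputCommitter;
import org.apache.hadoop.mapred.TaskAttemptContext;

/**
 * Minimal old-API committer: nothing is staged, so no task ever needs a commit step.
 * commitJob/abortJob/cleanupJob behaviour is inherited from OutputCommitter, whose
 * defaults simply delegate to cleanupJob as shown in the snippets above.
 */
public class NoOpOutputCommitter extends OutputCommitter {

    @Override
    public void setupJob(JobContext jobContext) throws IOException {
        // no job-level temporary directory to create
    }

    @Override
    public void setupTask(TaskAttemptContext taskContext) throws IOException {
        // no per-task state to initialize
    }

    @Override
    public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException {
        // nothing was written to a temporary location, so commitTask can be skipped
        return false;
    }

    @Override
    public void commitTask(TaskAttemptContext taskContext) throws IOException {
        // never reached while needsTaskCommit() returns false
    }

    @Override
    public void abortTask(TaskAttemptContext taskContext) throws IOException {
        // nothing to clean up
    }
}

A committer like this would be wired in with jobConf.setOutputCommitter(NoOpOutputCommitter.class), which is where the jobConf.getOutputCommitter() calls in the snippets above pick it up; when left unset, the default is FileOutputCommitter.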
.getOutputCommitter().commitJob(context);
/** {@inheritDoc} */
@Override
public void run(HadoopTaskContext taskCtx) throws IgniteCheckedException {
    HadoopV2TaskContext ctx = (HadoopV2TaskContext)taskCtx;

    try {
        ctx.jobConf().getOutputFormat().checkOutputSpecs(null, ctx.jobConf());

        OutputCommitter committer = ctx.jobConf().getOutputCommitter();

        if (committer != null)
            committer.setupJob(ctx.jobContext());
    } catch (IOException e) {
        throw new IgniteCheckedException(e);
    }
}
}