/**
 * Setup task.
 *
 * @throws IOException If failed.
 */
public void setup() throws IOException {
    if (writer != null)
        jobConf.getOutputCommitter().setupTask(new TaskAttemptContextImpl(jobConf, attempt));
}
/**
 * Abort task.
 */
public void abort() {
    try {
        if (writer != null)
            jobConf.getOutputCommitter().abortTask(new TaskAttemptContextImpl(jobConf, attempt));
    }
    catch (IOException ignore) {
        // No-op.
    }
}
}
/**
 * Commit task.
 *
 * @throws IOException If failed.
 */
public void commit() throws IOException {
    if (writer != null) {
        OutputCommitter outputCommitter = jobConf.getOutputCommitter();

        TaskAttemptContext taskCtx = new TaskAttemptContextImpl(jobConf, attempt);

        if (outputCommitter.needsTaskCommit(taskCtx))
            outputCommitter.commitTask(taskCtx);
    }
}
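A minimal sketch of how these three hooks would typically be driven around a record-writing pass; `task` and `write(records)` are hypothetical stand-ins for the surrounding task wrapper and its output logic, not part of the snippets above.

// Hypothetical driver, assuming 'task' exposes the setup()/commit()/abort() hooks shown above.
task.setup();                  // OutputCommitter.setupTask(...) for this attempt
try {
    write(records);            // emit output through the task's RecordWriter (assumed helper)
    task.commit();             // commitTask(...) only runs if needsTaskCommit(...) returns true
}
catch (IOException e) {
    task.abort();              // abortTask(...) discards any partial task output
    throw e;
}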
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {
        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(taskNumber + 1)
                + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);

        this.outputCommitter = this.jobConf.getOutputCommitter();

        JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());

        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
                Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
        FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new FileOutputCommitter();
private void testMapOnlyNoOutputInternal(int version) throws Exception {
    JobConf conf = new JobConf();
    //This is not set on purpose. FileOutputFormat.setOutputPath(conf, outDir);
    conf.set(JobContext.TASK_ATTEMPT_ID, attempt);
    conf.setInt(org.apache.hadoop.mapreduce.lib.output.
        FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();

    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);

    if (committer.needsTaskCommit(tContext)) {
        // do commit
        committer.commitTask(tContext);
    }
    committer.commitJob(jContext);

    // validate output
    FileUtil.fullyDelete(new File(outDir.toString()));
}
FileOutputFormat.setOutputPath(conf, outDir);
JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new FileOutputCommitter();
private void testMapFileOutputCommitterInternal(int version) throws Exception {
    JobConf conf = new JobConf();
    FileOutputFormat.setOutputPath(conf, outDir);
    conf.set(JobContext.TASK_ATTEMPT_ID, attempt);
    conf.setInt(org.apache.hadoop.mapreduce.lib.output.
        FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();

    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);

    // write output
    MapFileOutputFormat theOutputFormat = new MapFileOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(null, conf, partFile, null);
    writeMapFileOutput(theRecordWriter, tContext);

    // do commit
    if (committer.needsTaskCommit(tContext)) {
        committer.commitTask(tContext);
    }
    committer.commitJob(jContext);

    // validate output
    validateMapFileOutputContent(FileSystem.get(conf), outDir);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
        commitVersion);
JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new FileOutputCommitter();

        recoveryVersion);
JobContext jContext2 = new JobContextImpl(conf2, taskID.getJobID());
TaskAttemptContext tContext2 = new TaskAttemptContextImpl(conf2, taskID);
FileOutputCommitter committer2 = new FileOutputCommitter();
committer2.setupJob(jContext2);
private void testCommitterInternal(int version) throws Exception {
    JobConf conf = new JobConf();
    FileOutputFormat.setOutputPath(conf, outDir);
    conf.set(JobContext.TASK_ATTEMPT_ID, attempt);
    conf.setInt(org.apache.hadoop.mapreduce.lib.output.
        FileOutputCommitter.FILEOUTPUTCOMMITTER_ALGORITHM_VERSION, version);
    JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
    TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
    FileOutputCommitter committer = new FileOutputCommitter();

    // setup
    committer.setupJob(jContext);
    committer.setupTask(tContext);

    // write output
    TextOutputFormat theOutputFormat = new TextOutputFormat();
    RecordWriter theRecordWriter = theOutputFormat.getRecordWriter(null, conf, partFile, null);
    writeOutput(theRecordWriter, tContext);

    // do commit
    if (committer.needsTaskCommit(tContext)) {
        committer.commitTask(tContext);
    }
    committer.commitJob(jContext);

    // validate output
    validateContent(outDir);
    FileUtil.fullyDelete(new File(outDir.toString()));
}
        FILEOUTPUTCOMMITTER_FAILURE_ATTEMPTS, maxAttempts);
JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
FileOutputCommitter committer = new CommitterWithFailedThenSucceed();
public static org.apache.hadoop.mapred.TaskAttemptContext getOldTaskAttemptContext(TaskAttemptContext context) {
    return new TaskAttemptContextImpl(new JobConf(context.getConfiguration()), getTaskAttemptID(context));
}
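A rough usage sketch for this bridge method: it lets code holding a new-API (mapreduce) context drive an old-API (mapred) committer. `newApiContext` and `oldApiCommitter` are assumed to be supplied by the surrounding code and are not part of the snippet above.

// Hypothetical caller: bridge a mapreduce TaskAttemptContext to the mapred API.
org.apache.hadoop.mapred.TaskAttemptContext oldCtx = getOldTaskAttemptContext(newApiContext);
oldApiCommitter.setupTask(oldCtx);
if (oldApiCommitter.needsTaskCommit(oldCtx)) {
    oldApiCommitter.commitTask(oldCtx);
}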
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { TaskAttemptContext taskContext = new TaskAttemptContextImpl(taskAttempt.conf, TypeConverter.fromYarn(taskAttempt.attemptId)); taskAttempt.eventHandler.handle(new CommitterTaskAbortEvent( taskAttempt.attemptId, taskContext)); } }
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { TaskAttemptContext taskContext = new TaskAttemptContextImpl(taskAttempt.conf, TypeConverter.fromYarn(taskAttempt.attemptId)); taskAttempt.eventHandler.handle(new CommitterTaskAbortEvent( taskAttempt.attemptId, taskContext)); } }
@SuppressWarnings("unchecked") @Override public void transition(TaskAttemptImpl taskAttempt, TaskAttemptEvent event) { TaskAttemptContext taskContext = new TaskAttemptContextImpl(taskAttempt.conf, TypeConverter.fromYarn(taskAttempt.attemptId)); taskAttempt.eventHandler.handle(new CommitterTaskAbortEvent( taskAttempt.attemptId, taskContext)); } }
/**
 * Helper function to create the task's temporary output directory and
 * return the path to the task's output file.
 *
 * @param conf job-configuration
 * @param name temporary task-output filename
 * @return path to the task's temporary output file
 * @throws IOException
 */
public static Path getTaskOutputPath(JobConf conf, String name) throws IOException {
    // ${mapred.out.dir}
    Path outputPath = getOutputPath(conf);
    if (outputPath == null) {
        throw new IOException("Undefined job output-path");
    }

    OutputCommitter committer = conf.getOutputCommitter();
    Path workPath = outputPath;
    TaskAttemptContext context =
        new TaskAttemptContextImpl(conf, TaskAttemptID.forName(conf.get(JobContext.TASK_ATTEMPT_ID)));
    if (committer instanceof FileOutputCommitter) {
        workPath = ((FileOutputCommitter)committer).getWorkPath(context, outputPath);
    }

    // ${mapred.out.dir}/_temporary/_${taskid}/${name}
    return new Path(workPath, name);
}
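A brief sketch of how a custom old-API OutputFormat would typically call this helper from getRecordWriter(): resolve the per-task temporary file, then open a stream under the committer-managed work directory. The stream handling and the MyRecordWriter type are assumptions for illustration, not taken from the snippet above.

// Hypothetical getRecordWriter() body using the helper shown above.
@Override
public RecordWriter<LongWritable, Text> getRecordWriter(FileSystem ignored, JobConf job,
        String name, Progressable progress) throws IOException {
    Path file = FileOutputFormat.getTaskOutputPath(job, name);   // per-task temporary output file
    FileSystem fs = file.getFileSystem(job);
    FSDataOutputStream out = fs.create(file, progress);
    return new MyRecordWriter(out);                              // hypothetical writer implementation
}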
@Override @Nonnull
public List<Processor> get(int count) {
    return range(0, count).mapToObj(i -> {
        try {
            String uuid = context.jetInstance().getCluster().getLocalMember().getUuid();
            TaskAttemptID taskAttemptID = new TaskAttemptID("jet-node-" + uuid,
                    jobContext.getJobID().getId(), JOB_SETUP, i, 0);
            jobConf.set("mapred.task.id", taskAttemptID.toString());
            jobConf.setInt("mapred.task.partition", i);

            TaskAttemptContextImpl taskAttemptContext = new TaskAttemptContextImpl(jobConf, taskAttemptID);
            @SuppressWarnings("unchecked")
            OutputFormat<K, V> outFormat = jobConf.getOutputFormat();
            RecordWriter<K, V> recordWriter = outFormat.getRecordWriter(
                    null, jobConf, uuid + '-' + valueOf(i), Reporter.NULL);
            return new WriteHdfsP<>(
                    recordWriter, taskAttemptContext, outputCommitter, extractKeyFn, extractValueFn);
        } catch (IOException e) {
            throw new JetException(e);
        }
    }).collect(toList());
}
}