/**
 * Create a new OutputCommitter for this OutputFormat.
 *
 * @param context the context to create the OutputCommitter from.
 * @return the new OutputCommitter for this format.
 * @throws IOException if there's an issue while creating the OutputCommitter.
 */
protected OutputCommitter createCommitter(TaskAttemptContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  return getDelegate(conf).getOutputCommitter(context);
}
public static Path getDefaultWorkFileOverride(
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat<?, ?> outputFormat,
    String name,
    TaskAttemptContext context,
    String extension) throws IOException {
  FileOutputCommitter committer =
      (FileOutputCommitter) outputFormat.getOutputCommitter(context);
  return new Path(committer.getWorkPath(), name + extension);
}
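A minimal usage sketch, assuming a custom TextOutputFormat subclass: the override helper above lets a format choose its own base file name while still placing the file under the committer's work path. The class name and the "events" base name are illustrative, not part of the snippet above, and the sketch assumes getDefaultWorkFileOverride is in scope (for example via static import).

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

// Hypothetical subclass for illustration only.
public class EventFileOutputFormat extends TextOutputFormat<Text, Text> {
  @Override
  public Path getDefaultWorkFile(TaskAttemptContext context, String extension)
      throws IOException {
    // Route through the helper so the file keeps the committer's work path
    // but uses "events" instead of the default "part" base name.
    return getDefaultWorkFileOverride(this, "events", context, extension);
  }
}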
/** Wraps the delegate's committer in a {@link FederatedBigQueryOutputCommitter}. */
@Override
public OutputCommitter createCommitter(TaskAttemptContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  OutputCommitter delegateCommitter = getDelegate(conf).getOutputCommitter(context);
  return new FederatedBigQueryOutputCommitter(context, delegateCommitter);
}
/** Wraps the delegate's committer in a {@link IndirectBigQueryOutputCommitter}. */
@Override
public OutputCommitter createCommitter(TaskAttemptContext context) throws IOException {
  Configuration conf = context.getConfiguration();
  OutputCommitter delegateCommitter = getDelegate(conf).getOutputCommitter(context);
  return new IndirectBigQueryOutputCommitter(context, delegateCommitter);
}
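Both BigQuery variants rely on the same wrapping pattern: keep a reference to the delegate committer and forward the standard lifecycle calls, layering extra work (such as a BigQuery load) on top. A minimal sketch of that delegation skeleton follows; the class name and the commitJob comment are illustrative, not the actual FederatedBigQueryOutputCommitter or IndirectBigQueryOutputCommitter implementation.

import java.io.IOException;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Illustrative wrapper: forwards every lifecycle call to the wrapped committer.
public class ForwardingOutputCommitter extends OutputCommitter {
  private final OutputCommitter delegate;

  public ForwardingOutputCommitter(OutputCommitter delegate) {
    this.delegate = delegate;
  }

  @Override
  public void setupJob(JobContext context) throws IOException {
    delegate.setupJob(context);
  }

  @Override
  public void setupTask(TaskAttemptContext context) throws IOException {
    delegate.setupTask(context);
  }

  @Override
  public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
    return delegate.needsTaskCommit(context);
  }

  @Override
  public void commitTask(TaskAttemptContext context) throws IOException {
    delegate.commitTask(context);
  }

  @Override
  public void abortTask(TaskAttemptContext context) throws IOException {
    delegate.abortTask(context);
  }

  @Override
  public void commitJob(JobContext context) throws IOException {
    // A BigQuery-style committer would export/load the committed files here,
    // after the delegate has moved them into their final location.
    delegate.commitJob(context);
  }
}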
/**
 * Get the default path and filename for the output format.
 *
 * @param context the task context
 * @param extension an extension to add to the filename
 * @return a full path $output/_temporary/$taskid/part-[mr]-$id
 * @throws IOException if the output committer cannot supply a work path
 */
public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
  FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(context);
  return new Path(committer.getWorkPath(), getUniqueFile(context, "part", extension));
}
/**
 * Get the default path and filename for the output format.
 *
 * @param context the task context
 * @param extension an extension to add to the filename
 * @return a full path $output/_temporary/$taskid/part-[mr]-$id
 * @throws IOException if the output committer cannot supply a work path
 */
public Path getDefaultWorkFile(TaskAttemptContext context, String extension) throws IOException {
  FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(context);
  return new Path(committer.getWorkPath(),
      getUniqueFile(context, getOutputName(context), extension));
}
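getDefaultWorkFile is typically consumed from getRecordWriter: the format resolves its per-attempt work file, opens a stream on it, and hands the stream to a writer. A hedged sketch of that flow, with a hypothetical PlainTextOutputFormat and simplified error handling:

import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical format for illustration only.
public class PlainTextOutputFormat extends FileOutputFormat<Text, Text> {
  @Override
  public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext context)
      throws IOException {
    // Resolve the per-attempt work file under the committer's temporary
    // directory; the committer promotes it to the final output on commit.
    Path file = getDefaultWorkFile(context, ".txt");
    FileSystem fs = file.getFileSystem(context.getConfiguration());
    FSDataOutputStream out = fs.create(file, false);
    return new RecordWriter<Text, Text>() {
      @Override
      public void write(Text key, Text value) throws IOException {
        out.writeBytes(key + "\t" + value + "\n");
      }

      @Override
      public void close(TaskAttemptContext ctx) throws IOException {
        out.close();
      }
    };
  }
}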
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException {
  createOutputFormatIfNeeded(context);

  String outDir = context.getConfiguration().get("mapred.output.dir");
  originalDir = outDir;
  FileOutputCommitter committer = (FileOutputCommitter) super.getOutputCommitter(context);
  baseDir = committer.getWorkPath().toString();

  // Rebuild a task attempt context whose output directory points at the
  // committer's work path, so the wrapped format writes there instead of
  // the final output directory.
  Configuration conf = new Configuration(context.getConfiguration());
  TaskAttemptContext reContext;
  try {
    reContext = TaskAttemptContextFactory.get(conf, context.getTaskAttemptID());
  } catch (Exception e) {
    throw new IOException(e);
  }

  reContext.getConfiguration().set("mapred.output.dir", baseDir);
  // This is for Hadoop 2.0:
  reContext.getConfiguration().set("mapreduce.output.fileoutputformat.outputdir", baseDir);

  try {
    return new ProxyOutputCommitter(new Path(originalDir), context,
        outputFormat.getOutputCommitter(reContext));
  } catch (InterruptedException e) {
    throw new RuntimeException(e);
  }
}
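The doubled set call exists because Hadoop renamed the property: mapred.output.dir is the Hadoop 1.x key and mapreduce.output.fileoutputformat.outputdir is its Hadoop 2.x replacement, so the snippet writes both. A small helper capturing that idiom (the helper name is illustrative):

import org.apache.hadoop.conf.Configuration;

// Illustrative helper: set the output directory under both the Hadoop 1.x
// and Hadoop 2.x configuration keys so either code path sees it.
static void setOutputDirCompat(Configuration conf, String dir) {
  conf.set("mapred.output.dir", dir);                            // Hadoop 1.x
  conf.set("mapreduce.output.fileoutputformat.outputdir", dir);  // Hadoop 2.x
}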
private void doOpen(String uId) throws Exception {
  this.hash = uId.hashCode();
  Job job = writeOperation.sink.newJob();
  FileOutputFormat.setOutputPath(job, new Path(path));

  // Each Writer is responsible for writing one bundle of elements and is
  // represented by one unique Hadoop task based on uId/hash. All tasks share
  // the same job ID. Since Dataflow handles retrying of failed bundles, each
  // task has one attempt only.
  JobID jobId = job.getJobID();
  TaskID taskId = new TaskID(jobId, TaskType.REDUCE, hash);
  context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID(taskId, 0));

  FileOutputFormat<K, V> outputFormat = formatClass.newInstance();
  recordWriter = outputFormat.getRecordWriter(context);
  outputCommitter = (FileOutputCommitter) outputFormat.getOutputCommitter(context);
}
private OutputCommitter createOutputCommitter(
    TaskAttemptContext context) throws IOException, InterruptedException {
  assert context != null;
  Set<OutputCommitter> components = new LinkedHashSet<>();
  if (isBridgeOutputEnabled(context)) {
    OutputCommitter committer = bridgeOutputFormat.getOutputCommitter(context);
    if (!components.contains(committer)) {
      components.add(committer);
    }
  }
  if (isFileOutputEnabled(context)) {
    OutputCommitter committer = dummyFileOutputFormat.getOutputCommitter(context);
    if (!components.contains(committer)) {
      components.add(committer);
    }
  }
  if (isTemporaryOutputEnabled(context)) {
    FileOutputCommitter committer = temporaryOutputFormat.getOutputCommitter(context);
    if (!components.contains(committer)) {
      components.add(committer);
    }
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug(MessageFormat.format(
        "Created stage output committers: {0}", //$NON-NLS-1$
        components));
  }
  return new CombinedOutputCommitter(new ArrayList<>(components));
}
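A sketch of what a combined committer could look like, fanning each lifecycle call out to its component committers in order. This is an assumed shape for illustration; the real CombinedOutputCommitter may differ.

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Assumed shape: apply each lifecycle step to every component in order.
public class CombinedOutputCommitter extends OutputCommitter {
  private final List<OutputCommitter> components;

  public CombinedOutputCommitter(List<OutputCommitter> components) {
    this.components = components;
  }

  @Override
  public void setupJob(JobContext context) throws IOException {
    for (OutputCommitter c : components) {
      c.setupJob(context);
    }
  }

  @Override
  public void setupTask(TaskAttemptContext context) throws IOException {
    for (OutputCommitter c : components) {
      c.setupTask(context);
    }
  }

  @Override
  public boolean needsTaskCommit(TaskAttemptContext context) throws IOException {
    // Commit the task if any component needs it.
    for (OutputCommitter c : components) {
      if (c.needsTaskCommit(context)) {
        return true;
      }
    }
    return false;
  }

  @Override
  public void commitTask(TaskAttemptContext context) throws IOException {
    for (OutputCommitter c : components) {
      c.commitTask(context);
    }
  }

  @Override
  public void abortTask(TaskAttemptContext context) throws IOException {
    for (OutputCommitter c : components) {
      c.abortTask(context);
    }
  }
}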
@Override public void open(String uId) throws Exception { this.hash = uId.hashCode(); Job job = ((ConfigurableHDFSFileSink<K, V>) getWriteOperation().getSink()).jobInstance(); FileOutputFormat.setOutputPath(job, new Path(path)); // Each Writer is responsible for writing one bundle of elements and is represented by one // unique Hadoop task based on uId/hash. All tasks share the same job ID. Since Dataflow // handles retrying of failed bundles, each task has one attempt only. JobID jobId = job.getJobID(); TaskID taskId = new TaskID(jobId, TaskType.REDUCE, hash); configure(job); context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID(taskId, 0)); FileOutputFormat<K, V> outputFormat = formatClass.newInstance(); recordWriter = outputFormat.getRecordWriter(context); outputCommitter = (FileOutputCommitter) outputFormat.getOutputCommitter(context); }
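The open() above pairs with a close step that flushes the writer and commits the task attempt. A hedged sketch of what that counterpart could look like; the recordWriter, outputCommitter, and context fields mirror the snippet above, and gating on needsTaskCommit is an assumption, not confirmed by the source.

// Illustrative counterpart to open(); assumes the same fields as above.
public void close() throws Exception {
  // Flush and close the per-bundle writer first.
  recordWriter.close(context);
  // Then let the committer promote the attempt's files out of the temporary
  // work path, but only if this attempt actually produced output.
  if (outputCommitter.needsTaskCommit(context)) {
    outputCommitter.commitTask(context);
  }
}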