/**
 * Checks if a named output name is valid and whether its defined/undefined
 * state matches what the caller expects.
 *
 * The name is first passed to {@code checkTokenName} and
 * {@code checkBaseOutputPath}; it is then looked up in the list returned by
 * {@code getNamedOutputsList(job)}.
 *
 * @param job            the job context whose named outputs are consulted
 * @param namedOutput    named output name
 * @param alreadyDefined if {@code true}, the check fails when the name is
 *                       already defined; if {@code false}, the check fails
 *                       when the name is not defined
 * @throws IllegalArgumentException if the output name is not valid, or its
 *                                  defined state does not match
 *                                  {@code alreadyDefined} as described above.
 */
private static void checkNamedOutputName(JobContext job, String namedOutput, boolean alreadyDefined) {
  checkTokenName(namedOutput);
  checkBaseOutputPath(namedOutput);

  // Look the name up once; both branches below depend on the same answer.
  final boolean isDefined = getNamedOutputsList(job).contains(namedOutput);

  if (alreadyDefined && isDefined) {
    throw new IllegalArgumentException("Named output '" + namedOutput + "' already defined");
  }
  if (!alreadyDefined && !isDefined) {
    throw new IllegalArgumentException("Named output '" + namedOutput + "' not defined");
  }
}
/**
 * Writes a key/value pair to {@code baseOutputPath} through the given named
 * output.
 *
 * @param namedOutput    the named output name
 * @param key            the key
 * @param value          the value
 * @param baseOutputPath base-output path to write the record to.
 *                       Note: Framework will generate unique filename for the
 *                       baseOutputPath
 * @throws IllegalArgumentException if {@code namedOutput} is not a defined
 *                                  named output
 * @throws IOException              declared for the underlying record writer
 * @throws InterruptedException     declared for the underlying record writer
 */
@SuppressWarnings("unchecked")
public void write(String namedOutput, Object key, Object value, String baseOutputPath)
    throws IOException, InterruptedException {
  // Validate both arguments before any writer is looked up.
  checkNamedOutputName(context, namedOutput, false);
  checkBaseOutputPath(baseOutputPath);

  final boolean known = namedOutputs.contains(namedOutput);
  if (!known) {
    throw new IllegalArgumentException("Undefined named output '" + namedOutput + "'");
  }

  // Resolve the per-output task context and hand the record to its writer.
  getRecordWriter(getContext(namedOutput), baseOutputPath).write(key, value);
}
/**
 * Syncs the record writer associated with the given named output and base
 * output path.
 *
 * Gets the record writer from job's output format. Job's output format should
 * be a FileOutputFormat. If the record writer implements {@code Syncable},
 * its {@code sync()} is invoked, which forces the end of the current block,
 * emitting a synchronization marker, and the result is returned.
 *
 * @param namedOutput    the namedOutput
 * @param baseOutputPath base-output path to write the record to. Note: Framework will
 *                       generate unique filename for the baseOutputPath
 * @return the current position as a value that may be passed to
 *         DataFileReader.seek(long), or -1 if the record writer does not
 *         implement {@code Syncable}
 * @throws IllegalArgumentException if {@code namedOutput} is not a defined
 *                                  named output
 */
@SuppressWarnings("unchecked")
public long sync(String namedOutput, String baseOutputPath) throws IOException, InterruptedException {
  checkNamedOutputName(context, namedOutput, false);
  checkBaseOutputPath(baseOutputPath);

  final boolean known = namedOutputs.contains(namedOutput);
  if (!known) {
    throw new IllegalArgumentException("Undefined named output '" + namedOutput + "'");
  }

  RecordWriter writer = getRecordWriter(getContext(namedOutput), baseOutputPath);

  // Writers that cannot sync report -1 instead of a position.
  if (!(writer instanceof Syncable)) {
    return -1L;
  }
  return ((Syncable) writer).sync();
}
// by being synchronized MultipleOutputTask can be use with a
/**
 * Writes a key/value pair to an output file name, using explicitly supplied
 * key and value schemas.
 *
 * Gets the record writer from job's output format. Job's output format should
 * be a FileOutputFormat. A new {@code Job} is built from the current context's
 * configuration, the schemas are applied to it via {@code setSchema}, and a
 * task attempt context created from that job's configuration is used to
 * obtain the record writer.
 *
 * @param key            the key
 * @param value          the value
 * @param keySchema      keySchema to use
 * @param valSchema      ValueSchema to use
 * @param baseOutputPath base-output path to write the record to. Note: Framework will
 *                       generate unique filename for the baseOutputPath
 */
@SuppressWarnings("unchecked")
public void write(Object key, Object value, Schema keySchema, Schema valSchema, String baseOutputPath)
    throws IOException, InterruptedException {
  checkBaseOutputPath(baseOutputPath);

  // Carry the schemas on a separate Job created from the task's configuration.
  Job schemaJob = new Job(context.getConfiguration());
  setSchema(schemaJob, keySchema, valSchema);

  getRecordWriter(
      createTaskAttemptContext(schemaJob.getConfiguration(), context.getTaskAttemptID()),
      baseOutputPath).write(key, value);
}
/**
 * Checks if a named output name is valid and whether its defined/undefined
 * state matches what the caller expects.
 *
 * The name is first passed to {@code checkTokenName} and
 * {@code checkBaseOutputPath}; it is then looked up in the list returned by
 * {@code getNamedOutputsList(job)}.
 *
 * @param job            the job context whose named outputs are consulted
 * @param namedOutput    named output name
 * @param alreadyDefined if {@code true}, the check fails when the name is
 *                       already defined; if {@code false}, the check fails
 *                       when the name is not defined
 * @throws IllegalArgumentException if the output name is not valid, or its
 *                                  defined state does not match
 *                                  {@code alreadyDefined} as described above.
 */
private static void checkNamedOutputName(JobContext job, String namedOutput, boolean alreadyDefined) {
  checkTokenName(namedOutput);
  checkBaseOutputPath(namedOutput);

  // Look the name up once; both branches below depend on the same answer.
  final boolean isDefined = getNamedOutputsList(job).contains(namedOutput);

  if (alreadyDefined && isDefined) {
    throw new IllegalArgumentException("Named output '" + namedOutput + "' already defined");
  }
  if (!alreadyDefined && !isDefined) {
    throw new IllegalArgumentException("Named output '" + namedOutput + "' not defined");
  }
}
/**
 * Writes a key/value pair to {@code baseOutputPath} through the given named
 * output.
 *
 * @param namedOutput    the named output name
 * @param key            the key
 * @param value          the value
 * @param baseOutputPath base-output path to write the record to.
 *                       Note: Framework will generate unique filename for the
 *                       baseOutputPath
 * @throws IllegalArgumentException if {@code namedOutput} is not a defined
 *                                  named output
 * @throws IOException              declared for the underlying record writer
 * @throws InterruptedException     declared for the underlying record writer
 */
@SuppressWarnings("unchecked")
public void write(String namedOutput, Object key, Object value, String baseOutputPath)
    throws IOException, InterruptedException {
  // Validate both arguments before any writer is looked up.
  checkNamedOutputName(context, namedOutput, false);
  checkBaseOutputPath(baseOutputPath);

  final boolean known = namedOutputs.contains(namedOutput);
  if (!known) {
    throw new IllegalArgumentException("Undefined named output '" + namedOutput + "'");
  }

  // Resolve the per-output task context and hand the record to its writer.
  getRecordWriter(getContext(namedOutput), baseOutputPath).write(key, value);
}
/**
 * Syncs the record writer associated with the given named output and base
 * output path.
 *
 * Gets the record writer from job's output format. Job's output format should
 * be a FileOutputFormat. If the record writer implements {@code Syncable},
 * its {@code sync()} is invoked, which forces the end of the current block,
 * emitting a synchronization marker, and the result is returned.
 *
 * @param namedOutput    the namedOutput
 * @param baseOutputPath base-output path to write the record to. Note: Framework will
 *                       generate unique filename for the baseOutputPath
 * @return the current position as a value that may be passed to
 *         DataFileReader.seek(long), or -1 if the record writer does not
 *         implement {@code Syncable}
 * @throws IllegalArgumentException if {@code namedOutput} is not a defined
 *                                  named output
 */
@SuppressWarnings("unchecked")
public long sync(String namedOutput, String baseOutputPath) throws IOException, InterruptedException {
  checkNamedOutputName(context, namedOutput, false);
  checkBaseOutputPath(baseOutputPath);

  final boolean known = namedOutputs.contains(namedOutput);
  if (!known) {
    throw new IllegalArgumentException("Undefined named output '" + namedOutput + "'");
  }

  RecordWriter writer = getRecordWriter(getContext(namedOutput), baseOutputPath);

  // Writers that cannot sync report -1 instead of a position.
  if (!(writer instanceof Syncable)) {
    return -1L;
  }
  return ((Syncable) writer).sync();
}
// by being synchronized MultipleOutputTask can be use with a
/**
 * Writes a key/value pair to an output file name, using explicitly supplied
 * key and value schemas.
 *
 * Gets the record writer from job's output format. Job's output format should
 * be a FileOutputFormat. A new {@code Job} is built from the current context's
 * configuration, the schemas are applied to it via {@code setSchema}, and a
 * task attempt context created from that job's configuration is used to
 * obtain the record writer.
 *
 * @param key            the key
 * @param value          the value
 * @param keySchema      keySchema to use
 * @param valSchema      ValueSchema to use
 * @param baseOutputPath base-output path to write the record to. Note: Framework will
 *                       generate unique filename for the baseOutputPath
 */
@SuppressWarnings("unchecked")
public void write(Object key, Object value, Schema keySchema, Schema valSchema, String baseOutputPath)
    throws IOException, InterruptedException {
  checkBaseOutputPath(baseOutputPath);

  // Carry the schemas on a separate Job created from the task's configuration.
  Job schemaJob = new Job(context.getConfiguration());
  setSchema(schemaJob, keySchema, valSchema);

  getRecordWriter(
      createTaskAttemptContext(schemaJob.getConfiguration(), context.getTaskAttemptID()),
      baseOutputPath).write(key, value);
}