/**
 * Registers a named output on the given job.
 *
 * <p>The name is appended to the space-separated list stored under
 * {@code MULTIPLE_OUTPUTS}; the output format class and the key schema are
 * stored under keys derived from {@code MO_PREFIX + namedOutput}. The value
 * schema is stored only when it is non-null.
 *
 * @param job               job to add the named output to
 * @param namedOutput       named output name; it has to be a word (letters and
 *                          numbers only) and cannot be the word 'part', which is
 *                          reserved for the default output
 * @param outputFormatClass OutputFormat class used for this named output
 * @param keySchema         Schema for the key; it is dereferenced
 *                          unconditionally, so it must not be null
 * @param valueSchema       Schema for the value (used in case of
 *                          AvroKeyValueOutputFormat), or null
 */
@SuppressWarnings("unchecked")
public static void addNamedOutput(Job job, String namedOutput,
    Class<? extends OutputFormat> outputFormatClass, Schema keySchema, Schema valueSchema) {
  checkNamedOutputName(job, namedOutput, true);

  final Configuration jobConf = job.getConfiguration();
  final String outputPrefix = MO_PREFIX + namedOutput;

  // Append this name to the space-separated list of registered named outputs.
  final String registered = jobConf.get(MULTIPLE_OUTPUTS, "");
  jobConf.set(MULTIPLE_OUTPUTS, registered + " " + namedOutput);

  jobConf.setClass(outputPrefix + FORMAT, outputFormatClass, OutputFormat.class);
  jobConf.set(outputPrefix + ".keyschema", keySchema.toString());

  // The value schema is optional; nothing more to record without one.
  if (valueSchema == null) {
    return;
  }
  jobConf.set(outputPrefix + ".valueschema", valueSchema.toString());
}
/**
 * Writes a key/value pair to {@code baseOutputPath} through the given named
 * output.
 *
 * @param namedOutput    the named output name
 * @param key            the key
 * @param value          the value
 * @param baseOutputPath base-output path to write the record to.
 *                       Note: Framework will generate unique filename for the
 *                       baseOutputPath
 * @throws IllegalArgumentException if {@code namedOutput} is not one of the
 *                                  known named outputs (the name and path
 *                                  checks run first and may reject the
 *                                  arguments as well)
 * @throws IOException              declared by the signature; propagated from
 *                                  the calls made here
 * @throws InterruptedException     declared by the signature; propagated from
 *                                  the calls made here
 */
@SuppressWarnings("unchecked")
public void write(String namedOutput, Object key, Object value, String baseOutputPath)
    throws IOException, InterruptedException {
  checkNamedOutputName(context, namedOutput, false);
  checkBaseOutputPath(baseOutputPath);

  final boolean isRegistered = namedOutputs.contains(namedOutput);
  if (!isRegistered) {
    final String message = "Undefined named output '" + namedOutput + "'";
    throw new IllegalArgumentException(message);
  }

  final TaskAttemptContext outputContext = getContext(namedOutput);
  getRecordWriter(outputContext, baseOutputPath).write(key, value);
}
/**
 * Syncs the record writer associated with the given named output and base
 * output path.
 *
 * <p>The record writer is looked up for the named output's task context. If it
 * implements Syncable, its {@code sync()} is invoked, which forces the end of
 * the current block, emitting a synchronization marker, and the resulting
 * position is returned; that value may be passed to DataFileReader.seek(long).
 * The job's output format should be a FileOutputFormat.
 *
 * @param namedOutput    the namedOutput
 * @param baseOutputPath base-output path to write the record to. Note: Framework
 *                       will generate unique filename for the baseOutputPath
 * @return the position reported by the writer's {@code sync()}, or -1 when the
 *         record writer does not implement Syncable
 * @throws IllegalArgumentException if {@code namedOutput} is not one of the
 *                                  known named outputs (the name and path
 *                                  checks run first and may reject the
 *                                  arguments as well)
 * @throws IOException              declared by the signature; propagated from
 *                                  the calls made here
 * @throws InterruptedException     declared by the signature; propagated from
 *                                  the calls made here
 */
@SuppressWarnings("unchecked")
public long sync(String namedOutput, String baseOutputPath)
    throws IOException, InterruptedException {
  checkNamedOutputName(context, namedOutput, false);
  checkBaseOutputPath(baseOutputPath);

  final boolean isRegistered = namedOutputs.contains(namedOutput);
  if (!isRegistered) {
    throw new IllegalArgumentException("Undefined named output '" + namedOutput + "'");
  }

  final TaskAttemptContext outputContext = getContext(namedOutput);
  final RecordWriter writer = getRecordWriter(outputContext, baseOutputPath);

  // Writers that cannot sync report -1 instead of a seekable position.
  if (!(writer instanceof Syncable)) {
    return -1;
  }
  return ((Syncable) writer).sync();
}
// by being synchronized MultipleOutputTask can be use with a
/**
 * Registers a named output on the given job.
 *
 * <p>The name is appended to the space-separated list stored under
 * {@code MULTIPLE_OUTPUTS}; the output format class and the key schema are
 * stored under keys derived from {@code MO_PREFIX + namedOutput}. The value
 * schema is stored only when it is non-null.
 *
 * @param job               job to add the named output to
 * @param namedOutput       named output name; it has to be a word (letters and
 *                          numbers only) and cannot be the word 'part', which is
 *                          reserved for the default output
 * @param outputFormatClass OutputFormat class used for this named output
 * @param keySchema         Schema for the key; it is dereferenced
 *                          unconditionally, so it must not be null
 * @param valueSchema       Schema for the value (used in case of
 *                          AvroKeyValueOutputFormat), or null
 */
@SuppressWarnings("unchecked")
public static void addNamedOutput(Job job, String namedOutput,
    Class<? extends OutputFormat> outputFormatClass, Schema keySchema, Schema valueSchema) {
  checkNamedOutputName(job, namedOutput, true);

  final Configuration jobConf = job.getConfiguration();
  final String outputPrefix = MO_PREFIX + namedOutput;

  // Append this name to the space-separated list of registered named outputs.
  final String registered = jobConf.get(MULTIPLE_OUTPUTS, "");
  jobConf.set(MULTIPLE_OUTPUTS, registered + " " + namedOutput);

  jobConf.setClass(outputPrefix + FORMAT, outputFormatClass, OutputFormat.class);
  jobConf.set(outputPrefix + ".keyschema", keySchema.toString());

  // The value schema is optional; nothing more to record without one.
  if (valueSchema == null) {
    return;
  }
  jobConf.set(outputPrefix + ".valueschema", valueSchema.toString());
}
/**
 * Writes a key/value pair to {@code baseOutputPath} through the given named
 * output.
 *
 * @param namedOutput    the named output name
 * @param key            the key
 * @param value          the value
 * @param baseOutputPath base-output path to write the record to.
 *                       Note: Framework will generate unique filename for the
 *                       baseOutputPath
 * @throws IllegalArgumentException if {@code namedOutput} is not one of the
 *                                  known named outputs (the name and path
 *                                  checks run first and may reject the
 *                                  arguments as well)
 * @throws IOException              declared by the signature; propagated from
 *                                  the calls made here
 * @throws InterruptedException     declared by the signature; propagated from
 *                                  the calls made here
 */
@SuppressWarnings("unchecked")
public void write(String namedOutput, Object key, Object value, String baseOutputPath)
    throws IOException, InterruptedException {
  checkNamedOutputName(context, namedOutput, false);
  checkBaseOutputPath(baseOutputPath);

  final boolean isRegistered = namedOutputs.contains(namedOutput);
  if (!isRegistered) {
    final String message = "Undefined named output '" + namedOutput + "'";
    throw new IllegalArgumentException(message);
  }

  final TaskAttemptContext outputContext = getContext(namedOutput);
  getRecordWriter(outputContext, baseOutputPath).write(key, value);
}
/**
 * Syncs the record writer associated with the given named output and base
 * output path.
 *
 * <p>The record writer is looked up for the named output's task context. If it
 * implements Syncable, its {@code sync()} is invoked, which forces the end of
 * the current block, emitting a synchronization marker, and the resulting
 * position is returned; that value may be passed to DataFileReader.seek(long).
 * The job's output format should be a FileOutputFormat.
 *
 * @param namedOutput    the namedOutput
 * @param baseOutputPath base-output path to write the record to. Note: Framework
 *                       will generate unique filename for the baseOutputPath
 * @return the position reported by the writer's {@code sync()}, or -1 when the
 *         record writer does not implement Syncable
 * @throws IllegalArgumentException if {@code namedOutput} is not one of the
 *                                  known named outputs (the name and path
 *                                  checks run first and may reject the
 *                                  arguments as well)
 * @throws IOException              declared by the signature; propagated from
 *                                  the calls made here
 * @throws InterruptedException     declared by the signature; propagated from
 *                                  the calls made here
 */
@SuppressWarnings("unchecked")
public long sync(String namedOutput, String baseOutputPath)
    throws IOException, InterruptedException {
  checkNamedOutputName(context, namedOutput, false);
  checkBaseOutputPath(baseOutputPath);

  final boolean isRegistered = namedOutputs.contains(namedOutput);
  if (!isRegistered) {
    throw new IllegalArgumentException("Undefined named output '" + namedOutput + "'");
  }

  final TaskAttemptContext outputContext = getContext(namedOutput);
  final RecordWriter writer = getRecordWriter(outputContext, baseOutputPath);

  // Writers that cannot sync report -1 instead of a seekable position.
  if (!(writer instanceof Syncable)) {
    return -1;
  }
  return ((Syncable) writer).sync();
}
// by being synchronized MultipleOutputTask can be use with a