/**
 * Configures this instance from the given job by storing the job's map
 * output schema in {@code schema}.
 *
 * @param job the job configuration the map output schema is read from
 */
public void configure(JobConf job) {
  this.schema = AvroJob.getMapOutputSchema(job);
}
/**
 * Passes the configuration to the superclass and, when it is non-null,
 * stores the map output schema it describes in {@code schema}.
 *
 * @param conf the configuration to apply; may be {@code null}, in which case
 *             {@code schema} is left untouched
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (conf == null) {
    return;
  }
  this.schema = AvroJob.getMapOutputSchema(conf);
}
/**
 * Passes the configuration to the superclass and, when it is non-null,
 * stores the key half of the map output pair schema in {@code schema}.
 *
 * @param conf the configuration to apply; may be {@code null}, in which case
 *             {@code schema} is left untouched
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (conf == null) {
    return;
  }
  this.schema = Pair.getKeySchema(AvroJob.getMapOutputSchema(conf));
}
/**
 * Returns the specified map output deserializer. Defaults to the final
 * output deserializer if no map output schema was specified.
 *
 * @param c the wrapper class to deserialize; an {@code AvroKey} (sub)class
 *          selects the key schema, anything else selects the value schema
 * @return a deserializer reading with the selected half of the map output
 *         pair schema
 */
public Deserializer<AvroWrapper<T>> getDeserializer(Class<AvroWrapper<T>> c) {
  Configuration conf = getConf();
  boolean isKey = AvroKey.class.isAssignableFrom(c);

  // The map output schema is a pair; pick the half matching the wrapper type.
  Schema pairSchema = AvroJob.getMapOutputSchema(conf);
  Schema readerSchema;
  if (isKey) {
    readerSchema = Pair.getKeySchema(pairSchema);
  } else {
    readerSchema = Pair.getValueSchema(pairSchema);
  }

  GenericData dataModel = AvroJob.createMapOutputDataModel(conf);
  DatumReader<T> datumReader = dataModel.createDatumReader(readerSchema);
  return new AvroWrapperDeserializer(datumReader, isKey);
}
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // AvroWrapper used for final output, AvroKey or AvroValue for map output boolean isFinalOutput = c.equals(AvroWrapper.class); Configuration conf = getConf(); Schema schema = isFinalOutput ? AvroJob.getOutputSchema(conf) : (AvroKey.class.isAssignableFrom(c) ? Pair.getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf))); GenericData dataModel = AvroJob.createDataModel(conf); return new AvroWrapperSerializer(dataModel.createDatumWriter(schema)); }
/**
 * Sends one datum to the tethered process, creating and configuring that
 * process on the first call.
 *
 * <p>Any failure sets {@code error} before the exception propagates.
 *
 * @param datum     the buffer and count forwarded to the process
 * @param ignore    unused value iterator
 * @param collector output collector handed to the tethered process
 * @param reporter  reporter handed to the tethered process
 * @throws IOException if the call fails; an {@code IOException} is rethrown
 *                     as-is, any other exception is wrapped in one
 */
public void reduce(TetherData datum, Iterator<NullWritable> ignore,
                   OutputCollector<TetherData, NullWritable> collector,
                   Reporter reporter) throws IOException {
  try {
    if (process == null) {
      // First datum: start the process and tell it which schemas to use.
      process = new TetheredProcess(job, collector, reporter);
      String inputSchemaJson = AvroJob.getMapOutputSchema(job).toString();
      String outputSchemaJson = AvroJob.getOutputSchema(job).toString();
      process.inputClient.configure(TaskType.REDUCE, inputSchemaJson, outputSchemaJson);
    }
    process.inputClient.input(datum.buffer(), datum.count());
  } catch (Exception e) {
    error = true;
    if (e instanceof IOException) {
      throw (IOException) e;
    }
    throw new IOException(e);
  }
}
? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);
/**
 * Creates a record writer that appends each wrapped datum to an Avro data
 * file at the task's output path.
 *
 * <p>A job with zero reduce tasks writes with the map output schema; any
 * other job writes with the job's output schema.
 *
 * @param ignore unused file system argument
 * @param job    the job configuration
 * @param name   base name of the output file; {@code EXT} is appended
 * @param prog   progress callback (not used here)
 * @return a writer whose {@code write} appends the wrapper's datum and whose
 *         {@code close} closes the underlying data file writer
 * @throws IOException if the output file cannot be created
 */
@Override
public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(
    FileSystem ignore, JobConf job, String name, Progressable prog)
    throws IOException {

  boolean isMapOnly = job.getNumReduceTasks() == 0;
  Schema fileSchema;
  if (isMapOnly) {
    fileSchema = AvroJob.getMapOutputSchema(job);
  } else {
    fileSchema = AvroJob.getOutputSchema(job);
  }

  GenericData dataModel = AvroJob.createDataModel(job);

  // The datum writer is created without a schema; create(...) below is
  // given the schema for the file.
  final DataFileWriter<T> fileWriter =
      new DataFileWriter<T>(dataModel.createDatumWriter(null));
  configureDataFileWriter(fileWriter, job);

  Path outputPath = FileOutputFormat.getTaskOutputPath(job, name + EXT);
  fileWriter.create(fileSchema, outputPath.getFileSystem(job).create(outputPath));

  return new RecordWriter<AvroWrapper<T>, NullWritable>() {
    public void write(AvroWrapper<T> wrapper, NullWritable ignore)
        throws IOException {
      fileWriter.append(wrapper.datum());
    }

    public void close(Reporter reporter) throws IOException {
      fileWriter.close();
    }
  };
}
(TaskType.MAP, job.get(AvroJob.INPUT_SCHEMA), AvroJob.getMapOutputSchema(job).toString());
/**
 * Applies the configuration through the superclass, then reads the map
 * output schema from it into {@code schema} if a configuration was given.
 *
 * @param conf the configuration to apply; a {@code null} value leaves
 *             {@code schema} unchanged
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (conf == null) {
    return;
  }
  this.schema = AvroJob.getMapOutputSchema(conf);
}
/**
 * Hands the configuration to the superclass and, for a non-null
 * configuration, records its map output schema in {@code schema}.
 *
 * @param conf the configuration to apply; when {@code null}, {@code schema}
 *             is not modified
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (conf == null) {
    return;
  }
  this.schema = AvroJob.getMapOutputSchema(conf);
}
/**
 * Passes the configuration to the superclass and, when it is non-null,
 * stores in {@code schema} the schema of the first field of the key half of
 * the map output pair schema.
 *
 * @param conf the configuration to apply; may be {@code null}, in which case
 *             {@code schema} is left untouched
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (conf == null) {
    return;
  }
  Schema pairSchema = AvroJob.getMapOutputSchema(conf);
  Schema pairKeySchema = org.apache.avro.mapred.Pair.getKeySchema(pairSchema);
  // Only the first field of the key record is kept.
  schema = pairKeySchema.getFields().get(0).schema();
}
/**
 * Applies the configuration through the superclass. For a non-null
 * configuration, takes the key schema of the map output pair schema and
 * keeps the schema of its first field in {@code schema}.
 *
 * @param conf the configuration to apply; a {@code null} value leaves
 *             {@code schema} unchanged
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (conf == null) {
    return;
  }
  Schema pairSchema = AvroJob.getMapOutputSchema(conf);
  Schema pairKeySchema = org.apache.avro.mapred.Pair.getKeySchema(pairSchema);
  // Only the first field of the key record is kept.
  schema = pairKeySchema.getFields().get(0).schema();
}
/**
 * Applies the configuration through the superclass. For a non-null
 * configuration, stores the schema of the first field of the map output key
 * schema in {@code schema}, then stores the shuffle mode obtained from the
 * configuration in {@code mode}.
 *
 * @param conf the configuration to apply; a {@code null} value leaves both
 *             {@code schema} and {@code mode} unchanged
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (conf == null) {
    return;
  }
  Schema pairSchema = AvroJob.getMapOutputSchema(conf);
  Schema pairKeySchema = org.apache.avro.mapred.Pair.getKeySchema(pairSchema);
  // Only the first field of the key record is kept.
  schema = pairKeySchema.getFields().get(0).schema();
  mode = AvroMode.fromShuffleConfiguration(conf);
}
/**
 * Hands the configuration to the superclass and, for a non-null
 * configuration, records the key schema of the map output pair schema in
 * {@code schema}.
 *
 * @param conf the configuration to apply; when {@code null}, {@code schema}
 *             is not modified
 */
@Override
public void setConf(Configuration conf) {
  super.setConf(conf);
  if (conf == null) {
    return;
  }
  this.schema = Pair.getKeySchema(AvroJob.getMapOutputSchema(conf));
}
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // AvroWrapper used for final output, AvroKey or AvroValue for map output boolean isFinalOutput = c.equals(AvroWrapper.class); Configuration conf = getConf(); Schema schema = isFinalOutput ? AvroJob.getOutputSchema(conf) : (AvroKey.class.isAssignableFrom(c) ? Pair .getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf))); ReflectDataFactory factory = Avros.getReflectDataFactory(conf); ReflectDatumWriter<T> writer = factory.getWriter(schema); return new AvroWrapperSerializer(writer); }
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // AvroWrapper used for final output, AvroKey or AvroValue for map output boolean isFinalOutput = c.equals(AvroWrapper.class); Configuration conf = getConf(); Schema schema = isFinalOutput ? AvroJob.getOutputSchema(conf) : (AvroKey.class.isAssignableFrom(c) ? Pair .getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf))); ReaderWriterFactory factory = AvroMode.fromShuffleConfiguration(conf); DatumWriter<T> writer = factory.getWriter(schema); return new AvroWrapperSerializer(writer); }
/**
 * Returns the specified map output deserializer. Defaults to the final
 * output deserializer if no map output schema was specified.
 *
 * @param c the wrapper class to deserialize; an {@code AvroKey} (sub)class
 *          selects the key schema, anything else selects the value schema
 * @return a deserializer reading with the selected half of the map output
 *         pair schema
 */
public Deserializer<AvroWrapper<T>> getDeserializer(Class<AvroWrapper<T>> c) {
  Configuration conf = getConf();
  boolean isKey = AvroKey.class.isAssignableFrom(c);

  // The map output schema is a pair; pick the half matching the wrapper type.
  Schema pairSchema = AvroJob.getMapOutputSchema(conf);
  Schema readerSchema;
  if (isKey) {
    readerSchema = Pair.getKeySchema(pairSchema);
  } else {
    readerSchema = Pair.getValueSchema(pairSchema);
  }

  GenericData dataModel = AvroJob.createMapOutputDataModel(conf);
  DatumReader<T> datumReader = dataModel.createDatumReader(readerSchema);
  return new AvroWrapperDeserializer(datumReader, isKey);
}
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // AvroWrapper used for final output, AvroKey or AvroValue for map output boolean isFinalOutput = c.equals(AvroWrapper.class); Configuration conf = getConf(); Schema schema = isFinalOutput ? AvroJob.getOutputSchema(conf) : (AvroKey.class.isAssignableFrom(c) ? Pair.getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf))); GenericData dataModel = AvroJob.createDataModel(conf); return new AvroWrapperSerializer(dataModel.createDatumWriter(schema)); }
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // Here we must rely on mapred.task.is.map to tell whether the map output // or final output is needed. boolean isMap = getConf().getBoolean("mapred.task.is.map", false); Schema schema = !isMap ? AvroJob.getOutputSchema(getConf()) : (AvroKey.class.isAssignableFrom(c) ? Pair.getKeySchema(AvroJob.getMapOutputSchema(getConf())) : Pair.getValueSchema(AvroJob.getMapOutputSchema(getConf()))); return new AvroWrapperSerializer(new SpecificDatumWriter<T>(schema)); }