public void reduce(TetherData datum, Iterator<NullWritable> ignore, OutputCollector<TetherData, NullWritable> collector, Reporter reporter) throws IOException { try { if (process == null) { process = new TetheredProcess(job, collector, reporter); process.inputClient.configure (TaskType.REDUCE, AvroJob.getMapOutputSchema(job).toString(), AvroJob.getOutputSchema(job).toString()); } process.inputClient.input(datum.buffer(), datum.count()); } catch (IOException e) { error = true; throw e; } catch (Exception e) { error = true; throw new IOException(e); } }
/** Called with all map output values with a given key. By default, pairs * key with each value, collecting {@link Pair} instances. */ @SuppressWarnings("unchecked") public void reduce(K key, Iterable<V> values, AvroCollector<OUT> collector, Reporter reporter) throws IOException { if (outputPair == null) outputPair = new Pair<>(AvroJob.getOutputSchema(getConf())); for (V value : values) { outputPair.set(key, value); collector.collect((OUT)outputPair); } }
@SuppressWarnings("unchecked") public RecordWriter<TetherData, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name, Progressable prog) throws IOException { Schema schema = AvroJob.getOutputSchema(job); final DataFileWriter writer = new DataFileWriter(new GenericDatumWriter()); if (FileOutputFormat.getCompressOutput(job)) { int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, CodecFactory.DEFAULT_DEFLATE_LEVEL); writer.setCodec(CodecFactory.deflateCodec(level)); } Path path = FileOutputFormat.getTaskOutputPath(job, name+AvroOutputFormat.EXT); writer.create(schema, path.getFileSystem(job).create(path)); return new RecordWriter<TetherData, NullWritable>() { public void write(TetherData datum, NullWritable ignore) throws IOException { writer.appendEncoded(datum.buffer()); } public void close(Reporter reporter) throws IOException { writer.close(); } }; }
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // AvroWrapper used for final output, AvroKey or AvroValue for map output boolean isFinalOutput = c.equals(AvroWrapper.class); Configuration conf = getConf(); Schema schema = isFinalOutput ? AvroJob.getOutputSchema(conf) : (AvroKey.class.isAssignableFrom(c) ? Pair.getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf))); GenericData dataModel = AvroJob.createDataModel(conf); return new AvroWrapperSerializer(dataModel.createDatumWriter(schema)); }
final Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);
@Override public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name, Progressable prog) throws IOException { boolean isMapOnly = job.getNumReduceTasks() == 0; Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job); GenericData dataModel = AvroJob.createDataModel(job); final DataFileWriter<T> writer = new DataFileWriter<T>(dataModel.createDatumWriter(null)); configureDataFileWriter(writer, job); Path path = FileOutputFormat.getTaskOutputPath(job, name+EXT); writer.create(schema, path.getFileSystem(job).create(path)); return new RecordWriter<AvroWrapper<T>, NullWritable>() { public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException { writer.append(wrapper.datum()); } public void close(Reporter reporter) throws IOException { writer.close(); } }; }
public RecordWriter<TypedBytesWritable, TypedBytesWritable> getRecordWriter(FileSystem ignore, JobConf job, String name, Progressable prog) throws IOException { boolean isMapOnly = job.getNumReduceTasks() == 0; Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job); final DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>(schema)); if (FileOutputFormat.getCompressOutput(job)) { int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL); writer.setCodec(CodecFactory.deflateCodec(level)); } // copy metadata from job for (Map.Entry<String, String> e : job) { if (e.getKey().startsWith(AvroJob.TEXT_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue()); if (e.getKey().startsWith(AvroJob.BINARY_PREFIX)) writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()), URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1")); } Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT); writer.create(schema, path.getFileSystem(job).create(path)); return new AvroRecordWriter(writer, schema); }
@Override public void setConf(Configuration conf) { super.setConf(conf); if (conf == null) { return; } _outputSchema = AvroJob.getOutputSchema(conf); AvroDistributedCacheFileReader modelReader = new AvroDistributedCacheFileReader(new JobConf(conf)); try { modelReader.build(conf.get(MODEL_PATH), _modelConsumer); _modelConsumer.done(); } catch (IOException e) { e.printStackTrace(); } _lambda = conf.getFloat(LAMBDA, 0); _ignoreValue = conf.getBoolean(BINARY_FEATURE, false); _logger.info("Loaded the model for test, size:" + _modelConsumer.get().size()); }
public void reduce(TetherData datum, Iterator<NullWritable> ignore, OutputCollector<TetherData, NullWritable> collector, Reporter reporter) throws IOException { try { if (process == null) { process = new TetheredProcess(job, collector, reporter); process.inputClient.configure (TaskType.REDUCE, AvroJob.getMapOutputSchema(job).toString(), AvroJob.getOutputSchema(job).toString()); } process.inputClient.input(datum.buffer(), datum.count()); } catch (IOException e) { error = true; throw e; } catch (Exception e) { error = true; throw new IOException(e); } }
/** Called with all map output values with a given key. By default, pairs * key with each value, collecting {@link Pair} instances. */ @SuppressWarnings("unchecked") public void reduce(K key, Iterable<V> values, AvroCollector<OUT> collector, Reporter reporter) throws IOException { if (outputPair == null) outputPair = new Pair<K,V>(AvroJob.getOutputSchema(getConf())); for (V value : values) { outputPair.set(key, value); collector.collect((OUT)outputPair); } }
/** Called with all map output values with a given key. By default, pairs * key with each value, collecting {@link Pair} instances. */ @SuppressWarnings("unchecked") public void reduce(K key, Iterable<V> values, AvroCollector<OUT> collector, Reporter reporter) throws IOException { if (outputPair == null) outputPair = new Pair<K,V>(AvroJob.getOutputSchema(getConf())); for (V value : values) { outputPair.set(key, value); collector.collect((OUT)outputPair); } }
@SuppressWarnings("unchecked") public RecordWriter<TetherData, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name, Progressable prog) throws IOException { Schema schema = AvroJob.getOutputSchema(job); final DataFileWriter writer = new DataFileWriter(new GenericDatumWriter()); if (FileOutputFormat.getCompressOutput(job)) { int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, AvroOutputFormat.DEFAULT_DEFLATE_LEVEL); writer.setCodec(CodecFactory.deflateCodec(level)); } Path path = FileOutputFormat.getTaskOutputPath(job, name+AvroOutputFormat.EXT); writer.create(schema, path.getFileSystem(job).create(path)); return new RecordWriter<TetherData, NullWritable>() { public void write(TetherData datum, NullWritable ignore) throws IOException { writer.appendEncoded(datum.buffer()); } public void close(Reporter reporter) throws IOException { writer.close(); } }; }
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // AvroWrapper used for final output, AvroKey or AvroValue for map output boolean isFinalOutput = c.equals(AvroWrapper.class); Configuration conf = getConf(); Schema schema = isFinalOutput ? AvroJob.getOutputSchema(conf) : (AvroKey.class.isAssignableFrom(c) ? Pair .getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf))); ReflectDataFactory factory = Avros.getReflectDataFactory(conf); ReflectDatumWriter<T> writer = factory.getWriter(schema); return new AvroWrapperSerializer(writer); }
@SuppressWarnings("unchecked") public RecordWriter<TetherData, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name, Progressable prog) throws IOException { Schema schema = AvroJob.getOutputSchema(job); final DataFileWriter writer = new DataFileWriter(new GenericDatumWriter()); if (FileOutputFormat.getCompressOutput(job)) { int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, CodecFactory.DEFAULT_DEFLATE_LEVEL); writer.setCodec(CodecFactory.deflateCodec(level)); } Path path = FileOutputFormat.getTaskOutputPath(job, name+AvroOutputFormat.EXT); writer.create(schema, path.getFileSystem(job).create(path)); return new RecordWriter<TetherData, NullWritable>() { public void write(TetherData datum, NullWritable ignore) throws IOException { writer.appendEncoded(datum.buffer()); } public void close(Reporter reporter) throws IOException { writer.close(); } }; }
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // AvroWrapper used for final output, AvroKey or AvroValue for map output boolean isFinalOutput = c.equals(AvroWrapper.class); Configuration conf = getConf(); Schema schema = isFinalOutput ? AvroJob.getOutputSchema(conf) : (AvroKey.class.isAssignableFrom(c) ? Pair .getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf))); ReaderWriterFactory factory = AvroMode.fromShuffleConfiguration(conf); DatumWriter<T> writer = factory.getWriter(schema); return new AvroWrapperSerializer(writer); }
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // AvroWrapper used for final output, AvroKey or AvroValue for map output boolean isFinalOutput = c.equals(AvroWrapper.class); Configuration conf = getConf(); Schema schema = isFinalOutput ? AvroJob.getOutputSchema(conf) : (AvroKey.class.isAssignableFrom(c) ? Pair.getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf))); GenericData dataModel = AvroJob.createDataModel(conf); return new AvroWrapperSerializer(dataModel.createDatumWriter(schema)); }
public void reduce(TetherData datum, Iterator<NullWritable> ignore, OutputCollector<TetherData, NullWritable> collector, Reporter reporter) throws IOException { try { if (process == null) { process = new TetheredProcess(job, collector, reporter); process.inputClient.configure (TaskType.REDUCE, new Utf8(AvroJob.getMapOutputSchema(job).toString()), new Utf8(AvroJob.getOutputSchema(job).toString())); } process.inputClient.input(datum.buffer(), datum.count()); } catch (IOException e) { error = true; throw e; } catch (Exception e) { error = true; throw new IOException(e); } }
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // AvroWrapper used for final output, AvroKey or AvroValue for map output boolean isFinalOutput = c.equals(AvroWrapper.class); Configuration conf = getConf(); Schema schema = isFinalOutput ? AvroJob.getOutputSchema(conf) : (AvroKey.class.isAssignableFrom(c) ? Pair.getKeySchema(AvroJob.getMapOutputSchema(conf)) : Pair.getValueSchema(AvroJob.getMapOutputSchema(conf))); ReflectDataFactory factory = Avros.getReflectDataFactory(conf); ReflectDatumWriter<T> writer = factory.getWriter(); writer.setSchema(schema); return new AvroWrapperSerializer(writer); }
/** Returns the specified output serializer. */ public Serializer<AvroWrapper<T>> getSerializer(Class<AvroWrapper<T>> c) { // Here we must rely on mapred.task.is.map to tell whether the map output // or final output is needed. boolean isMap = getConf().getBoolean("mapred.task.is.map", false); Schema schema = !isMap ? AvroJob.getOutputSchema(getConf()) : (AvroKey.class.isAssignableFrom(c) ? Pair.getKeySchema(AvroJob.getMapOutputSchema(getConf())) : Pair.getValueSchema(AvroJob.getMapOutputSchema(getConf()))); return new AvroWrapperSerializer(new SpecificDatumWriter<T>(schema)); }
@Override public RecordWriter<AvroWrapper<T>, NullWritable> getRecordWriter(FileSystem ignore, JobConf job, String name, Progressable prog) throws IOException { boolean isMapOnly = job.getNumReduceTasks() == 0; Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job); GenericData dataModel = AvroJob.createDataModel(job); final DataFileWriter<T> writer = new DataFileWriter<T>(dataModel.createDatumWriter(null)); configureDataFileWriter(writer, job); Path path = FileOutputFormat.getTaskOutputPath(job, name+EXT); writer.create(schema, path.getFileSystem(job).create(path)); return new RecordWriter<AvroWrapper<T>, NullWritable>() { public void write(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException { writer.append(wrapper.datum()); } public void close(Reporter reporter) throws IOException { writer.close(); } }; }