/**
 * Opens an Avro container-file reader for the given input split.
 *
 * <p>The datum reader is obtained from the job's configured input data model
 * (see {@code AvroJob.createInputDataModel}), using the job's input schema as
 * the reader schema, and the resulting file reader is handed to the delegate
 * constructor.
 *
 * @param job   job configuration supplying the data model and input schema
 * @param split the file split to read
 * @throws IOException if the file cannot be opened or its header is invalid
 */
public AvroRecordReader(JobConf job, FileSplit split) throws IOException {
  // Constructor delegation: the call to this(...) must be the first statement,
  // so the reader is built inline rather than in a local variable.
  this(
      DataFileReader.openReader(
          new FsInput(split.getPath(), job),
          AvroJob.createInputDataModel(job).createDatumReader(AvroJob.getInputSchema(job))),
      split);
}
/**
 * Returns a record reader over an Avro column (trevni) file for the old-style
 * Hadoop mapred API. Keys are {@link AvroWrapper}-wrapped datums; values are
 * always {@link NullWritable}.
 *
 * <p>Fix: {@code getProgress()} previously returned {@code row / rows}
 * unconditionally; for a file with zero rows this evaluates to {@code 0f/0f}
 * (NaN), which is an invalid progress value. It now reports 1.0 for an empty
 * file.
 *
 * @param split    the input split (must be a {@link FileSplit})
 * @param job      job configuration; may carry an explicit input schema
 * @param reporter progress reporter, updated with the split description
 * @throws IOException if the column file cannot be opened
 */
@Override
public RecordReader<AvroWrapper<T>, NullWritable> getRecordReader(InputSplit split,
    final JobConf job, Reporter reporter) throws IOException {
  final FileSplit file = (FileSplit) split;
  reporter.setStatus(file.toString());

  final AvroColumnReader.Params params =
      new AvroColumnReader.Params(new HadoopInput(file.getPath(), job));
  // Reflect data model; only pin the reader schema when the job set one
  // explicitly (otherwise the file's own schema is used).
  params.setModel(ReflectData.get());
  if (job.get(AvroJob.INPUT_SCHEMA) != null)
    params.setSchema(AvroJob.getInputSchema(job));

  return new RecordReader<AvroWrapper<T>, NullWritable>() {
    private AvroColumnReader<T> reader = new AvroColumnReader<>(params);
    private float rows = reader.getRowCount(); // float so progress division needs no cast
    private long row;                          // rows consumed so far

    public AvroWrapper<T> createKey() {
      return new AvroWrapper<>(null);
    }

    public NullWritable createValue() {
      return NullWritable.get();
    }

    public boolean next(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      if (!reader.hasNext())
        return false;
      wrapper.datum(reader.next());
      row++;
      return true;
    }

    public float getProgress() throws IOException {
      // An empty file would yield 0f/0f == NaN; report it as fully read.
      return rows == 0 ? 1.0f : row / rows;
    }

    public long getPos() throws IOException {
      return row; // position is expressed in rows, not bytes
    }

    public void close() throws IOException {
      reader.close();
    }
  };
}
/**
 * Opens the Avro container file behind {@code split} and hands the resulting
 * file reader to {@code init(...)}.
 *
 * <p>The datum reader is reflect-based when the job sets
 * {@code AvroJob.INPUT_IS_REFLECT}, and specific-based otherwise; either way
 * the job's input schema is used as the reader schema.
 *
 * @param split the file split to read
 * @param conf  configuration supplying the input schema and reflect flag
 * @throws IOException          if the file cannot be opened
 * @throws InterruptedException declared for the mapreduce lifecycle contract
 */
public void initialize(FileSplit split, Configuration conf)
    throws IOException, InterruptedException {
  final FsInput input = new FsInput(split.getPath(), conf);
  if (conf.getBoolean(AvroJob.INPUT_IS_REFLECT, false)) {
    init(DataFileReader.openReader(input,
        new ReflectDatumReader<T>(AvroJob.getInputSchema(conf))), split);
  } else {
    init(DataFileReader.openReader(input,
        new SpecificDatumReader<T>(AvroJob.getInputSchema(conf))), split);
  }
}
/**
 * Prepares this reader for the given split: opens the underlying Avro data
 * file with a datum reader matching the job configuration and delegates the
 * rest of the setup to {@code init(...)}.
 *
 * <p>A specific datum reader is the default; a reflect datum reader is used
 * only when {@code AvroJob.INPUT_IS_REFLECT} is set.
 *
 * @param split the file split to read
 * @param conf  configuration supplying the input schema and reflect flag
 * @throws IOException          if the file cannot be opened
 * @throws InterruptedException declared for the mapreduce lifecycle contract
 */
public void initialize(FileSplit split, Configuration conf)
    throws IOException, InterruptedException {
  boolean specific = !conf.getBoolean(AvroJob.INPUT_IS_REFLECT, false);
  init(
      DataFileReader.openReader(
          new FsInput(split.getPath(), conf),
          specific
              ? new SpecificDatumReader<T>(AvroJob.getInputSchema(conf))
              : new ReflectDatumReader<T>(AvroJob.getInputSchema(conf))),
      split);
}
/**
 * Builds a record reader for {@code split} by opening its Avro container file.
 *
 * <p>The job's input data model supplies the datum reader (specific, reflect,
 * or generic as configured), with the job's input schema as the reader schema.
 * The opened file reader is passed on to the delegate constructor.
 *
 * @param job   job configuration supplying the data model and input schema
 * @param split the file split to read
 * @throws IOException if the file cannot be opened or its header is invalid
 */
public AvroRecordReader(JobConf job, FileSplit split) throws IOException {
  // this(...) must come first in a constructor, so everything is nested inline.
  this(DataFileReader.openReader(new FsInput(split.getPath(), job),
           AvroJob.createInputDataModel(job)
               .createDatumReader(AvroJob.getInputSchema(job))),
       split);
}
/**
 * Opens the Avro container file behind {@code split} and positions the reader
 * at the first sync point at or after the split's start offset.
 *
 * <p>Fixes:
 * <ul>
 *   <li>{@code start} was taken from {@code in.tell()} — the <em>raw</em>
 *       stream position. The data-file reader's decoder buffers read-ahead,
 *       so the raw position can lie past the logical sync point; upstream
 *       Avro uses {@code reader.tell()}, which reflects the reader's logical
 *       position.</li>
 *   <li>The {@code FsInput} leaked if constructing the {@code DataFileReader}
 *       threw (e.g. corrupt or non-Avro header); it is now closed on
 *       failure.</li>
 * </ul>
 *
 * @param job   job configuration supplying the input schema
 * @param split the file split to read; reading covers [start, end) in bytes
 * @throws IOException if the file cannot be opened or its header is invalid
 */
public AvroRecordReader(JobConf job, FileSplit split) throws IOException {
  this.in = new FsInput(split.getPath(), job);
  Schema s = AvroJob.getInputSchema(job);
  try {
    this.reader = new DataFileReader<T>(in, new SpecificDatumReader<T>(s));
  } catch (IOException e) {
    in.close(); // don't leak the open input when the header can't be read
    throw e;
  }
  reader.sync(split.getStart());                 // sync to start
  this.start = reader.tell();                    // logical position, not raw in.tell()
  this.end = split.getStart() + split.getLength();
}
// NOTE(review): statement fragment — appears to duplicate the params setup
// inside getRecordReader above; confirm it is not dead/stray code.
// Reflect data model is used for reading the column file.
params.setModel(ReflectData.get());
// Pin the reader schema only when the job configured one explicitly;
// presumably the file's own schema is used otherwise — verify against
// AvroColumnReader's behavior.
if (job.get(AvroJob.INPUT_SCHEMA) != null) params.setSchema(AvroJob.getInputSchema(job));