/** {@inheritDoc} */
@Override
public void close() throws IOException {
  reader.close();
}
@Override
public D next() {
  try {
    // Advance every column to the start of the next row before decoding.
    for (int i = 0; i < values.length; i++)
      if (values[i] != null)
        values[i].startRow();
    this.column = 0;
    return (D) read(readSchema);
  } catch (IOException e) {
    throw new TrevniRuntimeException(e);
  }
}
private void checkRead(Schema s, List<Object> data) throws Exception {
  AvroColumnReader<Object> reader =
    new AvroColumnReader<>(new AvroColumnReader.Params(FILE).setSchema(s));
  try {
    for (Object datum : data)
      assertEquals(datum, reader.next());
  } finally {
    reader.close();
  }
}
public void validateCountsFileGenericRecord() throws Exception {
  AvroColumnReader<GenericRecord> reader =
    new AvroColumnReader<>(new AvroColumnReader.Params(countFiles).setModel(SpecificData.get()));
  int numWords = 0;
  for (GenericRecord wc : reader) {
    assertEquals((String) wc.get("key"), COUNTS.get(wc.get("key")), (Long) wc.get("value"));
    numWords++;
  }
  reader.close();
  assertEquals(COUNTS.size(), numWords);
}
@Test
public void test() throws Exception {
  Schema schema = new Schema.Parser().parse(SCHEMA_FILE);

  // Write random data to an Avro data file.
  DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>());
  writer.create(schema, Util.createFromFS(AVRO_FILE.toString()));
  for (Object datum : new RandomData(schema, COUNT, SEED))
    writer.append(datum);
  writer.close();

  // Convert the Avro file to Trevni, then verify the round trip
  // against a regenerated stream with the same seed.
  run(AVRO_FILE.toString(), TREVNI_FILE.toString());
  AvroColumnReader<Object> reader =
    new AvroColumnReader<>(new AvroColumnReader.Params(TREVNI_FILE));
  Iterator<Object> found = reader.iterator();
  for (Object expected : new RandomData(schema, COUNT, SEED))
    assertEquals(expected, found.next());
  reader.close();
}
@Test
public void testTrevniEvolvedRead() throws IOException {
  AvroColumnWriter<GenericRecord> acw =
    new AvroColumnWriter<>(writer, new ColumnFileMetaData());
  acw.write(writtenRecord);
  File serializedTrevni = File.createTempFile("trevni", null);
  acw.writeTo(serializedTrevni);

  // Read the file back with the evolved schema.
  AvroColumnReader.Params params = new Params(serializedTrevni);
  params.setSchema(evolved);
  AvroColumnReader<GenericRecord> acr = new AvroColumnReader<>(params);
  GenericRecord readRecord = acr.next();
  Assert.assertEquals(evolvedRecord, readRecord);
  Assert.assertFalse(acr.hasNext());
}
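// A hedged illustration (not the test's actual fixtures): a written schema and
// an "evolved" read schema that adds a field with a default value. This is the
// shape of schema that setSchema(evolved) above resolves against the file schema.
import org.apache.avro.Schema;

class EvolvedSchemas {
  static final Schema WRITTEN = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"R\",\"fields\":[" +
    "{\"name\":\"id\",\"type\":\"long\"}]}");
  // Adds a "tag" field; its default fills in for rows written without it.
  static final Schema EVOLVED = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"R\",\"fields\":[" +
    "{\"name\":\"id\",\"type\":\"long\"}," +
    "{\"name\":\"tag\",\"type\":\"string\",\"default\":\"none\"}]}");
}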
/** {@inheritDoc} */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
    throws IOException, InterruptedException {
  final FileSplit file = (FileSplit) inputSplit;
  context.setStatus(file.toString());
  final AvroColumnReader.Params params =
    new AvroColumnReader.Params(new HadoopInput(file.getPath(), context.getConfiguration()));
  params.setModel(ReflectData.get());
  if (AvroJob.getInputKeySchema(context.getConfiguration()) != null) {
    params.setSchema(AvroJob.getInputKeySchema(context.getConfiguration()));
  }
  reader = new AvroColumnReader<>(params);
  rows = reader.getRowCount();
}
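// A minimal, hedged setup sketch (not from the source): configures a mapreduce
// job so that initialize() above picks up a read schema via
// AvroJob.getInputKeySchema(). The class name, method name, and input path are
// illustrative placeholders.
import org.apache.avro.Schema;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.trevni.avro.mapreduce.AvroTrevniKeyInputFormat;

public class TrevniJobSetup {
  public static Job configure(Schema readSchema) throws Exception {
    Job job = Job.getInstance(new Configuration(), "trevni-read");
    // Optional: initialize() falls back to the file's own schema when unset.
    AvroJob.setInputKeySchema(job, readSchema);
    job.setInputFormatClass(AvroTrevniKeyInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path("/tmp/trevni-input")); // placeholder path
    return job;
  }
}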
// Reads the schema stored in a Trevni file's metadata.
AvroColumnReader.Params params =
  new AvroColumnReader.Params(new HadoopInput(filePath, job.getConfiguration()));
AvroColumnReader<GenericData.Record> reader = new AvroColumnReader<>(params);
Schema s = reader.getFileSchema();
reader.close();
return s;
private Object read(Schema s) throws IOException {
  if (isSimple(s))
    return nextValue(s, column++);

  final int startColumn = column;
  switch (s.getType()) {
  case MAP:
    int size = values[column].nextLength();
    Map<String,Object> map = new HashMap<>(size);
    for (int i = 0; i < size; i++) {
      this.column = startColumn;
      values[column++].nextValue();                       // null in parent
      String key = (String) values[column++].nextValue(); // key
      map.put(key, read(s.getValueType()));               // value
    }
    column = startColumn + arrayWidths[startColumn];
    return map;
  case RECORD:
    Object record = model.newRecord(null, s);
    // rDefaults holds default values for fields added by schema evolution;
    // its declaration was elided in the original excerpt.
    Map<String,Object> rDefaults = defaults.get(s.getFullName());
    for (Field f : s.getFields()) {
      Object value = ((rDefaults != null) && rDefaults.containsKey(f.name()))
        ? model.deepCopy(f.schema(), rDefaults.get(f.name()))
        : read(f.schema());
      model.setField(record, f.name(), f.pos(), value);
    }
    return record;
  case ARRAY:
    int length = values[column].nextLength();
    List<Object> elements = new GenericData.Array<>(length, s);
    for (int i = 0; i < length; i++) {
      this.column = startColumn;
      Object value = nextValue(s, column++);
      if (!isSimple(s.getElementType()))
        value = read(s.getElementType());
      elements.add(value);
    }
    column = startColumn + arrayWidths[startColumn];
    return elements;
  case UNION:
    Object value = null;
    for (Schema branch : s.getTypes()) {
      if (branch.getType() == Schema.Type.NULL) continue;
      if (values[column].nextLength() == 1) { // this branch is present in the row
        value = nextValue(branch, column);
        column++;
        if (!isSimple(branch))
          value = read(branch);
      } else {
        column += arrayWidths[column];        // skip the absent branch's columns
      }
    }
    return value;
  default:
    throw new TrevniRuntimeException("Unknown schema: " + s);
  }
}
/** Construct a reader for a file. */
public AvroColumnReader(Params params) throws IOException {
  this.reader = new ColumnFileReader(params.input);
  this.model = params.model;
  this.fileSchema =
    Schema.parse(reader.getMetaData().getString(AvroColumnWriter.SCHEMA_KEY));
  this.readSchema = params.schema == null ? fileSchema : params.schema;
  initialize();
}
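// A minimal, hedged usage sketch (not from the source) for the constructor
// above: open a Trevni file and iterate its rows as generic records.
// The file name is a placeholder.
import java.io.File;
import org.apache.avro.generic.GenericRecord;
import org.apache.trevni.avro.AvroColumnReader;

public class ReadTrevniFile {
  public static void main(String[] args) throws Exception {
    AvroColumnReader.Params params = new AvroColumnReader.Params(new File("data.trv"));
    // params.setSchema(...) would read with a projected or evolved schema instead.
    try (AvroColumnReader<GenericRecord> reader = new AvroColumnReader<>(params)) {
      for (GenericRecord record : reader) // AvroColumnReader is Iterable and Closeable
        System.out.println(record);
    }
  }
}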
private void checkRead(Schema schema) throws IOException {
  AvroColumnReader<Object> reader =
    new AvroColumnReader<>(new AvroColumnReader.Params(FILE).setSchema(schema));
  for (Object expected : new RandomData(schema, COUNT, SEED))
    assertEquals(expected, reader.next());
  reader.close();
}
public void validateCountsFile() throws Exception {
  AvroColumnReader<Pair<String,Long>> reader =
    new AvroColumnReader<>(new AvroColumnReader.Params(countFiles).setModel(SpecificData.get()));
  int numWords = 0;
  for (Pair<String,Long> wc : reader) {
    assertEquals(wc.key(), COUNTS.get(wc.key()), wc.value());
    numWords++;
  }
  reader.close();
  assertEquals(COUNTS.size(), numWords);
}
@Override
public RecordReader<AvroWrapper<T>, NullWritable> getRecordReader(
    InputSplit split, final JobConf job, Reporter reporter) throws IOException {
  final FileSplit file = (FileSplit) split;
  reporter.setStatus(file.toString());
  final AvroColumnReader.Params params =
    new AvroColumnReader.Params(new HadoopInput(file.getPath(), job));
  params.setModel(ReflectData.get());
  if (job.get(AvroJob.INPUT_SCHEMA) != null)
    params.setSchema(AvroJob.getInputSchema(job));

  return new RecordReader<AvroWrapper<T>, NullWritable>() {
    private AvroColumnReader<T> reader = new AvroColumnReader<>(params);
    private float rows = reader.getRowCount();
    private long row;

    public AvroWrapper<T> createKey() { return new AvroWrapper<>(null); }

    public NullWritable createValue() { return NullWritable.get(); }

    public boolean next(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
      if (!reader.hasNext())
        return false;
      wrapper.datum(reader.next());
      row++;
      return true;
    }

    public float getProgress() throws IOException { return row / rows; }

    public long getPos() throws IOException { return row; }

    public void close() throws IOException { reader.close(); }
  };
}
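// A hedged sketch (assumption: AvroTrevniInputFormat is the input format that
// defines getRecordReader() above; the class name and input path below are
// placeholders) wiring the old-style mapred job. AvroJob.setInputSchema may
// install Avro's default input format, so the Trevni format is set afterwards.
import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroJob;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.trevni.avro.AvroTrevniInputFormat;

public class TrevniMapredSetup {
  public static JobConf configure(Schema readSchema) {
    JobConf job = new JobConf();
    AvroJob.setInputSchema(job, readSchema); // optional read schema for evolution
    job.setInputFormat(AvroTrevniInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path("/tmp/trevni-input")); // placeholder path
    return job;
  }
}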
private Object read(Schema s) throws IOException {
  if (isSimple(s))
    return nextValue(s, column++);

  final int startColumn = column;
  switch (s.getType()) {
  case MAP:
    int size = values[column].nextLength();
    Map<String,Object> map = new HashMap<>(size);
    for (int i = 0; i < size; i++) {
      this.column = startColumn;
      values[column++].nextValue();                       // null in parent
      String key = (String) values[column++].nextValue(); // key
      map.put(key, read(s.getValueType()));               // value
    }
    column = startColumn + arrayWidths[startColumn];
    return map;
  case RECORD:
    Object record = model.newRecord(null, s);
    for (Field f : s.getFields())
      model.setField(record, f.name(), f.pos(), read(f.schema()));
    return record;
  case ARRAY:
    int length = values[column].nextLength();
    List<Object> elements = new GenericData.Array<>(length, s);
    for (int i = 0; i < length; i++) {
      this.column = startColumn;
      Object value = nextValue(s, column++);
      if (!isSimple(s.getElementType()))
        value = read(s.getElementType());
      elements.add(value);
    }
    column = startColumn + arrayWidths[startColumn];
    return elements;
  case UNION:
    Object value = null;
    for (Schema branch : s.getTypes()) {
      if (branch.getType() == Schema.Type.NULL) continue;
      if (values[column].nextLength() == 1) { // this branch is present in the row
        value = nextValue(branch, column);
        column++;
        if (!isSimple(branch))
          value = read(branch);
      } else {
        column += arrayWidths[column];        // skip the absent branch's columns
      }
    }
    return value;
  default:
    throw new TrevniRuntimeException("Unknown schema: " + s);
  }
}
public static void validateCountsFile() throws Exception {
  AvroColumnReader<Pair<String,Long>> reader =
    new AvroColumnReader<>(new AvroColumnReader.Params(COUNTS_FILE).setModel(SpecificData.get()));
  int numWords = 0;
  for (Pair<String,Long> wc : reader) {
    assertEquals(wc.key(), COUNTS.get(wc.key()), wc.value());
    numWords++;
  }
  reader.close();
  assertEquals(COUNTS.size(), numWords);
}