/**
 * Returns the next record from the Avro data file, or {@code null} once the
 * end of the input has been reached.
 *
 * <p>Also maintains checkpoint bookkeeping: whenever the reader crosses into a
 * new block (detected by a changed {@code previousSync()} position), the last
 * sync mark is recorded and the per-block record counter is reset, so a later
 * restore can seek to the sync point and skip forward by that counter.
 *
 * @param reuseValue instance to reuse for decoding when reuse is enabled
 * @return the next record, or {@code null} at end of input
 * @throws IOException if reading from the underlying file fails
 */
@Override
public E nextRecord(E reuseValue) throws IOException {
    if (reachedEnd()) {
        return null;
    }

    // if we start a new block, then register the event, and
    // restart the counter.
    if (dataFileReader.previousSync() != lastSync) {
        lastSync = dataFileReader.previousSync();
        recordsReadSinceLastSync = 0;
    }
    recordsReadSinceLastSync++;

    if (reuseAvroValue) {
        return dataFileReader.next(reuseValue);
    } else {
        if (GenericRecord.class == avroValueType) {
            // Generic records: let the reader allocate the instance itself.
            return dataFileReader.next();
        } else {
            // Specific/reflect types: hand the reader a fresh instance to fill.
            return dataFileReader.next(InstantiationUtil.instantiate(avroValueType, Object.class));
        }
    }
}
/**
 * Reads and returns the first datum in a data file.
 *
 * @param schema the reader schema used to decode the datum
 * @param file   path of the Avro container file to read
 * @return the first datum in the file
 * @throws IOException if the file cannot be opened or read
 * @throws java.util.NoSuchElementException if the file contains no data
 */
static Object datumFromFile(Schema schema, String file) throws IOException {
    // try-with-resources replaces the manual try/finally close.
    try (DataFileReader<Object> in =
            new DataFileReader<>(new File(file), new GenericDatumReader<>(schema))) {
        return in.next();
    }
}
/**
 * Read the last record in the file.
 *
 * <p>Seeks close to the end of the tracker file, lets {@code sync()} find the
 * next block boundary, then drains the remaining records so {@code metaCache}
 * ends up holding the last one.
 */
private void initReader() throws IOException {
    // Clamp the candidate position at zero for files shorter than 256 bytes.
    long syncPos = Math.max(0L, trackerFile.length() - 256L);
    reader.sync(syncPos);
    while (reader.hasNext()) {
        reader.next(metaCache);
    }
}
/**
 * Returns the next key/value pair from the Avro file, or {@code null} when the
 * reader is exhausted.
 *
 * <p>Fix: guard with {@code hasNext()} before calling {@code next()} —
 * {@code DataFileReader.next()} does not return {@code null} at end of input,
 * it throws {@code NoSuchElementException}, so the original null-check on the
 * returned record could never fire.
 *
 * @return the next KeyValue (with an incrementing offset), or null at EOF
 * @throws IOException if reading or serialization fails
 */
@Override
public KeyValue next() throws IOException {
    if (!reader.hasNext()) {
        return null;
    }
    GenericRecord record = reader.next();
    return new KeyValue(offset++, serializeAvroRecord(writer, record));
}
/** {@inheritDoc} */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    assert null != mAvroFileReader;
    // Stop when the reader is drained or we have read past this split's end.
    if (!mAvroFileReader.hasNext() || mAvroFileReader.pastSync(mEndPosition)) {
        return false;
    }
    mCurrentRecord = mAvroFileReader.next(mCurrentRecord);
    return true;
}
/**
 * Reads the next datum and renders it as a string.
 *
 * @param reuse ignored; kept for interface compatibility
 * @return the next record's string form, or {@code null} when there is no
 *         reader or no further records
 * @throws IOException if reading fails
 */
@Override
public String readRecord(@Deprecated String reuse) throws IOException {
    // A missing reader and an exhausted reader both yield null.
    if (this.dataFileReader != null && this.dataFileReader.hasNext()) {
        return this.dataFileReader.next().toString();
    }
    return null;
}
@Override public Event readEvent() throws IOException { if (fileReader.hasNext()) { record = fileReader.next(record); out.reset(); datumWriter.write(record, encoder); encoder.flush(); // annotate header with 64-bit schema CRC hash in hex Event event = EventBuilder.withBody(out.toByteArray()); if (schemaType == AvroSchemaType.HASH) { event.getHeaders().put(AVRO_SCHEMA_HEADER_HASH, schemaHashString); } else { event.getHeaders().put(AVRO_SCHEMA_HEADER_LITERAL, schema.toString()); } return event; } return null; }
/**
 * Reads the first record from the Avro container file at {@code path}.
 *
 * @param path filesystem path of the Avro file
 * @return the first record, or {@code null} if the file has no records
 * @throws IOException if the file cannot be opened or read
 */
private GenericRecord getRecordFromFile(String path) throws IOException {
    DatumReader<GenericRecord> reader = new GenericDatumReader<>();
    // try-with-resources: the original leaked the reader's file handle.
    try (DataFileReader<GenericRecord> dataFileReader =
            new DataFileReader<>(new File(path), reader)) {
        if (dataFileReader.hasNext()) {
            return dataFileReader.next();
        }
        return null;
    }
}
/**
 * Reads the first record from the Avro container file at {@code path}.
 *
 * <p>Fixes: the reader was never closed (file-handle leak), and the
 * {@code while} loop always returned on its first iteration, so it is now a
 * plain {@code if} — consistent with the sibling overload.
 *
 * @param path filesystem path of the Avro file
 * @return the first record, or {@code null} if the file has no records
 * @throws IOException if the file cannot be opened or read
 */
private GenericRecord getRecordFromFile(String path) throws IOException {
    DatumReader<GenericRecord> reader = new GenericDatumReader<>();
    try (DataFileReader<GenericRecord> dataFileReader =
            new DataFileReader<>(new File(path), reader)) {
        if (dataFileReader.hasNext()) {
            return dataFileReader.next();
        }
        return null;
    }
}
/**
 * Restores this input format to a previously checkpointed position.
 *
 * <p>Re-opens the split, then — if the checkpointed state carries a valid sync
 * mark ({@code f0 != -1}) — seeks the Avro reader to that sync point and
 * replays (discards) {@code recordsReadSinceLastSync} records so the next read
 * continues exactly where the checkpoint left off.
 *
 * @param split the input split to re-open; must not be null
 * @param state tuple of (last sync position, records read since that sync);
 *              {@code f0 == -1} means there is no position to restore
 * @throws IOException if re-opening the split or seeking fails
 */
@Override
public void reopen(FileInputSplit split, Tuple2<Long, Long> state) throws IOException {
    Preconditions.checkNotNull(split, "reopen() cannot be called on a null split.");
    Preconditions.checkNotNull(state, "reopen() cannot be called with a null initial state.");

    try {
        this.open(split);
    } finally {
        // Restore the bookkeeping even if open() threw, so a retry can
        // still seek to the right place.
        if (state.f0 != -1) {
            lastSync = state.f0;
            recordsReadSinceLastSync = state.f1;
        }
    }

    if (lastSync != -1) {
        // open and read until the record we were before
        // the checkpoint and discard the values
        dataFileReader.seek(lastSync);
        for (int i = 0; i < recordsReadSinceLastSync; i++) {
            dataFileReader.next(null);
        }
    }
} }
/**
 * Loads the schema and the first record from classpath test resources and
 * rebuilds the accessor under test.
 *
 * <p>Fix: the DataFileReader was never closed — it is now managed with
 * try-with-resources.
 *
 * @param resourceName base name of the {@code .avsc} schema resource
 * @param avroFileName name of the {@code .avro} data resource; when null,
 *                     defaults to {@code resourceName + ".avro"}
 * @throws IOException if the resources cannot be read
 */
private void updateRecordFromTestResource(String resourceName, String avroFileName) throws IOException {
    if (avroFileName == null) {
        avroFileName = resourceName + ".avro";
    }
    recordSchema = new Schema.Parser().parse(
        getClass().getClassLoader().getResourceAsStream(resourceName + ".avsc")
    );
    DatumReader<GenericRecord> reader = new GenericDatumReader<>(recordSchema);
    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(
            new File(getClass().getClassLoader().getResource(avroFileName).getPath()), reader)) {
        Assert.assertTrue(dataFileReader.hasNext());
        record = dataFileReader.next(record);
    }
    accessor = new AvroGenericRecordAccessor(record);
}
/**
 * Opens an in-memory Avro container and hands its header bytes plus the first
 * record to the given preview processor.
 *
 * <p>The header length is taken from {@code previousSync()} immediately after
 * opening, i.e. the byte offset where the first data block starts.
 *
 * @param bits      complete Avro container file contents
 * @param processor callback receiving (header bytes, first record, block
 *                  count, block size)
 * @return whatever the processor produces
 * @throws IOException if the bytes cannot be parsed as an Avro container
 * @throws RuntimeException if the file contains no records
 */
static <T> T runOnPreview(byte[] bits, AvroPreviewProcessor<T> processor) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
    SeekableByteArrayInput sbai = new SeekableByteArrayInput(bits);
    DataFileReader<GenericRecord> dataFileReader = null;
    try {
        dataFileReader = new DataFileReader<>(sbai, datumReader);
        // previousSync() right after open == length of the file header.
        int headerLen = (int) dataFileReader.previousSync();
        byte[] header = Arrays.copyOf(bits, headerLen);
        if (dataFileReader.hasNext()) {
            GenericRecord gr = dataFileReader.next();
            return processor.process(header, gr, dataFileReader.getBlockCount(), dataFileReader.getBlockSize());
        } else {
            throw new RuntimeException("Empty Avro file - cannot run preview! ");
        }
    } finally {
        // Best-effort close: a close failure must not mask the real result
        // or exception.
        try { if (dataFileReader!=null) dataFileReader.close(); } catch (IOException safeToIgnore) {}
    }
}
private Map<String, GenericRecord> getGenericRecordMap(byte[] data, Schema schema, String key) throws IOException { // create a reader for the merged contet DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema); SeekableByteArrayInput input = new SeekableByteArrayInput(data); DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(input, datumReader); // read all the records into a map to verify all the records are there Map<String,GenericRecord> records = new HashMap<>(); while (dataFileReader.hasNext()) { GenericRecord user = dataFileReader.next(); records.put(user.get(key).toString(), user); } return records; }
/**
 * Sanity-checks that the file at {@code path} is a readable Avro container:
 * copies it from the (possibly remote) filesystem to a local scratch file,
 * then reads every record through a GenericDatumReader. A malformed file
 * surfaces as an exception from the reader.
 *
 * <p>NOTE(review): the scratch path "target/FOO.avro" is hardcoded and the
 * {@code file.delete()} return value is ignored — acceptable for a test
 * helper, but not safe for concurrent test runs.
 *
 * @param path filesystem path of the Avro file to verify
 * @throws IOException if copying or reading fails (i.e. the file is bad)
 */
private void fileIsGoodAvro(Path path) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
    // Copy the file locally so DataFileReader can do seekable reads on it.
    try (FSDataInputStream in = fs.open(path, 0);
            FileOutputStream out = new FileOutputStream("target/FOO.avro")) {
        byte[] buffer = new byte[100];
        int bytesRead;
        while ((bytesRead = in.read(buffer)) > 0) {
            out.write(buffer, 0, bytesRead);
        }
    }
    java.io.File file = new File("target/FOO.avro");
    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(file, datumReader)) {
        GenericRecord user = null;
        // Drain all records; any corruption throws here.
        while (dataFileReader.hasNext()) {
            user = dataFileReader.next(user);
        }
    }
    file.delete();
} }
/**
 * Asserts that the repaired Avro file contains exactly the given records, in
 * order, comparing their string renderings.
 *
 * <p>Fixes: the reader used a raw {@code DataFileReader} type and was never
 * closed — it is now parameterized and managed with try-with-resources.
 *
 * @param repairedFile the Avro file to inspect
 * @param lines        expected {@code toString()} form of each record, in order
 * @throws IOException if the file cannot be read
 */
private void checkFileContains(File repairedFile, String... lines) throws IOException {
    try (DataFileReader<GenericRecord> r =
            new DataFileReader<>(repairedFile, new GenericDatumReader<>(SCHEMA))) {
        for (String line : lines) {
            assertEquals(line, r.next().toString());
        }
        // No extra records beyond the expected ones.
        assertFalse(r.hasNext());
    }
}
@Test public void testGenericRecord() throws IOException { final Path outputPath = new Path(File.createTempFile("avro-output-file", "generic.avro").getAbsolutePath()); final AvroOutputFormat<GenericRecord> outputFormat = new AvroOutputFormat<>(outputPath, GenericRecord.class); Schema schema = new Schema.Parser().parse("{\"type\":\"record\", \"name\":\"user\", \"fields\": [{\"name\":\"user_name\", \"type\":\"string\"}, {\"name\":\"favorite_number\", \"type\":\"int\"}, {\"name\":\"favorite_color\", \"type\":\"string\"}]}"); outputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE); outputFormat.setSchema(schema); output(outputFormat, schema); GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema); DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(new File(outputPath.getPath()), reader); while (dataFileReader.hasNext()) { GenericRecord record = dataFileReader.next(); assertEquals(record.get("user_name").toString(), "testUser"); assertEquals(record.get("favorite_number"), 1); assertEquals(record.get("favorite_color").toString(), "blue"); } //cleanup FileSystem fs = FileSystem.getLocalFileSystem(); fs.delete(outputPath, false); }
/**
 * Reads the generated file back through a GenericDatumReader. When VALIDATE is
 * set, each datum is compared against the deterministic RandomData stream
 * (same SCHEMA/COUNT/SEED used to write); otherwise the file is simply
 * drained to prove it is readable.
 *
 * @throws IOException if the file cannot be read
 */
public void testGenericRead() throws IOException {
    // try-with-resources replaces the manual try/finally close.
    try (DataFileReader<Object> reader =
            new DataFileReader<>(makeFile(), new GenericDatumReader<>())) {
        Object datum = null;
        if (VALIDATE) {
            for (Object expected : new RandomData(SCHEMA, COUNT, SEED)) {
                datum = reader.next(datum);
                assertEquals(expected, datum);
            }
        } else {
            for (int i = 0; i < COUNT; i++) {
                datum = reader.next(datum);
            }
        }
    }
}
// NOTE(review): fragment — the enclosing method and the loop that reads from
// dataFileReader1 begin outside this view; tokens left untouched.
User user = dataFileReader1.next();
result1.add(user.getName() + "|" + user.getFavoriteNumber() + "|" + user.getFavoriteColor());
// Second pass: read the same output back with the reflect-based reader and
// collect the same "name|number|color" projection for later comparison.
DataFileReader<ReflectiveUser> dataFileReader2 = new DataFileReader<>(avroOutput, userDatumReader2);
while (dataFileReader2.hasNext()) {
    ReflectiveUser user = dataFileReader2.next();
    result2.add(user.getName() + "|" + user.getFavoriteNumber() + "|" + user.getFavoriteColor());
// NOTE(review): fragment — braces opened here are closed outside this view.
if (sbai.chunkCnt == 0) { // Find data in first chunk
    // Read records only while the reader is still inside the block that starts
    // at `sync`; stop as soon as previousSync() advances past that marker.
    while (dataFileReader.hasNext() && dataFileReader.previousSync() == sync) {
        gr = dataFileReader.next(gr);
/**
 * Test that non-string map-keys are readable through ReflectDatumReader
 * This method should form the original map and should not return any
 * array of {key, value} as done by {@link #testGenericDatumRead()}
 *
 * <p>Fixes: the DataFileReader was never closed (leak) and the local
 * {@code schema} variable was unused.
 *
 * @param testType   label of the scenario being exercised
 * @param bytes      serialized Avro container-file contents
 * @param entityObjs varargs present only to bind the type parameter T
 * @return all records deserialized from {@code bytes}, in file order
 * @throws IOException if the bytes cannot be read as an Avro container
 */
private <T> List<T> testReflectDatumRead (String testType, byte[] bytes, T ... entityObjs)
        throws IOException {
    ReflectDatumReader<T> datumReader = new ReflectDatumReader<>();
    SeekableByteArrayInput avroInputStream = new SeekableByteArrayInput(bytes);
    List<T> records = new ArrayList<>();
    try (DataFileReader<T> fileReader = new DataFileReader<>(avroInputStream, datumReader)) {
        T record = null;
        while (fileReader.hasNext()) {
            records.add(fileReader.next(record));
        }
    }
    return records;
}