/**
 * Reports whether this reader has nothing further to return.
 *
 * @return {@code true} when the underlying reader has no next record, or when it reports
 *     being past the sync point for {@code end}; {@code false} otherwise
 * @throws IOException propagated from the underlying reader's position check
 */
@Override
public boolean reachedEnd() throws IOException {
  if (!dataFileReader.hasNext()) {
    return true;
  }
  // Records remain in the file; stop only if they lie beyond this reader's end position.
  return dataFileReader.pastSync(end);
}
/**
 * Reads a batch of events by calling {@code readEvent()} repeatedly.
 *
 * @param numEvents maximum number of read attempts; a value of zero or less yields an empty list
 * @return the non-null events that were read, in read order; may hold fewer than
 *     {@code numEvents} entries when the reader runs out of records
 * @throws IOException propagated from {@code readEvent()}
 */
@Override
public List<Event> readEvents(int numEvents) throws IOException {
  List<Event> batch = Lists.newArrayList();
  int attempts = 0;
  while (attempts < numEvents && fileReader.hasNext()) {
    Event next = readEvent();
    // A null result still counts as an attempt; it is simply not added to the batch.
    if (next != null) {
      batch.add(next);
    }
    attempts++;
  }
  return batch;
}
/**
 * Read the last record in the file.
 *
 * <p>Asks the reader to sync from an offset 256 bytes before the end of {@code trackerFile}
 * (clamped to the start of the file) and then consumes every remaining record, passing
 * {@code metaCache} as the reuse object each time.
 *
 * <p>NOTE(review): the value returned by {@code reader.next(...)} is discarded, so the last
 * record is only retained if the reader fills {@code metaCache} in place - confirm.
 *
 * @throws IOException propagated from the underlying reader
 */
private void initReader() throws IOException {
  long startOffset = Math.max(0L, trackerFile.length() - 256L);
  reader.sync(startOffset);
  while (reader.hasNext()) {
    reader.next(metaCache);
  }
}
/**
 * {@inheritDoc}
 *
 * <p>Advances to the next record when one exists and the reader is not yet past the sync
 * point for {@code mEndPosition}; the record is stored in {@code mCurrentRecord}, which is
 * also handed to the reader as the reuse object.
 */
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
  assert null != mAvroFileReader;
  boolean exhausted = !mAvroFileReader.hasNext() || mAvroFileReader.pastSync(mEndPosition);
  if (exhausted) {
    return false;
  }
  mCurrentRecord = mAvroFileReader.next(mCurrentRecord);
  return true;
}
/**
 * Returns the string form of the next record from the underlying reader.
 *
 * @param reuse ignored by this implementation
 * @return {@code toString()} of the next record, or {@code null} when no reader has been set
 *     or the reader has no further records
 * @throws IOException declared by the overridden method
 */
@Override
public String readRecord(@Deprecated String reuse) throws IOException {
  boolean nothingToRead = this.dataFileReader == null || !this.dataFileReader.hasNext();
  if (nothingToRead) {
    return null;
  }
  return this.dataFileReader.next().toString();
}
@Override public Event readEvent() throws IOException { if (fileReader.hasNext()) { record = fileReader.next(record); out.reset(); datumWriter.write(record, encoder); encoder.flush(); // annotate header with 64-bit schema CRC hash in hex Event event = EventBuilder.withBody(out.toByteArray()); if (schemaType == AvroSchemaType.HASH) { event.getHeaders().put(AVRO_SCHEMA_HEADER_HASH, schemaHashString); } else { event.getHeaders().put(AVRO_SCHEMA_HEADER_LITERAL, schema.toString()); } return event; } return null; }
/**
 * Reads the first record from the Avro data file at {@code path}.
 *
 * @param path file-system path of the Avro data file to open
 * @return the first record in the file, or {@code null} when the file holds no records
 * @throws IOException if the file cannot be opened or read
 */
private GenericRecord getRecordFromFile(String path) throws IOException {
  DatumReader<GenericRecord> reader = new GenericDatumReader<>();
  // try-with-resources: the reader holds an open file handle that must be released
  // on every exit path, including when reading throws.
  try (DataFileReader<GenericRecord> dataFileReader =
      new DataFileReader<>(new File(path), reader)) {
    return dataFileReader.hasNext() ? dataFileReader.next() : null;
  }
}
/**
 * Reads the first record from the Avro data file at {@code path}.
 *
 * @param path file-system path of the Avro data file to open
 * @return the first record in the file, or {@code null} when the file holds no records
 * @throws IOException if the file cannot be opened or read
 */
private GenericRecord getRecordFromFile(String path) throws IOException {
  DatumReader<GenericRecord> reader = new GenericDatumReader<>();
  // try-with-resources: the reader holds an open file handle that must be released
  // on every exit path, including when reading throws.
  try (DataFileReader<GenericRecord> dataFileReader =
      new DataFileReader<>(new File(path), reader)) {
    // Only the first record is wanted, so a plain conditional replaces the former
    // loop that could never run more than once.
    if (dataFileReader.hasNext()) {
      return dataFileReader.next();
    }
    return null;
  }
}
/**
 * Loads a schema and its first data record from classpath test resources and rebuilds the
 * accessor around that record.
 *
 * <p>The schema is parsed from {@code resourceName + ".avsc"} into {@code recordSchema};
 * the first record of the data file is stored in {@code record} and wrapped in a new
 * {@code AvroGenericRecordAccessor} assigned to {@code accessor}.
 *
 * @param resourceName base name of the schema resource, without the {@code .avsc} suffix
 * @param avroFileName classpath name of the Avro data file; when {@code null},
 *     {@code resourceName + ".avro"} is used
 * @throws IOException if the schema or the data file cannot be read
 */
private void updateRecordFromTestResource(String resourceName, String avroFileName)
    throws IOException {
  if (avroFileName == null) {
    avroFileName = resourceName + ".avro";
  }

  // Close the schema stream explicitly rather than relying on the parser to do so.
  try (java.io.InputStream schemaStream =
      getClass().getClassLoader().getResourceAsStream(resourceName + ".avsc")) {
    recordSchema = new Schema.Parser().parse(schemaStream);
  }

  DatumReader<GenericRecord> reader = new GenericDatumReader<>(recordSchema);
  File avroFile = new File(getClass().getClassLoader().getResource(avroFileName).getPath());
  // The reader is closed even when the assertion below fails.
  try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(avroFile, reader)) {
    Assert.assertTrue(dataFileReader.hasNext());
    record = dataFileReader.next(record);
  }
  accessor = new AvroGenericRecordAccessor(record);
}
/**
 * Opens {@code bits} as an in-memory Avro data file, reads its first record, and hands that
 * record to {@code processor} together with the file header bytes.
 *
 * <p>The header is taken to be the leading bytes of {@code bits} up to the position the
 * reader reports as its previous sync immediately after opening.
 *
 * @param bits raw bytes of the Avro data file (or a leading portion of it)
 * @param processor callback receiving the header bytes, the first record, and the block
 *     count and block size reported by the reader after that record was read
 * @param <T> result type produced by {@code processor}
 * @return whatever {@code processor} returns
 * @throws IOException if the data cannot be opened or read
 * @throws RuntimeException if the data holds no records
 */
static <T> T runOnPreview(byte[] bits, AvroPreviewProcessor<T> processor) throws IOException {
  SeekableByteArrayInput input = new SeekableByteArrayInput(bits);
  DatumReader<GenericRecord> recordReader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> avroReader = null;
  try {
    avroReader = new DataFileReader<>(input, recordReader);
    int headerLength = (int) avroReader.previousSync();
    byte[] header = Arrays.copyOf(bits, headerLength);
    if (!avroReader.hasNext()) {
      throw new RuntimeException("Empty Avro file - cannot run preview! ");
    }
    GenericRecord firstRecord = avroReader.next();
    return processor.process(
        header, firstRecord, avroReader.getBlockCount(), avroReader.getBlockSize());
  } finally {
    if (avroReader != null) {
      try {
        avroReader.close();
      } catch (IOException safeToIgnore) {
        // Best-effort close: a failure here must not mask the result or the original error.
      }
    }
  }
}
private Map<String, GenericRecord> getGenericRecordMap(byte[] data, Schema schema, String key) throws IOException { // create a reader for the merged contet DatumReader<GenericRecord> datumReader = new GenericDatumReader<>(schema); SeekableByteArrayInput input = new SeekableByteArrayInput(data); DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(input, datumReader); // read all the records into a map to verify all the records are there Map<String,GenericRecord> records = new HashMap<>(); while (dataFileReader.hasNext()) { GenericRecord user = dataFileReader.next(); records.put(user.get(key).toString(), user); } return records; }
/**
 * Checks that the file at {@code path} is a readable Avro data file by copying it to a
 * local scratch file and decoding every record in it.
 *
 * <p>The scratch copy is removed when the method exits, whether reading succeeded or threw.
 *
 * @param path location of the file to check, opened through {@code fs}
 * @throws IOException if the file cannot be copied, or cannot be opened or read as Avro
 */
private void fileIsGoodAvro(Path path) throws IOException {
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  java.io.File file = new File("target/FOO.avro");
  try {
    // Copy to the local file system first; DataFileReader is opened on a java.io.File here.
    try (FSDataInputStream in = fs.open(path, 0);
        FileOutputStream out = new FileOutputStream(file)) {
      byte[] buffer = new byte[100];
      int bytesRead;
      while ((bytesRead = in.read(buffer)) > 0) {
        out.write(buffer, 0, bytesRead);
      }
    }

    // Decoding every record is the actual check; the records themselves are discarded.
    try (DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(file, datumReader)) {
      GenericRecord user = null;
      while (dataFileReader.hasNext()) {
        user = dataFileReader.next(user);
      }
    }
  } finally {
    // Previously skipped whenever the copy or the read threw, leaving the scratch file behind.
    file.delete();
  }
}
} // closes an enclosing scope that was opened outside this view
/**
 * Loads the next block from the reader into {@code data}.
 *
 * @param data receives the raw block buffer and the block's count
 * @param ignore unused
 * @return {@code true} when a block was loaded; {@code false} when the reader has nothing
 *     left or reports being past the sync point for {@code end}
 * @throws IOException propagated from the underlying reader
 */
public boolean next(TetherData data, NullWritable ignore) throws IOException {
  boolean exhausted = !reader.hasNext();
  if (exhausted || reader.pastSync(end)) {
    return false;
  }
  // Fetch the block before querying its count, as the count is read after nextBlock().
  data.buffer(reader.nextBlock());
  long blockCount = reader.getBlockCount();
  data.count((int) blockCount);
  return true;
}
/**
 * Asserts that {@code repairedFile} holds exactly the given records, in order.
 *
 * @param repairedFile Avro data file to read with {@code SCHEMA}
 * @param lines expected {@code toString()} form of each record, in file order
 * @throws IOException if the file cannot be opened or read
 */
private void checkFileContains(File repairedFile, String... lines) throws IOException {
  // Parameterized (not raw) reader, closed on every path including assertion failures.
  try (DataFileReader<Object> r =
      new DataFileReader<>(repairedFile, new GenericDatumReader<>(SCHEMA))) {
    for (String line : lines) {
      assertEquals(line, r.next().toString());
    }
    // No records may remain beyond the expected ones.
    assertFalse(r.hasNext());
  }
}
@Test public void testGenericRecord() throws IOException { final Path outputPath = new Path(File.createTempFile("avro-output-file", "generic.avro").getAbsolutePath()); final AvroOutputFormat<GenericRecord> outputFormat = new AvroOutputFormat<>(outputPath, GenericRecord.class); Schema schema = new Schema.Parser().parse("{\"type\":\"record\", \"name\":\"user\", \"fields\": [{\"name\":\"user_name\", \"type\":\"string\"}, {\"name\":\"favorite_number\", \"type\":\"int\"}, {\"name\":\"favorite_color\", \"type\":\"string\"}]}"); outputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE); outputFormat.setSchema(schema); output(outputFormat, schema); GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema); DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(new File(outputPath.getPath()), reader); while (dataFileReader.hasNext()) { GenericRecord record = dataFileReader.next(); assertEquals(record.get("user_name").toString(), "testUser"); assertEquals(record.get("favorite_number"), 1); assertEquals(record.get("favorite_color").toString(), "blue"); } //cleanup FileSystem fs = FileSystem.getLocalFileSystem(); fs.delete(outputPath, false); }
while (dataFileReader1.hasNext()) { User user = dataFileReader1.next(); result1.add(user.getName() + "|" + user.getFavoriteNumber() + "|" + user.getFavoriteColor()); for (File avroOutput : Objects.requireNonNull(output2)) { DataFileReader<ReflectiveUser> dataFileReader2 = new DataFileReader<>(avroOutput, userDatumReader2); while (dataFileReader2.hasNext()) { ReflectiveUser user = dataFileReader2.next(); result2.add(user.getName() + "|" + user.getFavoriteNumber() + "|" + user.getFavoriteColor());
/**
 * Test that non-string map-keys are readable through GenericDatumReader.
 * This method should read as array of {key, value} and not as a map.
 *
 * @param testType label used in the assertion message
 * @param bytes raw bytes of the Avro data file to read
 * @param entityObjs not used by this method
 * @param <T> type of the entity objects
 * @return every record in the file, in file order
 * @throws IOException if the data cannot be opened or read
 */
private <T> List<GenericRecord> testGenericDatumRead
  (String testType, byte[] bytes, T ... entityObjs) throws IOException {

  GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  SeekableByteArrayInput avroInputStream = new SeekableByteArrayInput(bytes);
  List<GenericRecord> records = new ArrayList<>();

  // try-with-resources so the reader is closed even when the assertion or a read fails
  try (DataFileReader<GenericRecord> fileReader =
      new DataFileReader<>(avroInputStream, datumReader)) {
    Schema schema = fileReader.getSchema();
    assertNotNull("Unable to get schema for " + testType, schema);

    // No reuse object is passed: each record is kept in the list, so every read
    // needs its own instance (the former reuse variable was always null).
    while (fileReader.hasNext()) {
      records.add(fileReader.next());
    }
  }
  return records;
}
long sync = dataFileReader.previousSync(); if (sbai.chunkCnt == 0) { // Find data in first chunk while (dataFileReader.hasNext() && dataFileReader.previousSync() == sync) { gr = dataFileReader.next(gr);
/**
 * Test that non-string map-keys are readable through ReflectDatumReader.
 * This method should form the original map and should not return any
 * array of {key, value} as done by {@link #testGenericDatumRead}.
 *
 * @param testType label of the test case; not used by this method
 * @param bytes raw bytes of the Avro data file to read
 * @param entityObjs not used by this method
 * @param <T> type the records are read as
 * @return every record in the file, in file order
 * @throws IOException if the data cannot be opened or read
 */
private <T> List<T> testReflectDatumRead
  (String testType, byte[] bytes, T ... entityObjs) throws IOException {

  ReflectDatumReader<T> datumReader = new ReflectDatumReader<>();
  SeekableByteArrayInput avroInputStream = new SeekableByteArrayInput(bytes);
  List<T> records = new ArrayList<>();

  // try-with-resources so the reader is closed even when a read fails
  try (DataFileReader<T> fileReader = new DataFileReader<>(avroInputStream, datumReader)) {
    // No reuse object is passed: each record is kept in the list, so every read
    // needs its own instance (the former reuse variable was always null).
    while (fileReader.hasNext()) {
      records.add(fileReader.next());
    }
  }
  return records;
}
/**
 * Writes {@code writtenRecord} with the {@code writer} schema, reads it back with
 * {@code evolved} set as the expected schema, and checks that the single record read equals
 * {@code evolvedRecord}.
 *
 * @throws IOException if the temporary file cannot be written or read
 */
@Test
public void testAvroEvolvedRead() throws IOException {
  File serializedAvro = File.createTempFile("avro", null);
  // The temp file was never removed before; have the JVM clean it up on exit.
  serializedAvro.deleteOnExit();

  DatumWriter<GenericRecord> dw = new GenericDatumWriter<>(writer);
  // try-with-resources closes the writer even when create/append throws.
  try (DataFileWriter<GenericRecord> dfw = new DataFileWriter<>(dw)) {
    dfw.create(writer, serializedAvro);
    dfw.append(writtenRecord);
    dfw.flush();
  }

  GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(writer);
  reader.setExpected(evolved);
  // The reader is closed even when an assertion fails.
  try (DataFileReader<GenericRecord> dfr = new DataFileReader<>(serializedAvro, reader)) {
    GenericRecord readRecord = dfr.next();
    Assert.assertEquals(evolvedRecord, readRecord);
    Assert.assertFalse(dfr.hasNext());
  }
}