/**
 * Deserializes an Avro data-file byte stream into {@code record}, resolving the
 * writer's schema against the reader's schema (schema evolution).
 *
 * @param bytes        serialized Avro data-file contents (including file header)
 * @param writerSchema schema the data was written with; stored in {@code fileSchema}
 * @param readerSchema schema to resolve the record into
 * @throws IOException if the bytes are not a valid Avro data file or reading fails
 */
public void readFields(byte[] bytes, Schema writerSchema, Schema readerSchema) throws IOException {
    fileSchema = writerSchema;
    record = new GenericData.Record(writerSchema);
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    gdr.setExpected(readerSchema);
    ByteArrayInputStream is = new ByteArrayInputStream(bytes);
    // try-with-resources: the original leaked the stream when next() threw.
    try (DataFileStream<GenericRecord> dfr = new DataFileStream<GenericRecord>(is, gdr)) {
        record = dfr.next(record);
    }
}
public Object toObject(byte[] bytes) { Integer version = Integer.valueOf(bytes[0]); if(version > newestVersion) throw new SerializationException("Client needs to rebootstrap! \n Writer's schema version greater than Reader"); Schema typeDefWriter = Schema.parse(typeDefVersions.get(version)); Decoder decoder = DecoderFactory.defaultFactory().createBinaryDecoder(bytes, 1, bytes.length - 1, null); GenericDatumReader<Object> reader = null; try { reader = new GenericDatumReader<Object>(typeDefWriter, typeDef); // writer's schema reader.setSchema(typeDefWriter); // Reader's schema reader.setExpected(typeDef); return reader.read(null, decoder); } catch(IOException e) { throw new SerializationException(e); } }
// Decode the payload that follows the 1-byte header and the embedded MD5 schema
// fingerprint: the offset skips header + digest, and the length excludes both.
Decoder decoder = DecoderFactory.get().binaryDecoder(data, 1 + MD5Digest.MD5_BYTES_LENGTH, data.length - MD5Digest.MD5_BYTES_LENGTH - 1, null);
// Reader's (expected) schema is configured before the writer's (actual) schema;
// with expected already set, setSchema only sets the writer side.
_datumReader.setExpected(outputSchema);
_datumReader.setSchema(schema);
try {
/**
 * Decodes a serialized Avro blob into a {@code Record}, resolving the blob's own
 * schema against the expected (reader) schema.
 *
 * @param expectedSchema schema the caller wants the record resolved into
 * @param schemaOfBlob   schema the blob was written with
 * @param blob           serialized bytes, or {@code null}
 * @return the decoded record, or {@code null} when {@code blob} is {@code null}
 * @throws IOException if decoding fails
 */
private Record decodeGenericBlob(Schema expectedSchema, Schema schemaOfBlob, byte[] blob) throws IOException {
    if (blob == null) {
        return null;
    }
    GenericDatumReader<Record> blobReader = new GenericDatumReader<>();
    // Expected (reader) schema first, then the actual (writer) schema of the blob.
    blobReader.setExpected(expectedSchema);
    blobReader.setSchema(schemaOfBlob);
    // Pick the decoder matching the encoder that produced the blob.
    Decoder decoder;
    if (encoderType == EncoderType.BINARY) {
        decoder = DecoderFactory.get().binaryDecoder(blob, null);
    } else {
        decoder = DecoderFactory.get().jsonDecoder(schemaOfBlob, new ByteArrayInputStream(blob));
    }
    return blobReader.read(null, decoder);
}
}
/**
 * Deserializes an Avro-encoded message. The writer's schema travels with the
 * message (read via {@code schemaCoder}); the locally configured reader schema
 * drives Avro schema resolution.
 *
 * @param message serialized bytes (schema header followed by the Avro payload)
 * @return the decoded datum
 * @throws IOException if the schema or payload cannot be read
 */
@Override
public T deserialize(byte[] message) throws IOException {
    checkAvroInitialized();
    // Point the reusable input stream at this message's bytes.
    getInputStream().setBuffer(message);
    final Schema writer = schemaCoder.readSchema(getInputStream());
    final Schema expected = getReaderSchema();
    final GenericDatumReader<T> datumReader = getDatumReader();
    datumReader.setSchema(writer);
    datumReader.setExpected(expected);
    return datumReader.read(null, getDecoder());
}
/**
 * Creates a record reader over an Avro file split.
 *
 * <p>Empty splits are handled without opening the file. For non-empty splits the
 * reader is positioned at the first sync marker at/after the split start so that
 * record boundaries are respected.
 *
 * @param job      Hadoop job configuration
 * @param split    the file split to read
 * @param reporter progress reporter (unused here)
 * @throws IOException if the schema cannot be resolved or the file cannot be opened
 */
public AvroGenericRecordReader(JobConf job, FileSplit split, Reporter reporter) throws IOException {
    this.jobConf = job;
    Schema latest;
    try {
        latest = getSchema(job, split);
    } catch (AvroSerdeException e) {
        // Wrap so the Hadoop RecordReader contract (IOException) is honored.
        throw new IOException(e);
    }
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    if (latest != null) {
        // Read with the latest schema when available (schema evolution).
        gdr.setExpected(latest);
    }
    if (split.getLength() == 0) {
        this.isEmptyInput = true;
        this.start = 0;
        this.reader = null;
    } else {
        this.isEmptyInput = false;
        DataFileReader<GenericRecord> dataFileReader =
                new DataFileReader<GenericRecord>(new FsInput(split.getPath(), job), gdr);
        try {
            dataFileReader.sync(split.getStart());
        } catch (IOException | RuntimeException e) {
            // Don't leak the open file handle when seeking fails (original leaked it).
            dataFileReader.close();
            throw e;
        }
        this.reader = dataFileReader;
        this.start = reader.tell();
    }
    this.stop = split.getStart() + split.getLength();
    this.recordReaderID = new UID();
}
/**
 * Verifies that a record written with the writer schema can be read back with
 * the evolved reader schema (Avro schema resolution) and that the file holds
 * exactly one record.
 */
@Test
public void testAvroEvolvedRead() throws IOException {
    File serializedAvro = File.createTempFile("avro", null);
    serializedAvro.deleteOnExit();  // don't leave the temp file behind
    DatumWriter<GenericRecord> dw = new GenericDatumWriter<>(writer);
    // try-with-resources closes the writer even on assertion/IO failure; close()
    // also flushes, so the original's explicit flush() was redundant.
    try (DataFileWriter<GenericRecord> dfw = new DataFileWriter<>(dw)) {
        dfw.create(writer, serializedAvro);
        dfw.append(writtenRecord);
    }
    GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(writer);
    // Resolve into the evolved schema when reading back.
    reader.setExpected(evolved);
    // The original never closed the DataFileReader, leaking the file handle.
    try (DataFileReader<GenericRecord> dfr = new DataFileReader<>(serializedAvro, reader)) {
        GenericRecord readRecord = dfr.next();
        Assert.assertEquals(evolvedRecord, readRecord);
        Assert.assertFalse(dfr.hasNext());
    }
}
/**
 * Deserializes an Avro data-file byte stream into {@code record}, resolving the
 * writer's schema against the reader's schema (schema evolution).
 *
 * @param bytes        serialized Avro data-file contents (including file header)
 * @param writerSchema schema the data was written with; stored in {@code fileSchema}
 * @param readerSchema schema to resolve the record into
 * @throws IOException if the bytes are not a valid Avro data file or reading fails
 */
public void readFields(byte[] bytes, Schema writerSchema, Schema readerSchema) throws IOException {
    fileSchema = writerSchema;
    record = new GenericData.Record(writerSchema);
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    gdr.setExpected(readerSchema);
    ByteArrayInputStream is = new ByteArrayInputStream(bytes);
    // try-with-resources: the original leaked the stream when next() threw.
    try (DataFileStream<GenericRecord> dfr = new DataFileStream<GenericRecord>(is, gdr)) {
        record = dfr.next(record);
    }
}
/**
 * Deserializes an Avro data-file byte stream into {@code record}, resolving the
 * writer's schema against the reader's schema (schema evolution).
 *
 * @param bytes        serialized Avro data-file contents (including file header)
 * @param writerSchema schema the data was written with; stored in {@code fileSchema}
 * @param readerSchema schema to resolve the record into
 * @throws IOException if the bytes are not a valid Avro data file or reading fails
 */
public void readFields(byte[] bytes, Schema writerSchema, Schema readerSchema) throws IOException {
    fileSchema = writerSchema;
    record = new GenericData.Record(writerSchema);
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    // Reader's schema drives resolution; the writer's schema comes from the file header.
    gdr.setExpected(readerSchema);
    ByteArrayInputStream is = new ByteArrayInputStream(bytes);
    DataFileStream<GenericRecord> dfr = new DataFileStream<GenericRecord>(is, gdr);
    // Reuse the pre-allocated record instance to avoid an extra allocation.
    record = dfr.next(record);
    dfr.close();
}
/**
 * Deserializes an Avro data-file byte stream into {@code record}, resolving the
 * writer's schema against the reader's schema (schema evolution).
 *
 * @param bytes        serialized Avro data-file contents (including file header)
 * @param writerSchema schema the data was written with; stored in {@code fileSchema}
 * @param readerSchema schema to resolve the record into
 * @throws IOException if the bytes are not a valid Avro data file or reading fails
 */
public void readFields(byte[] bytes, Schema writerSchema, Schema readerSchema) throws IOException {
    fileSchema = writerSchema;
    record = new GenericData.Record(writerSchema);
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    gdr.setExpected(readerSchema);
    ByteArrayInputStream is = new ByteArrayInputStream(bytes);
    // try-with-resources: the original leaked the stream when next() threw.
    try (DataFileStream<GenericRecord> dfr = new DataFileStream<GenericRecord>(is, gdr)) {
        record = dfr.next(record);
    }
}
/**
 * Deserializes an Avro data-file byte stream into {@code record}, resolving the
 * writer's schema against the reader's schema (schema evolution).
 *
 * @param bytes        serialized Avro data-file contents (including file header)
 * @param writerSchema schema the data was written with; stored in {@code fileSchema}
 * @param readerSchema schema to resolve the record into
 * @throws IOException if the bytes are not a valid Avro data file or reading fails
 */
public void readFields(byte[] bytes, Schema writerSchema, Schema readerSchema) throws IOException {
    fileSchema = writerSchema;
    record = new GenericData.Record(writerSchema);
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    // Reader's schema drives resolution; the writer's schema comes from the file header.
    gdr.setExpected(readerSchema);
    ByteArrayInputStream is = new ByteArrayInputStream(bytes);
    DataFileStream<GenericRecord> dfr = new DataFileStream<GenericRecord>(is, gdr);
    // Reuse the pre-allocated record instance to avoid an extra allocation.
    record = dfr.next(record);
    dfr.close();
}
public static List<HoodieRecord<HoodieAvroPayload>> loadFromFile(FileSystem fs, String deltaFilePath, Schema expectedSchema) { List<HoodieRecord<HoodieAvroPayload>> loadedRecords = Lists.newArrayList(); Path path = new Path(deltaFilePath); try { SeekableInput input = new FsInput(path, fs.getConf()); GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(); // Set the expected schema to be the current schema to account for schema evolution reader.setExpected(expectedSchema); FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader); for (GenericRecord deltaRecord : fileReader) { String key = deltaRecord.get(HoodieRecord.RECORD_KEY_METADATA_FIELD).toString(); String partitionPath = deltaRecord.get(HoodieRecord.PARTITION_PATH_METADATA_FIELD).toString(); loadedRecords.add(new HoodieRecord<>(new HoodieKey(key, partitionPath), new HoodieAvroPayload(Optional.of(deltaRecord)))); } fileReader.close(); // also closes underlying FsInput } catch (IOException e) { throw new HoodieIOException("Could not read avro records from path " + deltaFilePath, e); } return loadedRecords; }
// Decode the payload that follows the 1-byte header and the embedded MD5 schema
// fingerprint: the offset skips header + digest, and the length excludes both.
Decoder decoder = DecoderFactory.get().binaryDecoder(data, 1 + MD5Digest.MD5_BYTES_LENGTH, data.length - MD5Digest.MD5_BYTES_LENGTH - 1, null);
// Reader's (expected) schema is configured before the writer's (actual) schema;
// with expected already set, setSchema only sets the writer side.
_datumReader.setExpected(outputSchema);
_datumReader.setSchema(schema);
try {
/**
 * Creates a record reader over an Avro file split. The reader is positioned at
 * the first sync marker at/after the split start so record boundaries are
 * respected, and reads stop at the split end.
 *
 * @param job      Hadoop job configuration
 * @param split    the file split to read
 * @param reporter progress reporter (unused here)
 * @throws IOException if the schema cannot be resolved or the file cannot be opened
 */
public AvroGenericRecordReader(JobConf job, FileSplit split, Reporter reporter) throws IOException {
    this.jobConf = job;
    Schema latest;
    try {
        latest = getSchema(job, split);
    } catch (HaivvreoException e) {
        // Wrap so the Hadoop RecordReader contract (IOException) is honored.
        throw new IOException(e);
    }
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    // Read with the latest schema when available (schema evolution); otherwise
    // fall back to the file's own writer schema.
    if(latest != null) gdr.setExpected(latest);
    this.reader = new DataFileReader<GenericRecord>(new FsInput(split.getPath(), job), gdr);
    // Seek to the first record boundary within this split.
    this.reader.sync(split.getStart());
    this.start = reader.tell();
    this.stop = split.getStart() + split.getLength();
}
/**
 * Creates a record reader over an Avro file split. The reader is positioned at
 * the first sync marker at/after the split start so record boundaries are
 * respected, and reads stop at the split end.
 *
 * @param job      Hadoop job configuration
 * @param split    the file split to read
 * @param reporter progress reporter (unused here)
 * @throws IOException if the schema cannot be resolved or the file cannot be opened
 */
public AvroGenericRecordReader(JobConf job, FileSplit split, Reporter reporter) throws IOException {
    this.jobConf = job;
    Schema latest;
    try {
        latest = getSchema(job, split);
    } catch (AvroSerdeException e) {
        // Wrap so the Hadoop RecordReader contract (IOException) is honored.
        throw new IOException(e);
    }
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    if (latest != null) {
        // Read with the latest schema when available (schema evolution).
        gdr.setExpected(latest);
    }
    DataFileReader<GenericRecord> dataFileReader =
            new DataFileReader<GenericRecord>(new FsInput(split.getPath(), job), gdr);
    try {
        dataFileReader.sync(split.getStart());
    } catch (IOException | RuntimeException e) {
        // Don't leak the open file handle when seeking fails (original leaked it).
        dataFileReader.close();
        throw e;
    }
    this.reader = dataFileReader;
    this.start = reader.tell();
    this.stop = split.getStart() + split.getLength();
}
/**
 * Creates a record reader over an Avro file split. The reader is positioned at
 * the first sync marker at/after the split start so record boundaries are
 * respected, and reads stop at the split end.
 *
 * @param job      Hadoop job configuration
 * @param split    the file split to read
 * @param reporter progress reporter (unused here)
 * @throws IOException if the schema cannot be resolved or the file cannot be opened
 */
public AvroGenericRecordReader(JobConf job, FileSplit split, Reporter reporter) throws IOException {
    this.jobConf = job;
    Schema latest;
    try {
        latest = getSchema(job, split);
    } catch (AvroSerdeException e) {
        // Wrap so the Hadoop RecordReader contract (IOException) is honored.
        throw new IOException(e);
    }
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    if (latest != null) {
        // Read with the latest schema when available (schema evolution).
        gdr.setExpected(latest);
    }
    DataFileReader<GenericRecord> dataFileReader =
            new DataFileReader<GenericRecord>(new FsInput(split.getPath(), job), gdr);
    try {
        dataFileReader.sync(split.getStart());
    } catch (IOException | RuntimeException e) {
        // Don't leak the open file handle when seeking fails (original leaked it).
        dataFileReader.close();
        throw e;
    }
    this.reader = dataFileReader;
    this.start = reader.tell();
    this.stop = split.getStart() + split.getLength();
    this.recordReaderID = new UID();
}
/**
 * Deserializes an Avro-encoded message. The writer's schema travels with the
 * message (read via {@code schemaCoder}); the locally configured reader schema
 * drives Avro schema resolution.
 *
 * @param message serialized bytes (schema header followed by the Avro payload)
 * @return the decoded datum
 * @throws IOException if the schema or payload cannot be read
 */
@Override
public T deserialize(byte[] message) throws IOException {
    checkAvroInitialized();
    // Point the reusable input stream at this message's bytes.
    getInputStream().setBuffer(message);
    Schema writerSchema = schemaCoder.readSchema(getInputStream());
    Schema readerSchema = getReaderSchema();
    GenericDatumReader<T> datumReader = getDatumReader();
    // Writer schema first, then reader (expected) schema for resolution.
    datumReader.setSchema(writerSchema);
    datumReader.setExpected(readerSchema);
    return datumReader.read(null, getDecoder());
}
/**
 * Deserializes an Avro-encoded message: the schema the bytes were written with
 * is read from the message header, then the payload is decoded and resolved
 * into this deserializer's configured reader schema.
 *
 * @param message serialized bytes (schema header followed by the Avro payload)
 * @return the decoded datum
 * @throws IOException if the schema or payload cannot be read
 */
@Override
public T deserialize(byte[] message) throws IOException {
    checkAvroInitialized();
    // Feed the raw message into the reusable input stream.
    getInputStream().setBuffer(message);
    Schema schemaOfWriter = schemaCoder.readSchema(getInputStream());
    Schema schemaOfReader = getReaderSchema();
    GenericDatumReader<T> datumReader = getDatumReader();
    datumReader.setSchema(schemaOfWriter);
    datumReader.setExpected(schemaOfReader);
    return datumReader.read(null, getDecoder());
}