/**
 * Test-only constructor: wires the decoder directly from an already-parsed
 * {@link Schema}, leaving the JSON-map {@code schema} field null.
 */
@VisibleForTesting
InlineSchemaAvroBytesDecoder(Schema schemaObj)
{
  this.schema = null;
  this.schemaObj = schemaObj;
  this.reader = new GenericDatumReader<>(schemaObj);
}
/**
 * Builds an {@link AvroRowDecoder} for the given columns from the mandatory
 * {@code dataSchema} decoder parameter (an Avro schema JSON string).
 */
@Override
public RowDecoder create(Map<String, String> decoderParams, Set<DecoderColumnHandle> columns)
{
    final String dataSchema = requireNonNull(decoderParams.get("dataSchema"), "dataSchema cannot be null");
    final Schema parsedSchema = new Schema.Parser().parse(dataSchema);
    return new AvroRowDecoder(new GenericDatumReader<>(parsedSchema), columns);
}
}
/**
 * (Re)opens the Avro data stream over {@code avroFile}, or clears it once
 * indexing has been stopped.
 */
private void createStream() throws FileNotFoundException, IOException {
    if (!keepIndexing) {
        // Indexing stopped: drop the stream reference instead of reopening.
        avroDataStream = null;
        return;
    }
    avroDataStream = new DataFileStream<GenericRecord>(
        new FileInputStream(avroFile), new GenericDatumReader<GenericRecord>());
}
/**
 * Get the Avro file reader for the given file.
 *
 * <p>Files ending in {@code .gz} are transparently decompressed. The underlying
 * {@link FileInputStream} is closed if stream construction fails (e.g. the file
 * is not a valid Avro container), so no file descriptor leaks on the error path
 * — the original leaked it there.
 */
public static DataFileStream<GenericRecord> getAvroReader(File avroFile) throws IOException {
    FileInputStream fileIn = new FileInputStream(avroFile);
    try {
        if (avroFile.getName().endsWith(".gz")) {
            return new DataFileStream<>(new GZIPInputStream(fileIn), new GenericDatumReader<GenericRecord>());
        }
        return new DataFileStream<>(fileIn, new GenericDatumReader<GenericRecord>());
    } catch (IOException | RuntimeException e) {
        // Constructor failed after the fd was opened — release it before rethrowing.
        fileIn.close();
        throw e;
    }
}
/**
 * Creates an {@link AvroFactory} for {@code GenericRecord}-style data. A schema
 * is mandatory here because generic records carry no compiled-in type info.
 */
private static <T> AvroFactory<T> fromGeneric(ClassLoader cl, Schema schema) {
    checkNotNull(schema, "Unable to create an AvroSerializer with a GenericRecord type without a schema");
    final GenericData genericData = new GenericData(cl);
    final GenericDatumReader<T> reader = new GenericDatumReader<>(schema, schema, genericData);
    final GenericDatumWriter<T> writer = new GenericDatumWriter<>(schema, genericData);
    return new AvroFactory<>(genericData, schema, reader, writer);
}
/**
 * Builds a generic Avro schema wrapper: sets up the shared output buffer plus the
 * datum writer/reader for the schema derived from {@code schemaInfo}.
 */
public GenericAvroSchema(SchemaInfo schemaInfo) {
    super(schemaInfo);
    this.byteArrayOutputStream = new ByteArrayOutputStream();
    // binaryEncoder() accepts a previous encoder for reuse; the field is still
    // null at this point, so a fresh BinaryEncoder is created.
    this.encoder = EncoderFactory.get().binaryEncoder(this.byteArrayOutputStream, encoder);
    // Diamond instead of the raw GenericDatumWriter/GenericDatumReader the
    // original used — avoids unchecked-raw-type warnings with identical behavior.
    this.datumWriter = new GenericDatumWriter<>(schema);
    this.datumReader = new GenericDatumReader<>(schema);
}
/**
 * Jackson-injected constructor: parses the schema supplied as a JSON map into an
 * Avro {@link Schema} and prepares a matching datum reader.
 */
@JsonCreator
public InlineSchemaAvroBytesDecoder(
    @JacksonInject @Json ObjectMapper mapper,
    @JsonProperty("schema") Map<String, Object> schema
) throws Exception
{
  Preconditions.checkArgument(schema != null, "schema must be provided");
  this.schema = schema;

  // Round-trip the map through Jackson so Avro's parser sees canonical JSON.
  final String schemaStr = mapper.writeValueAsString(schema);
  LOGGER.debug("Schema string [%s]", schemaStr);
  this.schemaObj = new Schema.Parser().parse(schemaStr);
  this.reader = new GenericDatumReader<>(this.schemaObj);
}
@Override public GenericRecord parse(ByteBuffer bytes) { try { bytes.get(); // ignore first \0 byte int id = bytes.getInt(); // extract schema registry id int length = bytes.limit() - 1 - 4; int offset = bytes.position() + bytes.arrayOffset(); Schema schema = registry.getByID(id); DatumReader<GenericRecord> reader = new GenericDatumReader<>(schema); return reader.read(null, DecoderFactory.get().binaryDecoder(bytes.array(), offset, length, null)); } catch (Exception e) { throw new ParseException(e, "Fail to decode avro message!"); } }
// Deserializes one Avro-binary record from a byte-array slice, resolving the
// writer schema against the reader schema.
// NOTE(review): the decoder is built over (offset, length - offset), which treats
// `length` as an absolute end index rather than a slice length — confirm against
// callers; if callers pass a true length, this under-reads by `offset` bytes.
public void readFields(byte[] bytes, int offset, int length, Schema writerSchema, Schema readerSchema) throws IOException {
    fileSchema = writerSchema;
    record = new GenericData.Record(writerSchema);
    // Passing the previous binaryDecoder lets Avro reuse the instance.
    binaryDecoder = DecoderFactory.get().binaryDecoder(bytes, offset, length - offset, binaryDecoder);
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>(writerSchema, readerSchema);
    record = gdr.read(null, binaryDecoder);
}
/**
 * Lazily initializes the datum reader, schema, input stream, and decoder on
 * first use; subsequent calls are no-ops.
 */
void checkAvroInitialized() {
    if (datumReader != null) {
        return; // already initialized
    }
    final ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
    if (SpecificRecord.class.isAssignableFrom(recordClazz)) {
        // Compiled (specific) record: the schema comes from the generated class.
        final SpecificData specificData = new SpecificData(classLoader);
        this.datumReader = new SpecificDatumReader<>(specificData);
        this.reader = specificData.getSchema(recordClazz);
    } else {
        // Generic record: the schema must be parsed from the configured string.
        this.reader = new Schema.Parser().parse(schemaString);
        final GenericData genericData = new GenericData(classLoader);
        this.datumReader = new GenericDatumReader<>(null, this.reader, genericData);
    }
    this.inputStream = new MutableByteArrayInputStream();
    this.decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
}
/**
 * Creates an Avro deserialization schema for the given Avro schema string.
 *
 * @param avroSchemaString Avro schema string to deserialize Avro's record to Flink's row
 */
public AvroRowDeserializationSchema(String avroSchemaString) {
    Preconditions.checkNotNull(avroSchemaString, "Avro schema must not be null.");
    recordClazz = null;
    schemaString = avroSchemaString;

    // Derive Flink type information from the schema and require a row type.
    final TypeInformation<?> typeInfo = AvroSchemaConverter.convertToTypeInfo(avroSchemaString);
    Preconditions.checkArgument(typeInfo instanceof RowTypeInfo, "Row type information expected.");
    this.typeInfo = (RowTypeInfo) typeInfo;

    // Prepare reusable Avro machinery: schema, record holder, reader, decoder.
    schema = new Schema.Parser().parse(avroSchemaString);
    record = new GenericData.Record(schema);
    datumReader = new GenericDatumReader<>(schema);
    inputStream = new MutableByteArrayInputStream();
    decoder = DecoderFactory.get().binaryDecoder(inputStream, null);
}
/**
 * Kryo deserializer for {@link GenericContainer}: reads the schema identifier
 * string first, then decodes the Avro-binary payload straight off the input.
 */
@Override
public GenericContainer read(Kryo kryo, Input input, Class<GenericContainer> aClass) {
    final Schema theSchema = this.getSchema(input.readString());
    final GenericDatumReader<GenericContainer> reader = new GenericDatumReader<>(theSchema);
    final Decoder decoder = DecoderFactory.get().directBinaryDecoder(input, null);
    try {
        return reader.read(null, decoder);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
}
/**
 * Reads the first record of an Avro container file held in {@code bytes},
 * resolving it against {@code readerSchema}.
 *
 * @throws IOException if the bytes are not a valid Avro container or decoding fails
 */
public void readFields(byte[] bytes, Schema writerSchema, Schema readerSchema) throws IOException {
    fileSchema = writerSchema;
    record = new GenericData.Record(writerSchema);
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
    gdr.setExpected(readerSchema);
    // try-with-resources: the original leaked the stream when next() threw.
    try (DataFileStream<GenericRecord> dfr =
             new DataFileStream<GenericRecord>(new ByteArrayInputStream(bytes), gdr)) {
        record = dfr.next(record);
    }
}
// Hadoop Writable deserialization. Wire format: reader schema string, file
// (writer) schema string, a UID, then the Avro-binary record payload.
@Override
public void readFields(DataInput in) throws IOException {
    Schema schema = AvroSerdeUtils.getSchemaFor(in.readUTF());
    fileSchema = AvroSerdeUtils.getSchemaFor(in.readUTF());
    recordReaderID = UID.read(in);
    record = new GenericData.Record(schema);
    // NOTE(review): casting DataInput to InputStream only works when callers
    // always pass a stream-backed DataInput (e.g. DataInputStream) — confirm.
    // DecoderFactory.defaultFactory() is the legacy (deprecated) factory; kept
    // as-is to avoid changing decoder reuse semantics.
    binaryDecoder = DecoderFactory.defaultFactory().createBinaryDecoder((InputStream) in, binaryDecoder);
    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>(schema);
    record = gdr.read(record, binaryDecoder);
}
/**
 * Returns the top-level field names declared in the given Avro file's schema.
 *
 * @throws IOException if the file cannot be opened or is not a valid Avro container
 */
public static List<String> getColumnNamesFromAvro(File avro) throws IOException {
    List<String> ret = new ArrayList<String>();
    // try-with-resources: the original never closed the stream, leaking the
    // file descriptor on every call.
    try (DataFileStream<GenericRecord> dataStream =
             new DataFileStream<GenericRecord>(new FileInputStream(avro), new GenericDatumReader<GenericRecord>())) {
        for (final Field field : dataStream.getSchema().getFields()) {
            ret.add(field.name());
        }
    }
    return ret;
}
/**
 * Opens {@code avroFile} for streaming and captures the writer schema embedded
 * in its container header.
 */
public void init() throws FileNotFoundException, IOException {
    final FileInputStream fileIn = new FileInputStream(avroFile);
    dataStream = new DataFileStream<GenericRecord>(fileIn, new GenericDatumReader<GenericRecord>());
    schema = dataStream.getSchema();
}
/**
 * Decodes Avro-binary {@code bytes} into a generic object using {@code typeDef}.
 *
 * @throws SerializationException if the payload cannot be decoded
 */
public Object toObject(byte[] bytes) {
    final Decoder decoder = DecoderFactory.defaultFactory().createBinaryDecoder(bytes, null);
    try {
        return new GenericDatumReader<Object>(typeDef).read(null, decoder);
    } catch (IOException e) {
        throw new SerializationException(e);
    }
}
/**
 * Opens an Avro {@link DataFileReader} for the given split, picking a generic,
 * specific, or reflect datum reader based on {@code avroValueType}, and records
 * the split's end offset for later bounds checking.
 */
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
    final DatumReader<E> datumReader;
    if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
        datumReader = new GenericDatumReader<E>();
    } else if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
        datumReader = new SpecificDatumReader<E>(avroValueType);
    } else {
        datumReader = new ReflectDatumReader<E>(avroValueType);
    }

    if (LOG.isInfoEnabled()) {
        LOG.info("Opening split {}", split);
    }

    final long fileLength = split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen();
    final SeekableInput in = new FSDataInputStreamWrapper(stream, fileLength);
    final DataFileReader<E> dataFileReader = (DataFileReader) DataFileReader.openReader(in, datumReader);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
    }

    end = split.getStart() + split.getLength();
    recordsReadSinceLastSync = 0;
    return dataFileReader;
}
@Test public void testGenericRecord() throws IOException { final Path outputPath = new Path(File.createTempFile("avro-output-file", "generic.avro").getAbsolutePath()); final AvroOutputFormat<GenericRecord> outputFormat = new AvroOutputFormat<>(outputPath, GenericRecord.class); Schema schema = new Schema.Parser().parse("{\"type\":\"record\", \"name\":\"user\", \"fields\": [{\"name\":\"user_name\", \"type\":\"string\"}, {\"name\":\"favorite_number\", \"type\":\"int\"}, {\"name\":\"favorite_color\", \"type\":\"string\"}]}"); outputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE); outputFormat.setSchema(schema); output(outputFormat, schema); GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema); DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(new File(outputPath.getPath()), reader); while (dataFileReader.hasNext()) { GenericRecord record = dataFileReader.next(); assertEquals(record.get("user_name").toString(), "testUser"); assertEquals(record.get("favorite_number"), 1); assertEquals(record.get("favorite_color").toString(), "blue"); } //cleanup FileSystem fs = FileSystem.getLocalFileSystem(); fs.delete(outputPath, false); }
@Override public void init(Map<String, String> props, Schema indexingSchema, String topicName) throws Exception { // Load Avro schema DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile); _avroSchema = reader.getSchema(); reader.close(); _rowGenerator = new AvroRecordToPinotRowGenerator(indexingSchema); _reader = new GenericDatumReader<>(_avroSchema); }