/**
 * Creates a reader backed by an already-open input stream rather than a
 * file. The file system and path are left unset ({@code null}).
 *
 * @param incoming the stream this reader is given to read from
 * @param schema the {@link Schema} kept by this reader
 * @param type the entity class used to look up the data model
 */
public JSONFileReader(InputStream incoming, Schema schema, Class<E> type) {
  // entity description
  this.schema = schema;
  this.model = DataModelUtil.getDataModelForType(type);

  // stream-backed source: no file system location is involved
  this.incoming = incoming;
  this.fs = null;
  this.path = null;

  this.state = ReaderWriterState.NEW;
}
/**
 * Resolve the schema that should be used to read entities of the given type.
 * <p>
 * When the data model for {@code type} is a {@link SpecificData} (or a
 * subclass of it), the schema is obtained from that model for the type;
 * otherwise the supplied schema is returned unchanged.
 *
 * @param <E> The entity type
 * @param type The Java class of the entity type
 * @param schema The {@link Schema} for the entity
 * @return The reader schema based on the given type and writer schema
 */
public static <E> Schema getReaderSchema(Class<E> type, Schema schema) {
  GenericData model = getDataModelForType(type);

  if (!(model instanceof SpecificData)) {
    // no type-derived schema available: fall back to the one passed in
    return schema;
  }

  SpecificData specific = (SpecificData) model;
  return specific.getSchema(type);
}
/**
 * Creates a reader for a path on the given file system, taking the read
 * schema and entity type from an {@link EntityAccessor}.
 *
 * @param fileSystem the file system that holds {@code path}
 * @param path the location this reader is associated with
 * @param accessor supplies the read schema and the entity type
 */
public JSONFileReader(FileSystem fileSystem, Path path,
                      EntityAccessor<E> accessor) {
  // entity description comes from the accessor
  this.schema = accessor.getReadSchema();
  this.model = DataModelUtil.getDataModelForType(accessor.getType());

  // file-backed source
  this.fs = fileSystem;
  this.path = path;

  this.state = ReaderWriterState.NEW;
}
private static void setConfigProperties(Configuration conf, Format format, Schema schema, Class<?> type) { GenericData model = DataModelUtil.getDataModelForType(type); if (Formats.AVRO.equals(format)) { setModel.invoke(conf, model.getClass()); conf.set(AVRO_SCHEMA_INPUT_KEY, schema.toString()); } else if (Formats.PARQUET.equals(format)) { // TODO: update to a version of Parquet with setAvroDataSupplier //AvroReadSupport.setAvroDataSupplier(conf, // DataModelUtil.supplierClassFor(model)); AvroReadSupport.setAvroReadSchema(conf, schema); } }
/** * Get the writer schema based on the given type and dataset schema. * * @param <E> The entity type * @param type The Java class of the entity type * @param schema The {@link Schema} for the entity * @return The reader schema based on the given type and writer schema */ public static <E> Schema getWriterSchema(Class<E> type, Schema schema) { Schema writerSchema = schema; GenericData dataModel = getDataModelForType(type); if (dataModel instanceof AllowNulls) { // assume fields are non-null by default to avoid schema conflicts dataModel = ReflectData.get(); } if (dataModel instanceof SpecificData) { writerSchema = ((SpecificData)dataModel).getSchema(type); } return writerSchema; }
/**
 * Builds an accessor for the given entity type and schema.
 * <p>
 * The assignments below are order-dependent: the resolved type feeds the
 * reader-schema lookup, and the resulting schema feeds the writer-schema
 * lookup.
 *
 * @param type the requested entity class, passed to
 *             {@code DataModelUtil.resolveType} together with the schema
 * @param schema the {@link Schema} supplied for the entity
 */
EntityAccessor(Class<E> type, Schema schema) {
  // resolve the type first; every later lookup uses this.type, not the argument
  this.type = DataModelUtil.resolveType(type, schema);
  // reader schema is derived from the resolved type and the supplied schema
  this.schema = DataModelUtil.getReaderSchema(this.type, schema);
  // writer schema is derived from the reader schema stored just above
  this.writeSchema = DataModelUtil.getWriterSchema(this.type, this.schema);
  this.model = DataModelUtil.getDataModelForType(this.type);
}
/**
 * Build a {@link DatumReader} suited to the data model of the given type.
 * <p>
 * The reader schema is resolved with
 * {@link #getReaderSchema(Class, Schema)}; the concrete reader class is
 * chosen from the data model: reflect, then specific, then generic.
 *
 * @param <E> The entity type
 * @param type The Java class of the entity type
 * @param writerSchema The {@link Schema} for entities
 * @return The DatumReader for the given type
 */
@SuppressWarnings("unchecked")
public static <E> DatumReader<E> getDatumReaderForType(Class<E> type,
                                                       Schema writerSchema) {
  Schema readerSchema = getReaderSchema(type, writerSchema);
  GenericData model = getDataModelForType(type);

  // ReflectData must be tested before SpecificData: in Avro, ReflectData
  // is a subclass of SpecificData, so the order of these checks matters.
  if (model instanceof ReflectData) {
    ReflectData reflect = (ReflectData) model;
    return new ReflectDatumReader<E>(writerSchema, readerSchema, reflect);
  }

  if (model instanceof SpecificData) {
    SpecificData specific = (SpecificData) model;
    return new SpecificDatumReader<E>(writerSchema, readerSchema, specific);
  }

  return new GenericDatumReader<E>(writerSchema, readerSchema, model);
}
/** The data model looked up for {@code StandardEvent} is exactly {@code SpecificData}. */
@Test
public void testDataModelForSpecificType() {
  GenericData model = DataModelUtil.getDataModelForType(StandardEvent.class);

  assertEquals(SpecificData.class, model.getClass());
}
/** The data model looked up for {@code GenericData.Record} is exactly {@code GenericData}. */
@Test
public void testDataModelForGenericType() {
  GenericData model =
      DataModelUtil.getDataModelForType(GenericData.Record.class);

  assertEquals(GenericData.class, model.getClass());
}
/** The data model looked up for {@code String} is exactly {@code DataModelUtil.AllowNulls}. */
@Test
public void testDataModelForReflectType() {
  GenericData model = DataModelUtil.getDataModelForType(String.class);

  assertEquals(DataModelUtil.AllowNulls.class, model.getClass());
}
/**
 * Creates a record writer on top of a new writer obtained from the view.
 *
 * @param view the view to write to; also supplies the dataset schema
 *             (from its descriptor) and the entity type
 * @param copyRecords flag stored as-is on this writer
 */
public DatasetRecordWriter(View<E> view, boolean copyRecords) {
  this.copyRecords = copyRecords;

  // everything else is derived from the view
  this.datasetWriter = view.newWriter();
  this.schema = view.getDataset()
      .getDescriptor()
      .getSchema();
  this.dataModel = DataModelUtil.getDataModelForType(view.getType());
}
/**
 * Creates a record writer on top of a new writer obtained from the view.
 *
 * @param view the view to write to; also supplies the dataset schema
 *             (from its descriptor) and the entity type
 * @param copyRecords flag stored as-is on this writer
 */
public DatasetRecordWriter(View<E> view, boolean copyRecords) {
  this.copyRecords = copyRecords;

  // everything else is derived from the view
  this.datasetWriter = view.newWriter();
  this.schema = view.getDataset()
      .getDescriptor()
      .getSchema();
  this.dataModel = DataModelUtil.getDataModelForType(view.getType());
}