/**
 * Rebuilds this object's entity accessor from the given view.
 *
 * @param view the view whose type and schema are handed to
 *             {@code DataModelUtil.accessor}
 */
public void setView(View<E> view) {
  final Class<E> entityType = view.getType();
  this.accessor = DataModelUtil.accessor(entityType, view.getSchema());
}
EntityAccessor(Class<E> type, Schema schema) { this.type = DataModelUtil.resolveType(type, schema); this.schema = DataModelUtil.getReaderSchema(this.type, schema); this.writeSchema = DataModelUtil.getWriterSchema(this.type, this.schema); this.model = DataModelUtil.getDataModelForType(this.type); }
/**
 * Reads the next record from the underlying reader.
 *
 * @return the record returned by {@code reader.next}
 * @throws IllegalStateException if this reader is not in the OPEN state
 * @throws DatasetIOException if the underlying reader fails with an IOException
 */
@Override
public E next() {
  Preconditions.checkState(state.equals(ReaderWriterState.OPEN),
      "Attempt to read from a file in state:%s", state);

  // A fresh record instance is created per call and offered to the reader.
  final E reuse = DataModelUtil.createRecord(type, schema);
  final E result;
  try {
    result = reader.next(reuse);
  } catch (IOException ex) {
    throw new DatasetIOException("Cannot advance reader", ex);
  }
  return result;
}
public AbstractDataset(Class<E> type, Schema schema) { this.type = DataModelUtil.resolveType(type, schema); this.schema = DataModelUtil.getReaderSchema(this.type, schema); }
/**
 * Builds a {@link DatumReader} matching the data model of the given type.
 * <p>
 * The reader schema comes from {@link #getReaderSchema(Class, Schema)} and
 * the data model from {@code getDataModelForType}; the concrete reader class
 * (reflect, specific or generic) is chosen from the data model's class.
 *
 * @param <E> The entity type
 * @param type The Java class of the entity type
 * @param writerSchema The {@link Schema} for entities
 * @return The DatumReader for the given type
 */
@SuppressWarnings("unchecked")
public static <E> DatumReader<E> getDatumReaderForType(Class<E> type, Schema writerSchema) {
  final Schema expected = getReaderSchema(type, writerSchema);
  final GenericData model = getDataModelForType(type);

  // Keep this order: in Avro, ReflectData is a subclass of SpecificData, so
  // the reflect check has to come first.
  if (model instanceof ReflectData) {
    return new ReflectDatumReader<E>(writerSchema, expected, (ReflectData) model);
  }
  if (model instanceof SpecificData) {
    return new SpecificDatumReader<E>(writerSchema, expected, (SpecificData) model);
  }
  return new GenericDatumReader<E>(writerSchema, expected, model);
}
/**
 * Creates a reader over an already-open stream rather than a file system path.
 *
 * @param incoming the stream to read from
 * @param schema   the schema stored for this reader
 * @param type     the entity class used to look up the data model
 */
public JSONFileReader(InputStream incoming, Schema schema, Class<E> type) {
  // No file system location is associated with a stream-backed reader.
  this.fs = null;
  this.path = null;

  this.incoming = incoming;
  this.schema = schema;
  this.model = DataModelUtil.getDataModelForType(type);

  this.state = ReaderWriterState.NEW;
}
/**
 * Sets the expected schema to use when reading records from the Dataset.
 * <p>
 * If this schema is set, {@link #withType(Class)} should only be called
 * with a generic record subclass.
 *
 * @param readerSchema the expected entity schema
 * @return this for method chaining
 * @throws IllegalArgumentException if a non-generic type has already been
 *         configured
 * @since 1.1.0
 */
public ConfigBuilder withSchema(Schema readerSchema) {
  Class<?> type = conf.getClass(KITE_TYPE, null);
  // Preconditions templates only substitute %s; an SLF4J-style {} would be
  // emitted literally with the argument appended in brackets.
  Preconditions.checkArgument(
      type == null || DataModelUtil.isGeneric(type),
      "Can't configure a reader schema when a type is already set: %s",
      type);
  conf.set(KITE_READER_SCHEMA, readerSchema.toString());
  return this;
}
public CSVRecordParser(CSVProperties props, Schema schema, Class<E> type, @Nullable List<String> header) { this.parser = CSVUtil.newParser(props); this.builder = new CSVRecordBuilder<E>( DataModelUtil.getReaderSchema(type, schema), type, getHeader(props, header)); }
// Resolving Object.class against StandardEvent's schema is expected to yield StandardEvent.class.
@Test
public void testResolveTypeObjectToSpecifc() {
  Schema eventSchema = StandardEvent.getClassSchema();

  Class<?> resolved = DataModelUtil.resolveType(Object.class, eventSchema);

  assertEquals(StandardEvent.class, resolved);
}
/**
 * Opens the Avro data file at {@code path} and moves this reader from the
 * NEW state to the OPEN state.
 * <p>
 * If the data file reader cannot be constructed, the underlying input is
 * closed before the failure is reported so the stream is not leaked.
 *
 * @throws IllegalStateException if the reader is not in the NEW state
 * @throws DatasetIOException if the file cannot be stat'ed, opened, or read
 */
@Override
public void initialize() {
  Preconditions.checkState(state.equals(ReaderWriterState.NEW),
      "A reader may not be opened more than once - current state:%s", state);

  LOG.debug("Opening reader on path:{}", path);

  try {
    // Look up the length before opening, so a failed stat cannot strand an
    // already-open stream.
    final long length = fileSystem.getFileStatus(path).getLen();
    final AvroFSInput input = new AvroFSInput(fileSystem.open(path), length);

    boolean opened = false;
    try {
      reader = new DataFileReader<E>(input,
          DataModelUtil.getDatumReaderForType(type, schema));
      opened = true;
    } finally {
      if (!opened) {
        try {
          input.close();
        } catch (IOException ignored) {
          // Best-effort cleanup: the construction failure is the error to report.
        }
      }
    }
  } catch (IOException e) {
    throw new DatasetIOException("Unable to create reader path:" + path, e);
  }

  state = ReaderWriterState.OPEN;
}
/**
 * Determines the reader schema for a type and writer schema.
 * <p>
 * When the data model for {@code type} is a {@link SpecificData} instance,
 * the schema is taken from the data model; otherwise the schema passed in is
 * returned unchanged.
 *
 * @param <E> The entity type
 * @param type The Java class of the entity type
 * @param schema The {@link Schema} for the entity
 * @return The reader schema based on the given type and writer schema
 */
public static <E> Schema getReaderSchema(Class<E> type, Schema schema) {
  final GenericData model = getDataModelForType(type);
  if (!(model instanceof SpecificData)) {
    return schema;
  }
  return ((SpecificData) model).getSchema(type);
}
/**
 * Sets the expected schema to use when reading records from the Dataset.
 * <p>
 * If this schema is set, {@link #withType(Class)} should only be called
 * with a generic record subclass.
 *
 * @param readerSchema the expected entity schema
 * @return this for method chaining
 * @throws IllegalArgumentException if a non-generic type has already been
 *         configured
 * @since 1.1.0
 */
public ConfigBuilder withSchema(Schema readerSchema) {
  Class<?> type = conf.getClass(KITE_TYPE, null);
  // Preconditions templates only substitute %s; an SLF4J-style {} would be
  // emitted literally with the argument appended in brackets.
  Preconditions.checkArgument(
      type == null || DataModelUtil.isGeneric(type),
      "Can't configure a reader schema when a type is already set: %s",
      type);
  conf.set(KITE_READER_SCHEMA, readerSchema.toString());
  return this;
}
public ParquetFileSystemDatasetReader(FileSystem fileSystem, Path path, Schema schema, Class<E> type) { Preconditions.checkArgument(fileSystem != null, "FileSystem cannot be null"); Preconditions.checkArgument(path != null, "Path cannot be null"); Preconditions.checkArgument(schema != null, "Schema cannot be null"); Preconditions.checkArgument(IndexedRecord.class.isAssignableFrom(type) || (Class<?>)type == Object.class, "The entity type must implement IndexedRecord"); this.fileSystem = fileSystem; this.path = path; this.schema = schema; this.type = type; this.readerSchema = DataModelUtil.getReaderSchema(type, schema); this.state = ReaderWriterState.NEW; }
// Resolving StandardEvent.class against its own schema is expected to return the same class.
@Test
public void testResolveTypeSpecifcToSpecifc() {
  Schema eventSchema = StandardEvent.getClassSchema();

  Class<?> resolved = DataModelUtil.resolveType(StandardEvent.class, eventSchema);

  assertEquals(StandardEvent.class, resolved);
}
// A String type with a STRING writer schema is expected to produce a ReflectDatumReader.
@Test
public void testGetDatumReaderForReflectType() {
  Schema stringSchema = Schema.create(Schema.Type.STRING);

  DatumReader<String> datumReader =
      DataModelUtil.getDatumReaderForType(String.class, stringSchema);

  assertEquals(ReflectDatumReader.class, datumReader.getClass());
}
@Override public void initialize() { if (key == null) { // restore transient objects from serializable versions PartitionStrategy strategy = PartitionStrategyParser.parse(strategyString); Schema schema = new Schema.Parser().parse(schemaString); this.key = new AvroStorageKey(strategy, schema); this.accessor = DataModelUtil.accessor(type, schema); if (constraints != null) { this.provided = Constraints .fromQueryMap(schema, strategy, constraints) .getProvidedValues(); } } count = 0; }
/**
 * Creates a reader in the NEW state for a file on the given file system.
 *
 * @param fileSystem the file system holding {@code path}
 * @param path       the file to read
 * @param accessor   supplies the read schema and the entity type used to
 *                   look up the data model
 */
public JSONFileReader(FileSystem fileSystem, Path path, EntityAccessor<E> accessor) {
  this.fs = fileSystem;
  this.path = path;

  // Schema and data model both come from the accessor.
  this.schema = accessor.getReadSchema();
  this.model = DataModelUtil.getDataModelForType(accessor.getType());

  this.state = ReaderWriterState.NEW;
}
/**
 * Sets the entity Class that the input Dataset should produce.
 * <p>
 * This Class is used to configure the input {@code Dataset}. If this class
 * cannot be found during job setup, the job will fail and throw a
 * {@link org.kitesdk.data.TypeNotFoundException}.
 * <p>
 * If the type is set, then the type's schema is used for the expected
 * schema and {@link #withSchema(Schema)} should not be called. This may,
 * however, be used at the same time if the type is a generic record
 * subclass.
 *
 * @param type the entity Class that will be produced
 * @return this for method chaining
 * @throws IllegalArgumentException if a reader schema has already been
 *         configured and {@code type} is not generic
 */
public <E> ConfigBuilder withType(Class<E> type) {
  String readerSchema = conf.get(KITE_READER_SCHEMA);
  // Preconditions templates only substitute %s; an SLF4J-style {} would be
  // emitted literally with the argument appended in brackets.
  Preconditions.checkArgument(
      DataModelUtil.isGeneric(type) || readerSchema == null,
      "Can't configure a type when a reader schema is already set: %s",
      readerSchema);
  conf.setClass(KITE_TYPE, type, type);
  return this;
}
// For GenericData.Record, the reader schema is expected to equal the writer schema.
@Test
public void testGetReaderSchemaForGeneric() {
  Schema writerSchema = StandardEvent.getClassSchema();

  Schema readerSchema =
      DataModelUtil.getReaderSchema(GenericData.Record.class, writerSchema);

  assertEquals(writerSchema, readerSchema);
}
// Resolving GenericData.Record.class is expected to return the same class.
@Test
public void testResolveTypeGenericToGeneric() {
  Schema eventSchema = StandardEvent.getClassSchema();

  Class<?> resolved = DataModelUtil.resolveType(GenericData.Record.class, eventSchema);

  assertEquals(GenericData.Record.class, resolved);
}