/**
 * Sets the expected schema to use when reading records from the Dataset.
 * <p>
 * If this schema is set, {@link #withType(Class)} should only be called
 * with a generic record subclass.
 * <p>
 * The schema is stored in the configuration under {@code KITE_READER_SCHEMA}
 * in its {@code toString()} form.
 *
 * @param readerSchema the expected entity schema
 * @return this for method chaining
 * @throws IllegalArgumentException if a type is already configured under
 *         {@code KITE_TYPE} and {@code DataModelUtil.isGeneric} rejects it
 *         (assuming {@code Preconditions} is Guava's)
 * @since 1.1.0
 */
public ConfigBuilder withSchema(Schema readerSchema) {
  // A reader schema may only be combined with no type at all or with a generic type.
  Class<?> type = conf.getClass(KITE_TYPE, null);
  // Guava-style Preconditions templates substitute only "%s"; a "{}" marker
  // would be emitted literally with the argument appended in brackets.
  Preconditions.checkArgument(
      type == null || DataModelUtil.isGeneric(type),
      "Can't configure a reader schema when a type is already set: %s",
      type);
  conf.set(KITE_READER_SCHEMA, readerSchema.toString());
  return this;
}
/**
 * Sets the expected schema to use when reading records from the Dataset.
 * <p>
 * If this schema is set, {@link #withType(Class)} should only be called
 * with a generic record subclass.
 * <p>
 * The schema is stored in the configuration under {@code KITE_READER_SCHEMA}
 * in its {@code toString()} form.
 *
 * @param readerSchema the expected entity schema
 * @return this for method chaining
 * @throws IllegalArgumentException if a type is already configured under
 *         {@code KITE_TYPE} and {@code DataModelUtil.isGeneric} rejects it
 *         (assuming {@code Preconditions} is Guava's)
 * @since 1.1.0
 */
public ConfigBuilder withSchema(Schema readerSchema) {
  // A reader schema may only be combined with no type at all or with a generic type.
  Class<?> type = conf.getClass(KITE_TYPE, null);
  // Guava-style Preconditions templates substitute only "%s"; a "{}" marker
  // would be emitted literally with the argument appended in brackets.
  Preconditions.checkArgument(
      type == null || DataModelUtil.isGeneric(type),
      "Can't configure a reader schema when a type is already set: %s",
      type);
  conf.set(KITE_READER_SCHEMA, readerSchema.toString());
  return this;
}
/**
 * Sets the entity Class that the input Dataset should produce.
 * <p>
 * This Class is used to configure the input {@code Dataset}. If this class
 * cannot be found during job setup, the job will fail and throw a
 * {@link org.kitesdk.data.TypeNotFoundException}.
 * <p>
 * If the type is set, then the type's schema is used for the expected
 * schema and {@link #withSchema(Schema)} should not be called. This may,
 * however, be used at the same time if the type is a generic record
 * subclass.
 * <p>
 * The class is stored in the configuration under {@code KITE_TYPE}.
 *
 * @param <E> the entity type
 * @param type the entity Class that will be produced
 * @return this for method chaining
 * @throws IllegalArgumentException if a reader schema is already configured
 *         under {@code KITE_READER_SCHEMA} and {@code DataModelUtil.isGeneric}
 *         rejects {@code type} (assuming {@code Preconditions} is Guava's)
 */
public <E> ConfigBuilder withType(Class<E> type) {
  // A non-generic type may only be set while no reader schema is configured.
  String readerSchema = conf.get(KITE_READER_SCHEMA);
  // Guava-style Preconditions templates substitute only "%s"; a "{}" marker
  // would be emitted literally with the argument appended in brackets.
  Preconditions.checkArgument(
      DataModelUtil.isGeneric(type) || readerSchema == null,
      "Can't configure a type when a reader schema is already set: %s",
      readerSchema);
  conf.setClass(KITE_TYPE, type, type);
  return this;
}
/**
 * Sets the entity Class that the input Dataset should produce.
 * <p>
 * This Class is used to configure the input {@code Dataset}. If this class
 * cannot be found during job setup, the job will fail and throw a
 * {@link org.kitesdk.data.TypeNotFoundException}.
 * <p>
 * If the type is set, then the type's schema is used for the expected
 * schema and {@link #withSchema(Schema)} should not be called. This may,
 * however, be used at the same time if the type is a generic record
 * subclass.
 * <p>
 * The class is stored in the configuration under {@code KITE_TYPE}.
 *
 * @param <E> the entity type
 * @param type the entity Class that will be produced
 * @return this for method chaining
 * @throws IllegalArgumentException if a reader schema is already configured
 *         under {@code KITE_READER_SCHEMA} and {@code DataModelUtil.isGeneric}
 *         rejects {@code type} (assuming {@code Preconditions} is Guava's)
 */
public <E> ConfigBuilder withType(Class<E> type) {
  // A non-generic type may only be set while no reader schema is configured.
  String readerSchema = conf.get(KITE_READER_SCHEMA);
  // Guava-style Preconditions templates substitute only "%s"; a "{}" marker
  // would be emitted literally with the argument appended in brackets.
  Preconditions.checkArgument(
      DataModelUtil.isGeneric(type) || readerSchema == null,
      "Can't configure a type when a reader schema is already set: %s",
      readerSchema);
  conf.setClass(KITE_TYPE, type, type);
  return this;
}
/** * If E implements GenericRecord, but does not implement SpecificRecord, then * create a new instance of E using reflection so that GenericDataumReader * will use the expected type. * * Implementations of GenericRecord that require a {@link Schema} parameter * in the constructor should implement SpecificData.SchemaConstructable. * Otherwise, your implementation must have a no-args constructor. * * @param <E> The entity type * @param type The Java class of the entity type * @param schema The reader schema * @return An instance of E, or null if the data model is specific or reflect */ @SuppressWarnings("unchecked") public static <E> E createRecord(Class<E> type, Schema schema) { // Don't instantiate SpecificRecords or interfaces. if (isGeneric(type) && !type.isInterface()) { if (GenericData.Record.class.equals(type)) { return (E) GenericData.get().newRecord(null, schema); } return (E) ReflectData.newInstance(type, schema); } return null; }
@Override public <T> View<T> asType(Class<T> type) { if (DataModelUtil.isGeneric(type)) { // if the type is generic, don't reset the schema return project(getSchema(), type); } // otherwise, the type determines the schema return project(getDataset().getDescriptor().getSchema(), type); }
/** * Adds configuration for {@code DatasetKeyInputFormat} to read from the * given {@link Dataset} or {@link View} instance. * * @param view a dataset or view * @return this for method chaining */ public ConfigBuilder readFrom(View<?> view) { DatasetDescriptor descriptor = view.getDataset().getDescriptor(); // if this is a partitioned dataset, add the partition location if (view instanceof FileSystemDataset) { conf.set(KITE_PARTITION_DIR, String.valueOf(descriptor.getLocation())); } // add descriptor properties to the config for (String property : descriptor.listProperties()) { conf.set(property, descriptor.getProperty(property)); } if (DataModelUtil.isGeneric(view.getType())) { Schema datasetSchema = view.getDataset().getDescriptor().getSchema(); // only set the read schema if the view is a projection if (!datasetSchema.equals(view.getSchema())) { withSchema(view.getSchema()); } } else { withType(view.getType()); } conf.set(KITE_INPUT_URI, view.getUri().toString()); return this; }
/** * Adds configuration for {@code DatasetKeyInputFormat} to read from the * given {@link Dataset} or {@link View} instance. * * @param view a dataset or view * @return this for method chaining */ public ConfigBuilder readFrom(View<?> view) { DatasetDescriptor descriptor = view.getDataset().getDescriptor(); // if this is a partitioned dataset, add the partition location if (view instanceof FileSystemDataset) { conf.set(KITE_PARTITION_DIR, String.valueOf(descriptor.getLocation())); } // add descriptor properties to the config for (String property : descriptor.listProperties()) { conf.set(property, descriptor.getProperty(property)); } if (DataModelUtil.isGeneric(view.getType())) { Schema datasetSchema = view.getDataset().getDescriptor().getSchema(); // only set the read schema if the view is a projection if (!datasetSchema.equals(view.getSchema())) { withSchema(view.getSchema()); } } else { withType(view.getType()); } conf.set(KITE_INPUT_URI, view.getUri().toString()); return this; }