@Override
public Schema getSchema() {
  return accessor.getReadSchema();
}
@Override
public Object extractField(E entity, String fieldName) {
  // make sure the field is a direct child of the schema
  ValidationException.check(
      accessor.getReadSchema().getField(fieldName) != null,
      "No field named %s in schema %s",
      fieldName, accessor.getReadSchema());
  return accessor.get(entity, fieldName);
}
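// --- Illustrative sketch, not from the source ---
// The ValidationException.check above only accepts direct children of the
// read schema. The same null-check can be reproduced with stock Avro
// (org.apache.avro.SchemaBuilder); the schema and field names are made up:
Schema example = SchemaBuilder.record("Example").fields()
    .requiredString("color")
    .endRecord();
assert example.getField("color") != null;    // direct child: accepted
assert example.getField("unknown") == null;  // not a field: the check would fail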
public JSONFileReader(FileSystem fileSystem, Path path,
                      EntityAccessor<E> accessor) {
  this.fs = fileSystem;
  this.path = path;
  this.schema = accessor.getReadSchema();
  this.model = DataModelUtil.getDataModelForType(accessor.getType());
  this.state = ReaderWriterState.NEW;
}
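// --- Hypothetical usage sketch, not from the source ---
// Assumes the initialize()/hasNext()/next()/close() lifecycle seen in
// openNextReader below; the path and the GenericRecord binding are made up.
EntityAccessor<GenericRecord> accessor =
    DataModelUtil.accessor(GenericRecord.class, schema);
JSONFileReader<GenericRecord> reader = new JSONFileReader<GenericRecord>(
    fs, new Path("/tmp/users.json"), accessor);
reader.initialize();
try {
  while (reader.hasNext()) {
    GenericRecord record = reader.next();
    // process record
  }
} finally {
  reader.close();
}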
@SuppressWarnings("unchecked") public CSVFileReader(FileSystem fileSystem, Path path, DatasetDescriptor descriptor, EntityAccessor<E> accessor) { this.fs = fileSystem; this.path = path; this.schema = accessor.getReadSchema(); this.recordClass = accessor.getType(); this.state = ReaderWriterState.NEW; this.props = CSVProperties.fromDescriptor(descriptor); // defaults to false: assume that callers will not make defensive copies this.reuseRecords = DescriptorUtil.isEnabled(REUSE_RECORDS, descriptor); Preconditions.checkArgument(Schema.Type.RECORD.equals(schema.getType()), "Schemas for CSV files must be records of primitive types"); }
@SuppressWarnings("unchecked") // See https://github.com/Parquet/parquet-mr/issues/106 private void openNextReader() { if (Formats.PARQUET.equals(descriptor.getFormat())) { this.reader = new ParquetFileSystemDatasetReader(fileSystem, filesIter.next(), accessor.getReadSchema(), accessor.getType()); } else if (Formats.JSON.equals(descriptor.getFormat())) { this.reader = new JSONFileReader<E>( fileSystem, filesIter.next(), accessor); } else if (Formats.CSV.equals(descriptor.getFormat())) { this.reader = new CSVFileReader<E>(fileSystem, filesIter.next(), descriptor, accessor); } else if (Formats.INPUTFORMAT.equals(descriptor.getFormat())) { this.reader = new InputFormatReader(fileSystem, filesIter.next(), descriptor); } else { this.reader = new FileSystemDatasetReader<E>(fileSystem, filesIter.next(), accessor.getReadSchema(), accessor.getType()); } reader.initialize(); this.readerIterator = Iterators.filter(reader, constraints.toEntityPredicate( (pathIter != null ? pathIter.getStorageKey() : null), accessor)); }
protected AbstractRefinableView(AbstractRefinableView<?> view, Schema schema,
                                Class<E> type) {
  if (view.dataset instanceof AbstractDataset) {
    this.dataset = ((AbstractDataset<?>) view.dataset).asType(type);
  } else {
    this.dataset = Datasets.load(view.dataset.getUri(), type);
  }
  this.comparator = view.comparator;
  this.constraints = view.constraints;
  // thread-safe, so okay to reuse when views share a partition strategy
  this.keys = view.keys;
  // Resolve our type according to the given schema
  this.accessor = DataModelUtil.accessor(type, schema);
  this.entityTest = constraints.toEntityPredicate(accessor);

  Schema datasetSchema = dataset.getDescriptor().getSchema();
  this.canRead = SchemaValidationUtil.canRead(
      datasetSchema, accessor.getReadSchema());
  this.canWrite = SchemaValidationUtil.canRead(
      accessor.getWriteSchema(), datasetSchema);

  IncompatibleSchemaException.check(canRead || canWrite,
      "The type cannot be used to read from or write to the dataset:\n" +
      "Type schema: %s\nDataset schema: %s", getSchema(), datasetSchema);
}
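// --- Illustrative sketch, not from the source ---
// The two canRead calls point in opposite directions: reading requires the
// type's read schema to resolve the dataset schema, while writing requires
// the dataset schema to resolve the type's write schema. Stock Avro shows
// the same asymmetry (SchemaValidationUtil itself is internal to this code):
Schema dataset = SchemaBuilder.record("User").fields()
    .requiredString("name")
    .requiredString("email")
    .endRecord();
Schema projection = SchemaBuilder.record("User").fields()
    .requiredString("name")
    .endRecord();

// read direction: written with dataset, read as projection -> COMPATIBLE,
// because a reader may ignore extra writer fields
SchemaCompatibility.checkReaderWriterCompatibility(projection, dataset);

// write direction: written with projection, read as dataset -> INCOMPATIBLE,
// because "email" is missing from the writer and has no default
SchemaCompatibility.checkReaderWriterCompatibility(dataset, projection);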