@Override
public void process(InputStream in) throws IOException {
  // open the incoming Avro data file with the expected reader schema
  try (DataFileStream<Record> stream = new DataFileStream<>(
      in, AvroUtil.newDatumReader(schema, Record.class))) {
    // fail fast if the file's writer schema cannot be resolved to ours
    IncompatibleSchemaException.check(
        SchemaValidationUtil.canRead(stream.getSchema(), schema),
        "Incompatible file schema %s, expected %s",
        stream.getSchema(), schema);
    long written = 0L;
    try (DatasetWriter<Record> writer = target.newWriter()) {
      for (Record record : stream) {
        writer.write(record);
        written += 1;
      }
    } finally {
      // records already written cannot be rolled back, so always count them
      session.adjustCounter("Stored records", written,
          true /* cannot roll back the write */);
    }
  }
}
});
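The guard above relies on Avro schema resolution: the file's writer schema must be readable with the expected reader schema. Below is a minimal sketch of the same check using stock Avro's SchemaValidatorBuilder rather than Kite's internal SchemaValidationUtil; the Event record and its fields are hypothetical.

import java.util.Collections;

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.SchemaValidationException;
import org.apache.avro.SchemaValidatorBuilder;

public class ResolutionCheckSketch {
  public static void main(String[] args) {
    // writer schema: what the incoming file was written with
    Schema writer = SchemaBuilder.record("Event").fields()
        .requiredString("id")
        .endRecord();
    // reader schema: what we expect; the extra field is optional (union
    // with null, default null), so resolution still succeeds
    Schema reader = SchemaBuilder.record("Event").fields()
        .requiredString("id")
        .optionalString("source")
        .endRecord();
    try {
      new SchemaValidatorBuilder().canReadStrategy().validateAll()
          .validate(reader, Collections.singletonList(writer));
      System.out.println("compatible");
    } catch (SchemaValidationException e) {
      System.out.println("incompatible: " + e.getMessage());
    }
  }
}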
/**
 * Merges two {@link Schema} instances if they are compatible.
 * <p>
 * Two schemas are incompatible if:
 * <ul>
 *   <li>The {@link Schema.Type} does not match.</li>
 *   <li>For record schemas, the record name does not match.</li>
 *   <li>For enum schemas, the enum name does not match.</li>
 * </ul>
 * <p>
 * Map value and array element types will use unions if necessary, and union
 * schemas are merged recursively.
 *
 * @param left a {@code Schema}
 * @param right a {@code Schema}
 * @return a merged {@code Schema}
 * @throws IncompatibleSchemaException if the schemas are not compatible
 */
public static Schema merge(Schema left, Schema right) {
  Schema merged = mergeOnly(left, right);
  IncompatibleSchemaException.check(merged != null,
      "Cannot merge %s and %s", left, right);
  return merged;
}
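A hedged usage sketch of merge: the User and Account schemas below are hypothetical, and the record-field union behavior is assumed from the javadoc's description of map and array types rather than stated by it.

Schema v1 = SchemaBuilder.record("User").fields()
    .requiredString("id")
    .endRecord();
Schema v2 = SchemaBuilder.record("User").fields()
    .requiredString("id")
    .optionalString("email")
    .endRecord();
// same type and record name, so this merge succeeds
Schema merged = merge(v1, v2);

Schema other = SchemaBuilder.record("Account").fields()
    .requiredString("id")
    .endRecord();
// record names differ: throws IncompatibleSchemaException
merge(v1, other);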
protected void checkSchemaForWrite() {
  IncompatibleSchemaException.check(canWrite,
      "Cannot write data with this view's schema, " +
      "it cannot be read with the dataset's schema:\n" +
      "Current schema: %s\nDataset schema: %s",
      getSchema(), dataset.getDescriptor().getSchema());
}
protected void checkSchemaForRead() {
  IncompatibleSchemaException.check(canRead,
      "Cannot read data with this view's schema:\n" +
      "Current schema: %s\nDataset schema: %s",
      // arguments match the message labels, as in checkSchemaForWrite
      getSchema(), dataset.getDescriptor().getSchema());
}
}
protected AbstractRefinableView(AbstractRefinableView<?> view, Schema schema,
                                Class<E> type) {
  if (view.dataset instanceof AbstractDataset) {
    this.dataset = ((AbstractDataset<?>) view.dataset).asType(type);
  } else {
    this.dataset = Datasets.load(view.dataset.getUri(), type);
  }
  this.comparator = view.comparator;
  this.constraints = view.constraints;
  // thread-safe, so okay to reuse when views share a partition strategy
  this.keys = view.keys;
  // Resolve our type according to the given schema
  this.accessor = DataModelUtil.accessor(type, schema);
  this.entityTest = constraints.toEntityPredicate(accessor);
  Schema datasetSchema = dataset.getDescriptor().getSchema();
  // reads resolve the dataset schema (writer) against this view's read
  // schema (reader); writes check the reverse direction
  this.canRead = SchemaValidationUtil.canRead(
      datasetSchema, accessor.getReadSchema());
  this.canWrite = SchemaValidationUtil.canRead(
      accessor.getWriteSchema(), datasetSchema);
  IncompatibleSchemaException.check(canRead || canWrite,
      "The type cannot be used to read from or write to the dataset:\n" +
      "Type schema: %s\nDataset schema: %s", getSchema(), datasetSchema);
}
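The two checks are deliberately asymmetric, so a type can qualify for one direction only. A minimal sketch, assuming SchemaValidationUtil.canRead(writtenWith, readUsing) follows Avro resolution rules as the calls above suggest; the Event schemas are hypothetical. A projection that drops a required field can still read, because readers ignore extra writer fields, but it cannot write, because the dropped field has no default for the dataset schema to fill in.

Schema datasetSchema = SchemaBuilder.record("Event").fields()
    .requiredString("id")
    .requiredLong("ts")
    .endRecord();
Schema projection = SchemaBuilder.record("Event").fields()
    .requiredString("id")
    .endRecord();

// true: every projected field resolves against the dataset schema
boolean canRead = SchemaValidationUtil.canRead(datasetSchema, projection);
// false: "ts" is required by the dataset schema but never written
boolean canWrite = SchemaValidationUtil.canRead(projection, datasetSchema);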