// Fragment: the enclosing method and the first branch's condition are
// truncated in the original and left elided here. The try body is completed
// with the natural Schema.Parser call; that completion is an assumption,
// not verified against the original source.
  return Datasets.load(uri).getDataset().getDescriptor().getSchema();
} else if ("resource".equals(uri.getScheme())) {
  try (InputStream in =
      Resources.getResource(uri.getSchemeSpecificPart()).openStream()) {
    return new Schema.Parser().parse(in);
  }
}
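// Usage sketch (not from the original source): resolving a schema from a
// classpath resource URI with the same Guava and Avro calls used above. The
// "standard_event.avsc" resource name is borrowed from a test further down.
URI uri = URI.create("resource:standard_event.avsc");
try (InputStream in =
    Resources.getResource(uri.getSchemeSpecificPart()).openStream()) {
  Schema schema = new Schema.Parser().parse(in);
}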
final Schema schema = target.getDataset().getDescriptor().getSchema();
public CSVAppender(FileSystem fs, Path path, DatasetDescriptor descriptor) {
  this.fs = fs;
  this.path = path;
  this.schema = descriptor.getSchema();
  // Guava's Preconditions expects %s placeholders; the original SLF4J-style
  // "{}" would never be substituted into the failure message.
  Preconditions.checkState(schema.getType() == Schema.Type.RECORD,
      "Unsupported schema (not a record): %s", schema);
  this.props = CSVProperties.fromDescriptor(descriptor);
}
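// Hedged sketch: assembling a CSV-aware descriptor before constructing the
// appender. The "kite.csv.has-header" key is taken from the reader test
// below; the "Event" record schema is illustrative only.
// (SchemaBuilder is org.apache.avro.SchemaBuilder.)
Schema csvSchema = SchemaBuilder.record("Event").fields()
    .requiredString("message")
    .endRecord();
DatasetDescriptor csvDescriptor = new DatasetDescriptor.Builder()
    .property("kite.csv.has-header", "true")
    .schema(csvSchema)
    .build();
CSVProperties csvProps = CSVProperties.fromDescriptor(csvDescriptor);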
public CSVRecordParser(CSVProperties props, View<E> view,
                       @Nullable List<String> header) {
  this(props, view.getDataset().getDescriptor().getSchema(),
      view.getType(), header);
}
public IncrementalDatasetWriterCacheLoader(FileSystemView<E> view,
                                           ConfAccessor conf) {
  this.view = view;
  this.convert = new PathConversion(
      view.getDataset().getDescriptor().getSchema());
  this.conf = conf;
}
protected void checkSchemaForWrite() {
  IncompatibleSchemaException.check(canWrite,
      "Cannot write data with this view's schema because it cannot be " +
      "read with the dataset's schema:\n" +
      "Current schema: %s\nDataset schema: %s",
      getSchema(), dataset.getDescriptor().getSchema());
}
public DatasetWriterCacheLoader(FileSystemView<E> view, ConfAccessor conf) {
  this.view = view;
  this.convert = new PathConversion(
      view.getDataset().getDescriptor().getSchema());
  this.conf = conf;
}
protected void checkSchemaForRead() {
  IncompatibleSchemaException.check(canRead,
      "Cannot read data with this view's schema:\n" +
      "Current schema: %s\nDataset schema: %s",
      dataset.getDescriptor().getSchema(), getSchema());
}
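// Illustrative only (an assumption, not the library's internal mechanism):
// plain Avro can answer the same reader/writer compatibility question that
// the canRead and canWrite flags above capture.
// (SchemaCompatibility is org.apache.avro.SchemaCompatibility.)
SchemaCompatibility.SchemaPairCompatibility compat =
    SchemaCompatibility.checkReaderWriterCompatibility(
        readerSchema, writerSchema);
boolean compatible = compat.getType()
    == SchemaCompatibility.SchemaCompatibilityType.COMPATIBLE;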
@Override
public AvroEntitySchema parseEntitySchema(String rawSchema) {
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaLiteral(rawSchema)
      .build();
  return new AvroEntitySchema(
      descriptor.getSchema(), rawSchema, descriptor.getColumnMapping());
}
public DatasetRecordWriter(View<E> view, boolean copyRecords) {
  this.datasetWriter = view.newWriter();
  this.schema = view.getDataset().getDescriptor().getSchema();
  this.dataModel = DataModelUtil.getDataModelForType(view.getType());
  this.copyRecords = copyRecords;
}
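// Usage sketch: the writer obtained from view.newWriter() above follows the
// usual write-then-close pattern (view and record names are illustrative).
DatasetWriter<GenericRecord> writer = view.newWriter();
try {
  writer.write(record);
} finally {
  writer.close();
}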
@Override
public AvroKeySchema parseKeySchema(String rawSchema) {
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaLiteral(rawSchema)
      .build();
  return new AvroKeySchema(
      descriptor.getSchema(), descriptor.getPartitionStrategy());
}
@Test
public void testSchemaFromAvroDataFile() throws Exception {
  URI uri = Resources.getResource("data/strings-100.avro").toURI();
  Schema schema = new DatasetDescriptor.Builder()
      .schemaFromAvroDataFile(uri)
      .build()
      .getSchema();
  Assert.assertEquals(DatasetTestUtilities.STRING_SCHEMA, schema);
}
@Override
public AvroEntitySchema parseEntitySchema(String rawSchema,
                                          ColumnMapping columnMapping) {
  // use DatasetDescriptor.Builder because it checks consistency
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaLiteral(rawSchema)
      .columnMapping(columnMapping)
      .build();
  return new AvroEntitySchema(
      descriptor.getSchema(), rawSchema, descriptor.getColumnMapping());
}
@Override
public AvroKeySchema parseKeySchema(String rawSchema,
                                    PartitionStrategy partitionStrategy) {
  // use DatasetDescriptor.Builder because it checks consistency
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaLiteral(rawSchema)
      .partitionStrategy(partitionStrategy)
      .build();
  return new AvroKeySchema(
      descriptor.getSchema(), descriptor.getPartitionStrategy());
}
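// Why the Builder is used for consistency: build() verifies that the
// partition strategy (or column mapping) only references fields that exist
// in the schema, so mismatches fail fast instead of at read/write time.
// A minimal sketch, assuming an identity partitioner over a "username"
// field (schema and field names are illustrative):
DatasetDescriptor checked = new DatasetDescriptor.Builder()
    .schemaLiteral("{\"type\": \"record\", \"name\": \"User\", \"fields\": ["
        + "{\"name\": \"username\", \"type\": \"string\"}]}")
    .partitionStrategy(new PartitionStrategy.Builder()
        .identity("username")
        .build())
    .build();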
@Override
public <T> View<T> asType(Class<T> type) {
  if (DataModelUtil.isGeneric(type)) {
    // if the type is generic, don't reset the schema
    return project(getSchema(), type);
  }
  // otherwise, the type determines the schema
  return project(getDataset().getDescriptor().getSchema(), type);
}
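// Usage sketch for asType (names illustrative): a generic data class keeps
// the view's current schema, while a specific class substitutes its own.
View<GenericRecord> generic = view.asType(GenericRecord.class);
View<StandardEvent> specific = view.asType(StandardEvent.class);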
@Test
public void testSchemaFromResourceURI() throws Exception {
  String uri = "resource:standard_event.avsc";
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schemaUri(uri)
      .build();
  Assert.assertNotNull(descriptor);
  Assert.assertNotNull(descriptor.getSchema());
}
@Override
public DatasetReader<GenericData.Record> newReader() throws IOException {
  final DatasetDescriptor desc = new DatasetDescriptor.Builder()
      .property("kite.csv.has-header", "true")
      .schema(VALIDATOR_SCHEMA)
      .build();
  return new CSVFileReader<GenericData.Record>(localfs, validatorFile, desc,
      DataModelUtil.accessor(GenericData.Record.class, desc.getSchema()));
}
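// Usage sketch: consuming the reader returned above. Kite's DatasetReader
// is a Closeable iterator (lifecycle details assumed, not verified).
DatasetReader<GenericData.Record> reader = newReader();
try {
  while (reader.hasNext()) {
    GenericData.Record record = reader.next();
    // process record ...
  }
} finally {
  reader.close();
}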
@Test
public void testDescriptorValidationPasses() {
  DatasetDescriptor descriptor = new DatasetDescriptor.Builder()
      .schema(schema)
      .partitionStrategy(strategy)
      .build();
  Assert.assertEquals("Descriptor should have correct schema",
      schema, descriptor.getSchema());
  Assert.assertEquals("Descriptor should have correct strategy",
      strategy, descriptor.getPartitionStrategy());
}
@Test
public void testLoad() {
  ensureCreated();
  Dataset dataset = repo.load(NAMESPACE, NAME);
  Assert.assertNotNull("Dataset is loaded and produced", dataset);
  Assert.assertEquals("Dataset name is propagated", NAME, dataset.getName());
  Assert.assertEquals("Dataset schema is loaded",
      testSchema, dataset.getDescriptor().getSchema());
}