/**
 * Brings an already-written schema file under this manager's control.
 * The schema is loaded from the given path and then written through
 * {@code writeSchema}. This is generally used for schemas produced by
 * previous versions of this library.
 *
 * @param schemaPath A path to a schema to import
 * @return The URI of the schema file managed by this manager.
 */
public URI importSchema(Path schemaPath) {
  return writeSchema(loadSchema(schemaPath));
}
/**
 * Builds a schema manager backed by the given directory, issuing a
 * {@code mkdirs} call for that directory before the manager is constructed.
 *
 * @param conf the Hadoop configuration used to resolve the file system
 * @param schemaDirectory directory in which the manager stores schemas.
 *
 * @return a schema manager instance.
 * @throws DatasetIOException if an {@link IOException} is raised while
 *         resolving the file system or creating the directory
 */
public static SchemaManager create(Configuration conf, Path schemaDirectory) {
  try {
    FileSystem fs = schemaDirectory.getFileSystem(conf);
    fs.mkdirs(schemaDirectory);
    return new SchemaManager(schemaDirectory, fs);
  } catch (IOException e) {
    throw new DatasetIOException(
        "Unable to create schema manager directory: " + schemaDirectory, e);
  }
}
/**
 * Lists every file in the schema directory and loads the schema stored in
 * each one.
 *
 * @return a map, ordered by ascending key, from the number
 *         {@code getFileNumber} reports for each file to the schema loaded
 *         from that file
 * @throws DatasetIOException if an {@link IOException} is raised while
 *         listing or reading the schema files
 */
public Map<Integer,Schema> getSchemas() {
  Map<Integer, Schema> byVersion = new TreeMap<Integer, Schema>();

  try {
    FileStatus[] entries = rootFileSystem.listStatus(schemaDirectory);

    for (int i = 0; i < entries.length; i++) {
      FileStatus entry = entries[i];
      int version = getFileNumber(entry);
      byVersion.put(version, loadSchema(entry.getPath()));
    }
  } catch (IOException e) {
    throw new DatasetIOException("Unable to list schema files.", e);
  }

  return byVersion;
}
}
/**
 * Gets the newest schema version being managed.
 *
 * @return the newest schema version, or {@code null} when
 *         {@code newestFile()} reports no file
 */
public Schema getNewestSchema() {
  Path newest = newestFile();
  if (newest == null) {
    return null;
  }
  return loadSchema(newest);
}
/**
 * Writing the identical schema twice must hand back the same URI both times.
 */
@Test
public void testSameSchemaUpdate() throws IOException {
  SchemaManager schemaManager =
      SchemaManager.create(getConfiguration(), testDirectory);

  URI first = schemaManager.writeSchema(DatasetTestUtilities.USER_SCHEMA);
  URI second = schemaManager.writeSchema(DatasetTestUtilities.USER_SCHEMA);

  Assert.assertEquals("Updating with the same schema should not create a new URI",
      first, second);
}
}
/**
 * A schema written to a fresh manager must be reported as the newest schema.
 */
@Test
public void testCreateSchema() throws IOException {
  SchemaManager schemaManager =
      SchemaManager.create(getConfiguration(), testDirectory);

  schemaManager.writeSchema(DatasetTestUtilities.USER_SCHEMA);

  Schema newest = schemaManager.getNewestSchema();
  Assert.assertEquals(DatasetTestUtilities.USER_SCHEMA, newest);
}
@Test public void testManyUpdates() throws IOException { SchemaManager manager = SchemaManager.create(getConfiguration(), testDirectory); // Create an updated schema and ensure it can be written. for (int i = 0; i < 20; ++i) { SchemaBuilder.FieldAssembler<Schema> fields = SchemaBuilder .record("test").fields(); for (int j = 0; j <= i; ++j) { fields.optionalString("field_" + j); } Schema schema = fields.endRecord(); manager.writeSchema(schema); // Ensure we always see the newest schema on load. Assert.assertEquals(schema, manager.getNewestSchema()); } // Make sure all of the updates are in place. Map<Integer, Schema> schemas = manager.getSchemas(); Assert.assertEquals(20, schemas.size()); }
// Look up the newest schema file, load the schema stored in it, and fetch the
// full map of managed schemas.
// NOTE(review): newestFile() can apparently return null (getNewestSchema checks
// for it) — confirm previousPath is null-checked before loadSchema is reached.
Path previousPath = newestFile(); Schema previousSchema = loadSchema(previousPath); Map<Integer, Schema> schemas = getSchemas();
// Open the schema manager stored in SCHEMA_DIRECTORY_NAME under metadataPath
// and record the URI of its newest schema.
// NOTE(review): SchemaManager.load is documented to return null when the
// directory does not exist — confirm the surrounding code guards against a
// null manager before this dereference.
SchemaManager manager = SchemaManager.load(conf, new Path(metadataPath, SCHEMA_DIRECTORY_NAME)); schemaURI = manager.getNewestSchemaURI();
/**
 * Loading a manager from a directory that does not exist must yield
 * {@code null}.
 */
@Test
public void testNoSchemaManagerDirectory() throws IOException {
  Path missingDirectory = new Path(testDirectory, "NO_SUCH_DIRECTORY");

  SchemaManager loaded = SchemaManager.load(getConfiguration(), missingDirectory);

  Assert.assertNull(loaded);
}
// Create (or reuse) the schema manager directory SCHEMA_DIRECTORY_NAME under
// metadataLocation and write the descriptor's schema into it. The URI returned
// by writeSchema is not used here.
SchemaManager manager = SchemaManager.create(fs.getConf(), new Path(metadataLocation, SCHEMA_DIRECTORY_NAME)); manager.writeSchema(descriptor.getSchema());
@Test public void testUpdateSchema() throws IOException { SchemaManager manager = SchemaManager.create(getConfiguration(), testDirectory); manager.writeSchema(DatasetTestUtilities.USER_SCHEMA); Schema schema = manager.getNewestSchema(); Assert.assertEquals(DatasetTestUtilities.USER_SCHEMA, schema); // Create an updated schema and ensure it can be written. Schema updatedSchema = SchemaBuilder.record(schema.getName()) .fields() .requiredString("username") .requiredString("email") .optionalBoolean("extra_field").endRecord(); manager.writeSchema(updatedSchema); Assert.assertEquals(updatedSchema, manager.getNewestSchema()); }
/**
 * Updates the Hive table backing the named dataset to match the supplied
 * descriptor. The descriptor's schema is written through a
 * {@link SchemaManager} located in {@code SCHEMA_DIRECTORY} under the table's
 * storage location, and the table is altered using a copy of the descriptor
 * that carries the resulting schema URI.
 *
 * @param namespace the dataset namespace
 * @param name the dataset name
 * @param descriptor the descriptor to apply
 * @return the {@code descriptor} argument that was passed in
 * @throws DatasetNotFoundException if the namespace cannot be resolved
 * @throws DatasetIOException if an {@link IOException} is raised while
 *         writing the schema or building the new descriptor
 */
@Override
public DatasetDescriptor update(String namespace, String name, DatasetDescriptor descriptor) {
  Compatibility.checkDatasetName(namespace, name);
  Compatibility.checkDescriptor(descriptor);

  String resolved = resolveNamespace(namespace, name);
  if (resolved == null) {
    throw new DatasetNotFoundException(
        "Hive table not found: " + namespace + "." + name);
  }

  Table table = getMetaStoreUtil().getTable(resolved, name);

  Path tableLocation = new Path(table.getSd().getLocation());
  SchemaManager schemaManager =
      SchemaManager.create(conf, new Path(tableLocation, SCHEMA_DIRECTORY));

  DatasetDescriptor withSchemaUri;
  try {
    URI managedUri = schemaManager.writeSchema(descriptor.getSchema());
    withSchemaUri = new DatasetDescriptor.Builder(descriptor)
        .schemaUri(managedUri)
        .build();
  } catch (IOException e) {
    throw new DatasetIOException("Unable to create schema", e);
  }

  HiveUtils.updateTableSchema(table, withSchemaUri);
  getMetaStoreUtil().alterTable(table);

  // NOTE(review): the caller's descriptor is returned, not the rebuilt one
  // carrying the managed schema URI — confirm this is intended.
  return descriptor;
}
/**
 * Loads the schema manager that stores its schemas in the given directory,
 * if that directory exists. Returns <code>null</code> if it does not.
 *
 * @param conf the Hadoop configuration used to resolve the file system
 * @param schemaDirectory directory in which the manager stores schemas.
 *
 * @return a schema manager instance, or <code>null</code> if the given
 * directory does not exist.
 * @throws DatasetIOException if an {@link IOException} is raised while
 *         resolving the file system or checking for the directory
 */
public static SchemaManager load(Configuration conf, Path schemaDirectory) {
  try {
    FileSystem fs = schemaDirectory.getFileSystem(conf);

    if (!fs.exists(schemaDirectory)) {
      return null;
    }

    return new SchemaManager(schemaDirectory, fs);
  } catch (IOException e) {
    throw new DatasetIOException("Cannot load schema manager at:" + schemaDirectory, e);
  }
}
/**
 * Updates the Hive table backing the named dataset to match the supplied
 * descriptor. The descriptor's schema is written through a
 * {@link SchemaManager} located in {@code SCHEMA_DIRECTORY} under the table's
 * storage location, and the table is altered using a copy of the descriptor
 * that carries the resulting schema URI.
 *
 * @param namespace the dataset namespace
 * @param name the dataset name
 * @param descriptor the descriptor to apply
 * @return the {@code descriptor} argument that was passed in
 * @throws DatasetNotFoundException if the namespace cannot be resolved
 * @throws DatasetIOException if an {@link IOException} is raised while
 *         writing the schema or building the new descriptor
 */
@Override
public DatasetDescriptor update(String namespace, String name, DatasetDescriptor descriptor) {
  Compatibility.checkDatasetName(namespace, name);
  Compatibility.checkDescriptor(descriptor);

  String resolved = resolveNamespace(namespace, name);
  if (resolved == null) {
    throw new DatasetNotFoundException(
        "Hive table not found: " + namespace + "." + name);
  }

  Table table = getMetaStoreUtil().getTable(resolved, name);

  Path tableLocation = new Path(table.getSd().getLocation());
  SchemaManager schemaManager =
      SchemaManager.create(conf, new Path(tableLocation, SCHEMA_DIRECTORY));

  DatasetDescriptor withSchemaUri;
  try {
    URI managedUri = schemaManager.writeSchema(descriptor.getSchema());
    withSchemaUri = new DatasetDescriptor.Builder(descriptor)
        .schemaUri(managedUri)
        .build();
  } catch (IOException e) {
    throw new DatasetIOException("Unable to create schema", e);
  }

  HiveUtils.updateTableSchema(table, withSchemaUri);
  getMetaStoreUtil().alterTable(table);

  // NOTE(review): the caller's descriptor is returned, not the rebuilt one
  // carrying the managed schema URI — confirm this is intended.
  return descriptor;
}
@Test(expected = IncompatibleSchemaException.class) public void testIncompatibleUpdate() { SchemaManager manager = SchemaManager.create(getConfiguration(), testDirectory); // Trivially incompatible schemas should yield an exception. manager.writeSchema(SchemaBuilder.record("test") .fields() .requiredString("foo") .endRecord()); manager.writeSchema(SchemaBuilder.record("test") .fields() .requiredString("bar") .endRecord()); }
// Create (or reuse) the schema manager directory at managerPath, write the
// descriptor's schema through it, and keep the URI that writeSchema returns.
SchemaManager manager = SchemaManager.create(conf, managerPath); URI managedSchemaUri = manager.writeSchema(descriptor.getSchema());
// Create (or reuse) the schema manager directory at managerPath, write the
// descriptor's schema through it, and keep the URI that writeSchema returns.
SchemaManager manager = SchemaManager.create(conf, managerPath); URI managedSchemaUri = manager.writeSchema(descriptor.getSchema());
@Test(expected = IncompatibleSchemaException.class) public void testIndirectIncompatibleUpdate() { SchemaManager manager = SchemaManager.create(getConfiguration(), testDirectory); // Write two schemas that are compatible since they use optional fields. manager.writeSchema(SchemaBuilder.record("test") .fields() .optionalString("foo") .endRecord()); manager.writeSchema(SchemaBuilder.record("test") .fields() .optionalString("bar") .endRecord()); // This schema creates a schema compatible with the immediately previous // version, but incompatible with the original. manager.writeSchema(SchemaBuilder.record("test") .fields() .optionalInt("foo") .endRecord()); }
// Create (or reuse) the schema manager directory at managerPath, write the
// descriptor's schema through it, and keep the URI that writeSchema returns.
SchemaManager manager = SchemaManager.create(conf, managerPath); URI schemaLocation = manager.writeSchema(descriptor.getSchema());