/**
 * Load a {@link DatasetRepository} responsible for the dataset or view
 * identified by the given URI string.
 * <p>
 * URI formats are defined by {@code Dataset} implementations, but must begin
 * with "dataset:" or "view:".
 *
 * @param uriString a {@code Dataset} or {@code View} URI.
 * @param <R> The type of {@code DatasetRepository} expected.
 * @return a {@code DatasetRepository} responsible for the given URI.
 */
public static <R extends DatasetRepository> R repositoryFor(String uriString) {
  // parse eagerly so malformed strings fail here, then delegate to the URI overload
  URI parsed = URI.create(uriString);
  return repositoryFor(parsed);
}
/**
 * Returns the repository for this instance, creating and caching it on
 * first use from {@code buildRepoURI()}.
 */
protected DatasetRepository getDatasetRepository() {
  if (repo != null) {
    return repo;
  }
  // lazily resolve the repository the first time it is requested
  this.repo = DatasetRepositories.repositoryFor(buildRepoURI());
  return repo;
}
@Override
public void run() {
  // Resolves a repository for a multi-host ZooKeeper quorum HBase URI.
  // NOTE(review): this is the body of an anonymous Runnable whose enclosing
  // expression begins before this chunk; the trailing "});" closes it.
  DatasetRepositories.repositoryFor("repo:hbase:zk1,zk2:2000/path");
}
});
/**
 * Builds the legacy (pre-namespace) repository URI for a dataset URI.
 * <p>
 * For filesystem-backed schemes ("hdfs", "file", "hive") the namespace is
 * appended to the repository path; other schemes are returned unchanged.
 *
 * @param datasetUri the dataset URI to resolve
 * @param namespace the namespace to append for filesystem-backed repos
 * @return the legacy repository URI
 */
@VisibleForTesting
URI getLegacyRepoUri(URI datasetUri, String namespace) {
  URI repoUri = DatasetRepositories.repositoryFor(datasetUri).getUri();
  // peel off the "repo:" wrapper to inspect the storage scheme underneath
  URI inner = URI.create(repoUri.getSchemeSpecificPart());
  String storageScheme = inner.getScheme();
  boolean fsBacked = Sets.newHashSet("hdfs", "file", "hive").contains(storageScheme);
  if (fsBacked) {
    try {
      // rebuild the inner URI with the namespace appended to its path
      URI withNamespace = new URI(inner.getScheme(), inner.getUserInfo(),
          inner.getHost(), inner.getPort(),
          inner.getPath() + "/" + namespace,
          inner.getQuery(), inner.getFragment());
      repoUri = URI.create("repo:" + withNamespace.toString());
    } catch (URISyntaxException ex) {
      throw new DatasetException("Error generating legacy URI", ex);
    }
  }
  return repoUri;
}
@Test(expected = DatasetIOException.class)
public void testHdfsFailsDefault() {
  // the environment doesn't contain the HDFS URI, so this should cause a
  // DatasetRepository exception about not finding HDFS
  DatasetRepositories.repositoryFor("repo:hdfs:/");
}
/**
 * Creates a fresh mock repository with a unique "repo:mock:&lt;id&gt;" URI.
 */
public static DatasetRepository newMockRepository() {
  ensureRegistered();
  // each call increments the shared counter so repositories never collide
  return DatasetRepositories.repositoryFor("repo:mock:" + ids.incrementAndGet());
}
}
/**
 * Resolves the output repository for a job from {@code KITE_OUTPUT_URI},
 * substituting a temporary repository when the target supports one.
 *
 * @param jobContext the job whose configuration names the output URI
 * @return the repository to write job output to
 */
private static DatasetRepository getDatasetRepository(JobContext jobContext) {
  Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
  DatasetRepository repository =
      DatasetRepositories.repositoryFor(conf.get(KITE_OUTPUT_URI));
  if (!(repository instanceof TemporaryDatasetRepositoryAccessor)) {
    return repository;
  }
  // stage output in a temporary repository scoped to this job's dataset
  Dataset<Object> dataset = load(jobContext).getDataset();
  return ((TemporaryDatasetRepositoryAccessor) repository)
      .getTemporaryRepository(dataset.getNamespace(), getJobDatasetName(jobContext));
}
/**
 * Resolves the output repository for a job from {@code KITE_OUTPUT_URI},
 * substituting a temporary repository when the target supports one.
 *
 * @param jobContext the job whose configuration names the output URI
 * @return the repository to write job output to
 */
private static DatasetRepository getDatasetRepository(JobContext jobContext) {
  Configuration conf = Hadoop.JobContext.getConfiguration.invoke(jobContext);
  DatasetRepository repo =
      DatasetRepositories.repositoryFor(conf.get(KITE_OUTPUT_URI));
  if (repo instanceof TemporaryDatasetRepositoryAccessor) {
    // stage output in a temporary repository scoped to this job's dataset
    TemporaryDatasetRepositoryAccessor accessor =
        (TemporaryDatasetRepositoryAccessor) repo;
    Dataset<Object> target = load(jobContext).getDataset();
    repo = accessor.getTemporaryRepository(
        target.getNamespace(), getJobDatasetName(jobContext));
  }
  return repo;
}
@Test
public void testRepositoryForStringUri() {
  // resolving a dataset URI string should yield the registered mock repo
  URI uri = new URIBuilder(repoUri, "ns", "test").build();
  Assert.assertEquals(repo, DatasetRepositories.repositoryFor(uri.toString()));
  verifyNoMoreInteractions(repo);
}
@Test public void testHBaseURI() throws Exception { String zkQuorum = HBaseTestUtils.getConf().get(HConstants.ZOOKEEPER_QUORUM); String zkClientPort = HBaseTestUtils.getConf().get(HConstants.ZOOKEEPER_CLIENT_PORT); String zk = zkQuorum + ":" + zkClientPort; // OK since zkQuorum is a single host URI repositoryUri = new URI("repo:hbase:" + zk); DatasetRepository repo = DatasetRepositories.repositoryFor(repositoryUri); Assert.assertNotNull("Received a repository", repo); assertTrue("Repo is a HBase repo", repo instanceof HBaseDatasetRepository); assertEquals("Repository URI", repositoryUri, repo.getUri()); }
@Test
public void testRepositoryFor() {
  // resolving a dataset URI should yield the registered mock repo
  URI uri = new URIBuilder(repoUri, "ns", "test").build();
  Assert.assertEquals(repo, DatasetRepositories.repositoryFor(uri));
  verifyNoMoreInteractions(repo);
}
@Test
public void testRepositoryForViewStringUri() {
  // a view URI string (dataset URI plus a field constraint) resolves the same repo
  URI viewUri = new URIBuilder(repoUri, "ns", "test")
      .with("field", 34)
      .build();
  Assert.assertEquals(repo, DatasetRepositories.repositoryFor(viewUri.toString()));
  verifyNoMoreInteractions(repo);
}
@Test
public void testLocalAbsolute() throws URISyntaxException {
  URI uri = new URI("repo:file:///tmp/dsr-repo-test");
  DatasetRepository repository = DatasetRepositories.repositoryFor(uri);
  // the local-FS repo's metadata provider should be rooted at the URI's path
  FileSystemMetadataProvider provider = (FileSystemMetadataProvider)
      ((FileSystemDatasetRepository) repository).getMetadataProvider();
  Assert.assertEquals("Root directory should be the correct qualified path",
      new Path("file:/tmp/dsr-repo-test"), provider.getRootDirectory());
  Assert.assertEquals("Repository URI", uri, repository.getUri());
}
@Test
public void testRepositoryForView() {
  // a view URI (dataset URI plus a field constraint) resolves the same repo
  URI viewUri = new URIBuilder(repoUri, "ns", "test")
      .with("field", 34)
      .build();
  Assert.assertEquals(repo, DatasetRepositories.repositoryFor(viewUri));
  verifyNoMoreInteractions(repo);
}
@Test
public void testBasic() {
  DatasetRepository repository = DatasetRepositories.repositoryFor(repositoryUri);
  repository.delete("default", "test");
  repository.create("default", "test", descriptor);

  // loading by dataset URI should produce the same dataset the repo created
  RandomAccessDataset<Object> dataset = Datasets
      .<Object, RandomAccessDataset<Object>>load(
          URI.create("dataset:hbase:" + zk + "/test"), Object.class);
  Assert.assertNotNull("Should load dataset", dataset);
  Assert.assertTrue(dataset instanceof DaoDataset);
  Assert.assertEquals("Descriptors should match",
      repository.load("default", "test").getDescriptor(), dataset.getDescriptor());

  repository.delete("default", "test");
}
@Test
public void testRelative() {
  DatasetRepository repository =
      DatasetRepositories.repositoryFor("repo:file:target/data");
  repository.delete("ns", "test");
  repository.create("ns", "test", descriptor);

  Dataset<Record> dataset = Datasets.<Record, Dataset<Record>>
      load("dataset:file:target/data/ns/test", Record.class);
  Assert.assertNotNull("Should load dataset", dataset);
  Assert.assertTrue(dataset instanceof FileSystemDataset);

  // relative repo paths resolve against the qualified working directory
  Path workingDir = localFS.makeQualified(new Path("."));
  Assert.assertEquals("Locations should match",
      new Path(workingDir, "target/data/ns/test").toUri(),
      dataset.getDescriptor().getLocation());
  Assert.assertEquals("Descriptors should match",
      repository.load("ns", "test").getDescriptor(), dataset.getDescriptor());
  Assert.assertEquals("Should report correct namespace", "ns", dataset.getNamespace());
  Assert.assertEquals("Should report correct name", "test", dataset.getName());

  repository.delete("ns", "test");
}
@Test
public void testAbsolute() {
  DatasetRepository repository = DatasetRepositories
      .repositoryFor("repo:hdfs://" + hdfsAuth + "/tmp/data");
  repository.delete("ns", "test");
  repository.create("ns", "test", descriptor);

  Dataset<Object> dataset = Datasets.<Object, Dataset<Object>>
      load("dataset:hdfs://" + hdfsAuth + "/tmp/data/ns/test", Object.class);
  Assert.assertNotNull("Should load dataset", dataset);
  Assert.assertTrue(dataset instanceof FileSystemDataset);

  Assert.assertEquals("Locations should match",
      URI.create("hdfs://" + hdfsAuth + "/tmp/data/ns/test"),
      dataset.getDescriptor().getLocation());
  Assert.assertEquals("Descriptors should match",
      repository.load("ns", "test").getDescriptor(), dataset.getDescriptor());
  Assert.assertEquals("Should report correct namespace", "ns", dataset.getNamespace());
  Assert.assertEquals("Should report correct name", "test", dataset.getName());

  repository.delete("ns", "test");
}
@Test
public void testAbsoluteTrailingSlash() {
  // trailing slashes on both repo and dataset URIs should be tolerated
  DatasetRepository repository = DatasetRepositories
      .repositoryFor("repo:hdfs://" + hdfsAuth + "/tmp/data/");
  repository.delete("ns", "test");
  repository.create("ns", "test", descriptor);

  Dataset<Object> dataset = Datasets.<Object, Dataset<Object>>
      load("dataset:hdfs://" + hdfsAuth + "/tmp/data/ns/test/", Object.class);
  Assert.assertNotNull("Should load dataset", dataset);
  Assert.assertTrue(dataset instanceof FileSystemDataset);

  Assert.assertEquals("Locations should match",
      URI.create("hdfs://" + hdfsAuth + "/tmp/data/ns/test"),
      dataset.getDescriptor().getLocation());
  Assert.assertEquals("Descriptors should match",
      repository.load("ns", "test").getDescriptor(), dataset.getDescriptor());
  Assert.assertEquals("Should report correct namespace", "ns", dataset.getNamespace());
  Assert.assertEquals("Should report correct name", "test", dataset.getName());

  repository.delete("ns", "test");
}
@Test
public void testAbsolute() {
  DatasetRepository repository =
      DatasetRepositories.repositoryFor("repo:file:/tmp/data");
  repository.delete("ns", "test");
  repository.create("ns", "test", descriptor);

  Dataset<Record> dataset = Datasets.<Record, Dataset<Record>>
      load("dataset:file:/tmp/data/ns/test", Record.class);
  Assert.assertNotNull("Should load dataset", dataset);
  Assert.assertTrue(dataset instanceof FileSystemDataset);

  Assert.assertEquals("Locations should match",
      URI.create("file:/tmp/data/ns/test"),
      dataset.getDescriptor().getLocation());
  Assert.assertEquals("Descriptors should match",
      repository.load("ns", "test").getDescriptor(), dataset.getDescriptor());
  Assert.assertEquals("Should report correct namespace", "ns", dataset.getNamespace());
  Assert.assertEquals("Should report correct name", "test", dataset.getName());

  repository.delete("ns", "test");
}
@Test
public void testAbsoluteRoot() {
  // a repo rooted at "/" should place datasets directly under the FS root
  DatasetRepository repository = DatasetRepositories
      .repositoryFor("repo:hdfs://" + hdfsAuth + "/");
  repository.delete("ns", "test");
  repository.create("ns", "test", descriptor);

  Dataset<Object> dataset = Datasets.<Object, Dataset<Object>>
      load("dataset:hdfs://" + hdfsAuth + "/ns/test", Object.class);
  Assert.assertNotNull("Should load dataset", dataset);
  Assert.assertTrue(dataset instanceof FileSystemDataset);

  Assert.assertEquals("Locations should match",
      URI.create("hdfs://" + hdfsAuth + "/ns/test"),
      dataset.getDescriptor().getLocation());
  Assert.assertEquals("Descriptors should match",
      repository.load("ns", "test").getDescriptor(), dataset.getDescriptor());
  Assert.assertEquals("Should report correct namespace", "ns", dataset.getNamespace());
  Assert.assertEquals("Should report correct name", "test", dataset.getName());

  repository.delete("ns", "test");
}