/**
 * Extracts the dataset name encoded in a dataset or view URI.
 *
 * @param uri The dataset or view URI
 * @return The dataset name
 */
private static String uriToName(URI uri) {
  // strip the outer scheme, then match the remainder against registered patterns
  URI storageUri = URI.create(uri.getRawSchemeSpecificPart());
  Map<String, String> match = Registration.lookupDatasetUri(storageUri).second();
  return match.get("dataset");
}
/**
 * Registers the HBase repository URI patterns and an option builder that
 * creates {@code HBaseDatasetRepository} instances from the parsed options.
 */
@Override
public void load() {
  OptionBuilder<DatasetRepository> hbaseBuilder =
      new OptionBuilder<DatasetRepository>() {
        @Override
        public DatasetRepository getFromOptions(Map<String, String> options) {
          // start from the process-wide default configuration
          Configuration conf = HBaseConfiguration.create(DefaultConfiguration.get());
          // "zk" carries the ZooKeeper quorum; index 0 is hosts, index 1 an optional port
          String[] quorum = parseHostsAndPort(options.get("zk"));
          conf.set(HConstants.ZOOKEEPER_QUORUM, quorum[0]);
          if (quorum[1] != null) {
            conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, quorum[1]);
          }
          return new HBaseDatasetRepository.Builder().configuration(conf).build();
        }
      };

  Registration.register(
      new URIPattern("hbase::zk"),
      new URIPattern("hbase::zk/:dataset?namespace=default"),
      hbaseBuilder);
}
/**
 * Load a {@link DatasetRepository} for the given dataset, view or repository URI.
 * <p>
 * URI formats are defined by {@code Dataset} implementations, but must begin
 * with "dataset:" or "view:".
 *
 * @param uri a {@code Dataset} or {@code View} URI.
 * @param <R> The type of {@code DatasetRepository} expected.
 * @return a {@code DatasetRepository} responsible for the given URI.
 * @throws NullPointerException if {@code uri} is null
 * @throws IllegalArgumentException if {@code uri} is not a repository,
 *         dataset, or view URI
 */
@SuppressWarnings("unchecked")
public static <R extends DatasetRepository> R repositoryFor(URI uri) {
  // fail fast with a clear message rather than an unexplained NPE from getScheme()
  Preconditions.checkNotNull(uri, "URI cannot be null");
  // hoist the scheme: the original called getScheme() four times
  String scheme = uri.getScheme();
  boolean isRepoUri = URIBuilder.REPO_SCHEME.equals(scheme);
  Preconditions.checkArgument(isRepoUri ||
      URIBuilder.DATASET_SCHEME.equals(scheme) ||
      URIBuilder.VIEW_SCHEME.equals(scheme),
      "Not a repository, dataset, or view URI: " + uri);

  // repo URIs resolve via the repository registry; dataset/view URIs via patterns
  Pair<DatasetRepository, Map<String, String>> pair;
  if (isRepoUri) {
    pair = Registration.lookupRepoUri(
        URI.create(uri.getRawSchemeSpecificPart()));
  } else {
    pair = Registration.lookupDatasetUri(
        URI.create(uri.getRawSchemeSpecificPart()));
  }
  return (R) pair.first();
}
/**
 * Opens the {@code DatasetRepository} registered for the given repository URI.
 *
 * @param uri a repository URI (scheme-specific part, without the "repo:" prefix)
 * @param <R> The type of {@code DatasetRepository} expected.
 * @return the repository matched for {@code uri}
 */
@SuppressWarnings("unchecked")
public static <R extends DatasetRepository> R open(URI uri) {
  Pair<DatasetRepository, Map<String, String>> match = lookupRepoUri(uri);
  return (R) match.first();
}
/**
 * Constructs a builder based on the given dataset or view URI.
 *
 * @param uri a dataset or view URI
 *
 * @since 0.17.0
 */
public URIBuilder(URI uri) {
  Preconditions.checkNotNull(uri, "URI cannot be null");
  String scheme = uri.getScheme();
  boolean viewUri = VIEW_SCHEME.equals(scheme);
  Preconditions.checkArgument(viewUri || DATASET_SCHEME.equals(scheme),
      "Not a dataset or view URI: " + uri);

  // match the inner (scheme-stripped) URI against the registered patterns
  Pair<URIPattern, Map<String, String>> match = Registration
      .lookupDatasetPattern(URI.create(uri.getRawSchemeSpecificPart()));
  this.pattern = match.first();
  this.isView = viewUri;

  // seed the default namespace first so matched options can override it
  options.put(NAMESPACE_OPTION, NAMESPACE_DEFAULT);
  options.putAll(match.second());
}
/**
 * Constructs a builder based on the given repository URI and
 * {@link Dataset#getName() dataset name}.
 *
 * @param repoUri the repository URI
 * @param namespace A namespace, or logical group name, for the dataset.
 * @param dataset the {@link Dataset} name
 * @throws NullPointerException if any argument is null
 * @throws IllegalArgumentException if {@code repoUri} is not a repository URI
 *
 * @since 0.17.0
 */
public URIBuilder(URI repoUri, String namespace, String dataset) {
  Preconditions.checkNotNull(repoUri, "Repository URI cannot be null");
  // fail fast on a null namespace instead of silently storing it in options;
  // dataset and repoUri were already checked, namespace was the odd one out
  Preconditions.checkNotNull(namespace, "Namespace cannot be null");
  Preconditions.checkNotNull(dataset, "Dataset name cannot be null");
  Preconditions.checkArgument(REPO_SCHEME.equals(repoUri.getScheme()),
      // fixed message: original was missing the space after "was:"
      "Repository URI must start with \"repo:\" but was: " + repoUri);

  Pair<URIPattern, Map<String, String>> pair = Registration
      .lookupPatternByRepoUri(URI.create(repoUri.getRawSchemeSpecificPart()));
  this.pattern = pair.first();

  options.putAll(pair.second());
  options.put(DATASET_NAME_OPTION, dataset);
  options.put(NAMESPACE_OPTION, namespace);
}
/** * List the {@link Dataset} URIs in the repository identified by the URI. * <p> * URI formats are defined by {@code Dataset} implementations. The repository * URIs you pass to this method must begin with {@code repo:}. For example, to * list the {@code Dataset} URIs for the Hive repository, provide the URI * {@code repo:hive}. * * @param uri a {@code DatasetRepository} URI * @return the URIs present in the {@code DatasetRepository} * @throws NullPointerException if {@code uri} is null * @throws IllegalArgumentException if {@code uri} is not a repository URI */ public static Collection<URI> list(URI uri) { boolean isRepo = URIBuilder.REPO_SCHEME.equals(uri.getScheme()); Preconditions.checkArgument(isRepo, "Not a repository URI: " + uri); DatasetRepository repo = Registration .open(URI.create(uri.getRawSchemeSpecificPart())); // build a URI for each dataset name URI repoUri = repo.getUri(); List<URI> datasets = Lists.newArrayList(); for (String namespace : repo.namespaces()) { for (String dataset : repo.datasets(namespace)) { datasets.add(new URIBuilder(repoUri, namespace, dataset).build()); } } return datasets; }
/**
 * Registers the HBase repository URI patterns ("hbase::zk" and
 * "hbase::zk/:dataset") with an option builder that constructs
 * HBaseDatasetRepository instances from the matched options.
 */
@Override
public void load() {
  Registration.register(
      new URIPattern("hbase::zk"),
      new URIPattern("hbase::zk/:dataset?namespace=default"),
      new OptionBuilder<DatasetRepository>() {
        @Override
        public DatasetRepository getFromOptions(Map<String, String> options) {
          // start from the process-wide default configuration
          Configuration conf = HBaseConfiguration.create(DefaultConfiguration.get());
          // "zk" carries the ZooKeeper quorum; [0] is hosts, [1] an optional port
          String[] hostsAndPort = parseHostsAndPort(options.get("zk"));
          conf.set(HConstants.ZOOKEEPER_QUORUM, hostsAndPort[0]);
          String port = hostsAndPort[1];
          if (port != null) {
            conf.set(HConstants.ZOOKEEPER_CLIENT_PORT, port);
          }
          return new HBaseDatasetRepository.Builder().configuration(conf).build();
        }
      });
}
/**
 * Get the name of the dataset from the URI
 *
 * @param uri The dataset or view URI
 * @return The dataset name
 */
private static String uriToName(URI uri) {
  // strip the dataset:/view: scheme, match the remainder against the
  // registered patterns, and read the pattern's "dataset" capture
  return Registration.lookupDatasetUri(URI.create(
      uri.getRawSchemeSpecificPart())).second().get("dataset");
}
/**
 * Registers a "mock::id" URI pattern whose builder returns a cached Mockito
 * mock repository per id, creating and caching a new mock on first use.
 */
private static void registerMockRepoBuilder() {
  final URIPattern mockPattern = new URIPattern("mock::id");
  Registration.register(mockPattern, mockPattern,
      new OptionBuilder<DatasetRepository>() {
        @Override
        public DatasetRepository getFromOptions(Map<String, String> options) {
          String id = options.get("id");
          DatasetRepository repo = repos.get(id);
          if (repo == null) {
            // first lookup for this id: create the mock and remember it
            repo = mock(org.kitesdk.data.spi.DatasetRepository.class);
            when(repo.getUri()).thenReturn(
                URI.create("repo:" + mockPattern.construct(options)));
            repos.put(id, repo);
          }
          return repo;
        }
      });
}
/**
 * Returns the dataset name captured from a dataset or view URI.
 *
 * @param uri The dataset or view URI
 * @return The dataset name
 */
private static String uriToName(URI uri) {
  // remove the outer scheme and match against the registered patterns
  URI inner = URI.create(uri.getRawSchemeSpecificPart());
  Map<String, String> options = Registration.lookupDatasetUri(inner).second();
  return options.get("dataset");
}
}
@Override public void load() { try { // load hdfs-site.xml by loading HdfsConfiguration FileSystem.getLocal(DefaultConfiguration.get()); } catch (IOException e) { throw new DatasetIOException("Cannot load default config", e); } OptionBuilder<DatasetRepository> builder = new URIBuilder(); Registration.register( new URIPattern("file:/*path?absolute=true"), new URIPattern("file:/*path/:namespace/:dataset?absolute=true"), builder); Registration.register( new URIPattern("file:*path"), new URIPattern("file:*path/:namespace/:dataset"), builder); Registration.register( new URIPattern("hdfs:/*path?absolute=true"), new URIPattern("hdfs:/*path/:namespace/:dataset?absolute=true"), builder); Registration.register( new URIPattern("hdfs:*path"), new URIPattern("hdfs:*path/:namespace/:dataset"), builder); Registration.register( new URIPattern("webhdfs:/*path?absolute=true"), new URIPattern("webhdfs:/*path/:namespace/:dataset?absolute=true"), builder); }
/**
 * Returns the pattern-match options for the given dataset or view URI.
 *
 * @param uri a dataset or view URI
 * @return the options captured by the matching URI pattern
 * @throws IllegalArgumentException if {@code uri} is not a dataset or view URI
 */
protected static Map<String, String> optionsForUri(URI uri) {
  Preconditions.checkArgument(isDatasetOrViewUri(uri.toString()),
      "Must be a dataset or view URI: " + uri);
  // use the RAW (undecoded) scheme-specific part, consistent with every other
  // Registration lookup in this codebase; getSchemeSpecificPart() would
  // percent-decode the URI and could corrupt encoded option values before
  // pattern matching
  return Registration.lookupDatasetUri(
      URI.create(uri.getRawSchemeSpecificPart())).second();
}
@Override public void load() { try { // load hdfs-site.xml by loading HdfsConfiguration FileSystem.getLocal(DefaultConfiguration.get()); } catch (IOException e) { throw new DatasetIOException("Cannot load default config", e); } OptionBuilder<DatasetRepository> builder = new URIBuilder(); // username and secret are the same; host is the bucket Registration.register( new URIPattern("s3n:/*path"), new URIPattern("s3n:/*path/:namespace/:dataset"), builder); Registration.register( new URIPattern("s3a:/*path"), new URIPattern("s3a:/*path/:namespace/:dataset"), builder); }
/**
 * Deletes the dataset identified by the URI, optionally moving its data to
 * the trash instead of removing it permanently.
 *
 * @param uri a dataset URI
 * @param useTrash when true, move to trash; when false, delete permanently
 * @return the result of the repository's moveToTrash or delete call
 */
private static boolean deleteWithTrash(URI uri, boolean useTrash) {
  Preconditions.checkArgument(
      URIBuilder.DATASET_SCHEME.equals(uri.getScheme()),
      "Not a dataset URI: " + uri);

  Pair<DatasetRepository, Map<String, String>> pair =
      Registration.lookupDatasetUri(URI.create(uri.getRawSchemeSpecificPart()));
  DatasetRepository repo = pair.first();
  Map<String, String> uriOptions = pair.second();

  String namespace = uriOptions.get(URIBuilder.NAMESPACE_OPTION);
  String name = uriOptions.get(URIBuilder.DATASET_NAME_OPTION);
  if (useTrash) {
    return repo.moveToTrash(namespace, name);
  } else {
    return repo.delete(namespace, name);
  }
}
@Override public void load() { try { // load hdfs-site.xml by loading HdfsConfiguration FileSystem.getLocal(DefaultConfiguration.get()); } catch (IOException e) { throw new DatasetIOException("Cannot load default config", e); } OptionBuilder<DatasetRepository> builder = new URIBuilder(); // username and secret are the same; host is the bucket Registration.register( new URIPattern("s3n:/*path"), new URIPattern("s3n:/*path/:namespace/:dataset"), builder); Registration.register( new URIPattern("s3a:/"), new URIPattern("s3a:/:namespace/:dataset"), builder); }
/**
 * Check whether a {@link Dataset} identified by the given URI exists.
 * <p>
 * URIs must begin with {@code dataset:}. The remainder of
 * the URI is implementation specific, depending on the dataset scheme.
 *
 * @param uri a {@code Dataset} URI
 * @return {@code true} if the dataset exists, {@code false} otherwise
 * @throws NullPointerException if {@code uri} is null
 * @throws IllegalArgumentException if {@code uri} is not a dataset URI
 */
public static boolean exists(URI uri) {
  Preconditions.checkArgument(
      URIBuilder.DATASET_SCHEME.equals(uri.getScheme()),
      "Not a dataset URI: " + uri);

  // resolve the repository and the matched pattern options together
  Pair<DatasetRepository, Map<String, String>> match =
      Registration.lookupDatasetUri(URI.create(uri.getRawSchemeSpecificPart()));
  Map<String, String> opts = match.second();
  return match.first().exists(
      opts.get(URIBuilder.NAMESPACE_OPTION),
      opts.get(URIBuilder.DATASET_NAME_OPTION));
}
// Managed Hive tables, no authority: "hive:ns/name", "hive:name", bare "hive"
Registration.register(
    new URIPattern("hive"),
    new URIPattern("hive::namespace/:dataset"),
    managedBuilder);
Registration.register(
    new URIPattern("hive"),
    new URIPattern("hive::dataset?namespace=default"),
    managedBuilder);
Registration.register(
    new URIPattern("hive"),
    new URIPattern("hive?namespace=default"),
    managedBuilder);
// Managed tables with an empty authority (NOT_SET placeholder host)
Registration.register(
    new URIPattern("hive://" + NOT_SET),
    new URIPattern("hive:/:namespace/:dataset"),
    managedBuilder);
Registration.register(
    new URIPattern("hive://" + NOT_SET),
    new URIPattern("hive:/:dataset?namespace=default"),
    managedBuilder);
Registration.register(
    new URIPattern("hive://" + NOT_SET),
    new URIPattern("hive://" + NOT_SET + "?namespace=default"),
    managedBuilder);
// External tables rooted at an absolute path
// (call continues past this chunk; builder argument not visible here)
Registration.register(
    new URIPattern("hive:/*path?absolute=true"),
    new URIPattern("hive:/*path/:namespace/:dataset?absolute=true"),
// continuation of an assignment started above this chunk: match the
// scheme-stripped URI against the registered dataset patterns
Registration.lookupDatasetUri(URI.create(uri.getRawSchemeSpecificPart()));
// pair.first() is the owning repository; pair.second() the pattern captures
DatasetRepository repo = pair.first();
Map<String, String> uriOptions = pair.second();
// Managed Hive tables, no authority component
Registration.register(
    new URIPattern("hive"),
    new URIPattern("hive::namespace/:dataset"),
    managedBuilder);
Registration.register(
    new URIPattern("hive"),
    new URIPattern("hive::dataset?namespace=default"),
    managedBuilder);
Registration.register(
    new URIPattern("hive"),
    new URIPattern("hive?namespace=default"),
    managedBuilder);
// Managed tables where the authority is the NOT_SET placeholder
Registration.register(
    new URIPattern("hive://" + NOT_SET),
    new URIPattern("hive:/:namespace/:dataset"),
    managedBuilder);
Registration.register(
    new URIPattern("hive://" + NOT_SET),
    new URIPattern("hive:/:dataset?namespace=default"),
    managedBuilder);
Registration.register(
    new URIPattern("hive://" + NOT_SET),
    new URIPattern("hive://" + NOT_SET + "?namespace=default"),
    managedBuilder);
// External tables at an absolute filesystem path
// (register call is truncated in this chunk; third argument follows below)
Registration.register(
    new URIPattern("hive:/*path?absolute=true"),
    new URIPattern("hive:/*path/:namespace/:dataset?absolute=true"),