// Loads and returns the dataset identified by datasetUri via Datasets.load.
// NOTE(review): the trailing "});" closes an enclosing anonymous class and call
// that start outside this view.
@Override public Dataset<GenericRecord> run() { return Datasets.load(datasetUri); } });
private SavePolicy(Context context) { String uri = context.getString(CONFIG_KITE_ERROR_DATASET_URI); Preconditions.checkArgument(uri != null, "Must set " + CONFIG_KITE_ERROR_DATASET_URI + " when " + CONFIG_FAILURE_POLICY + "=save"); if (Datasets.exists(uri)) { dataset = Datasets.load(uri, AvroFlumeEvent.class); } else { DatasetDescriptor descriptor = new DatasetDescriptor.Builder() .schema(AvroFlumeEvent.class) .build(); dataset = Datasets.create(uri, descriptor, AvroFlumeEvent.class); } nEventsHandled = 0; }
/**
 * Creates a {@link Dataset} for a dataset or view URI, using
 * {@code GenericRecord} as the entity type.
 * <p>
 * The returned dataset is empty; use a {@code DatasetWriter} to populate it.
 * The URI must begin with {@code dataset:} or {@code view:}; the rest of the
 * URI is implementation specific and depends on the dataset scheme. For a
 * view URI, the underlying dataset is created and a view of it is returned.
 *
 * @param uri a {@code Dataset} or {@code View} URI
 * @param descriptor the descriptor for the dataset to create
 * @param <V> the type of {@code Dataset} or {@code View} expected
 * @return a newly created {@code Dataset} responsible for the given URI
 * @throws NullPointerException
 *           if {@code uri} or {@code descriptor} is {@code null}
 * @throws IllegalArgumentException
 *           if {@code uri} is not a dataset or view URI
 * @throws DatasetExistsException
 *           if a {@code Dataset} for the given URI already exists
 * @throws IncompatibleSchemaException
 *           if the schema is not compatible with existing datasets with
 *           shared storage (for example, in the same HBase table)
 */
@SuppressWarnings("unchecked")
public static <V extends View<GenericRecord>> V create(
    URI uri, DatasetDescriptor descriptor) {
  // Delegate to the typed overload, fixing the entity class to GenericRecord.
  final V created = Datasets.<GenericRecord, V>create(
      uri, descriptor, GenericRecord.class);
  return created;
}
/**
 * Deletes every entry in {@code targets}.
 * <ul>
 *   <li>A view URI is loaded and {@code deleteAll()} is called on it, after
 *       checking that the resolved view matches the requested one.</li>
 *   <li>A dataset URI is passed to {@code Datasets.delete}.</li>
 *   <li>Anything else is treated as a name and deleted from the dataset
 *       repository under {@code namespace}.</li>
 * </ul>
 *
 * @return always 0
 * @throws IllegalArgumentException if no targets were given, or if a
 *           resolved view does not match the requested view URI
 * @throws IOException declared by the command contract
 */
@Override
public int run() throws IOException {
  final boolean nothingToDelete = (targets == null) || targets.isEmpty();
  if (nothingToDelete) {
    throw new IllegalArgumentException("No views or datasets were specified.");
  }

  for (String target : targets) {
    if (isViewUri(target)) {
      View resolved = Datasets.load(target);
      boolean resolvedAsRequested = viewMatches(resolved.getUri(), target);
      Preconditions.checkArgument(resolvedAsRequested,
          "Resolved view does not match requested view: " + resolved.getUri());
      resolved.deleteAll();

    } else if (isDatasetUri(target)) {
      Datasets.delete(target);

    } else {
      // Neither kind of URI: treat the target as a dataset name.
      getDatasetRepository().delete(namespace, target);
    }

    console.debug("Deleted {}", target);
  }

  return 0;
}
/**
 * Runs the correlation task from the dataset at {@code uri} into an output
 * dataset. The output URI is the single command-line argument when exactly
 * one is given, and {@code dataset:hive?dataset=correlated_events} otherwise.
 * A missing output dataset is created in Avro format with the
 * {@code CorrelatedEvents} schema.
 *
 * @param args command-line arguments; only consulted when there is exactly one
 * @return always 0
 * @throws IllegalStateException if the input dataset does not exist
 */
@Override
public int run(List<String> args) throws Exception {
  final String inputUri = uri;
  final String outputUri = (args.size() == 1)
      ? args.get(0)
      : "dataset:hive?dataset=correlated_events";

  Preconditions.checkState(Datasets.exists(inputUri),
      "input dataset doesn't exists");

  final boolean outputMissing = !Datasets.exists(outputUri);
  if (outputMissing) {
    DatasetDescriptor correlatedDescriptor = new DatasetDescriptor.Builder()
        .format("avro")
        .schema(CorrelatedEvents.class)
        .build();
    Datasets.create(outputUri, correlatedDescriptor);
  }

  new CorrelateEventsTask(inputUri, outputUri).run();

  return 0;
}
@Override public int run(String[] args) throws Exception { // Delete the users dataset boolean success = Datasets.delete("dataset:hdfs:/tmp/data/users"); return success ? 0 : 1; }
return Datasets.<GenericRecord, D>update( uri, descriptor, GenericRecord.class);
/**
 * Tests whether a {@link Dataset} exists for the given URI string.
 * <p>
 * The string is converted with {@code URI.create} and handed to the
 * {@code URI}-based overload. URIs must begin with {@code dataset:}; the
 * remainder is implementation specific, depending on the dataset scheme.
 *
 * @param uri a {@code Dataset} URI string
 * @return {@code true} if the dataset exists, {@code false} otherwise
 * @throws NullPointerException if {@code uri} is null
 * @throws IllegalArgumentException if {@code uri} is not a dataset URI
 */
public static boolean exists(String uri) {
  final URI datasetUri = URI.create(uri);
  return exists(datasetUri);
}
/**
 * Logs, at info level, the string form of every dataset URI that
 * {@code Datasets.list} returns for the given repository URI.
 *
 * @param console logger that receives one message per dataset URI
 * @param repoUri repository URI passed to {@code Datasets.list}
 */
private static void printDatasetUris(Logger console, URI repoUri) {
  final Iterable<URI> listed = Datasets.list(repoUri);
  for (URI entry : listed) {
    String rendered = entry.toString();
    console.info(rendered);
  }
}
/**
 * Creates a {@link Dataset} for a dataset or view URI string, using
 * {@code GenericRecord} as the entity type.
 * <p>
 * The returned dataset is empty; use a {@code DatasetWriter} to populate it.
 * The URI must begin with {@code dataset:} or {@code view:}; the rest of the
 * URI is implementation specific and depends on the dataset scheme. For a
 * view URI, the underlying dataset is created and a view of it is returned.
 *
 * @param uri a {@code Dataset} or {@code View} URI string
 * @param descriptor the descriptor for the dataset to create
 * @param <V> the type of {@code Dataset} or {@code View} expected
 * @return a newly created {@code Dataset} responsible for the given URI
 * @throws NullPointerException
 *           if {@code uri} or {@code descriptor} is {@code null}
 * @throws IllegalArgumentException
 *           if {@code uri} is not a dataset or view URI
 * @throws DatasetExistsException
 *           if a {@code Dataset} for the given URI already exists
 * @throws IncompatibleSchemaException
 *           if the schema is not compatible with existing datasets with
 *           shared storage (for example, in the same HBase table)
 */
@SuppressWarnings("unchecked")
public static <V extends View<GenericRecord>> V create(
    String uri, DatasetDescriptor descriptor) {
  // Delegate to the typed overload, fixing the entity class to GenericRecord.
  final V created = Datasets.<GenericRecord, V>create(
      uri, descriptor, GenericRecord.class);
  return created;
}
/**
 * Creates the events dataset at {@code uri} with the {@code StandardEvent}
 * schema and fills it with randomly generated events. Writing continues
 * while the current time minus {@code baseTimestamp} is below 36000 ms.
 * The writer is always closed, and the value of {@code counter} is printed
 * afterwards.
 *
 * @param args command-line arguments (not used)
 * @return always 0
 * @throws IllegalStateException if the events dataset already exists
 */
@Override
public int run(List<String> args) throws Exception {
  final boolean alreadyPresent = Datasets.exists(uri);
  Preconditions.checkState(!alreadyPresent, "events dataset already exists");

  DatasetDescriptor eventDescriptor = new DatasetDescriptor.Builder()
      .schema(StandardEvent.class)
      .build();
  View<StandardEvent> eventView =
      Datasets.create(uri, eventDescriptor, StandardEvent.class);

  DatasetWriter<StandardEvent> eventWriter = eventView.newWriter();
  try {
    // The condition is re-evaluated on every pass; baseTimestamp is a field
    // read afresh each time.
    while (System.currentTimeMillis() - baseTimestamp < 36000) {
      StandardEvent next = generateRandomEvent();
      eventWriter.write(next);
    }
  } finally {
    eventWriter.close();
  }

  String summary = "Generated " + counter + " events";
  System.out.println(summary);

  return 0;
}
@Override public int run(String[] args) throws Exception { // Delete the users dataset boolean success = Datasets.delete("dataset:hive?dataset=users"); return success ? 0 : 1; }
public static <D extends Dataset<GenericRecord>> D update( URI uri, DatasetDescriptor descriptor) { return Datasets.<GenericRecord, D>update( uri, descriptor, GenericRecord.class);
// Calls Datasets.exists for datasetUri and discards the result.
// NOTE(review): presumably invoked only to observe an exception or side effect
// of the lookup — confirm against the enclosing caller, which is outside this
// view (the trailing "});" closes it).
@Override public void run() { Datasets.exists(datasetUri); } });
/**
 * Lists the {@link Dataset} URIs in the repository identified by a URI
 * string.
 * <p>
 * The string is converted with {@code URI.create} and handed to the
 * {@code URI}-based overload. URI formats are defined by {@code Dataset}
 * implementations. Repository URIs passed to this method must begin with
 * {@code repo:}; for example, {@code repo:hive} lists the {@code Dataset}
 * URIs for the Hive repository.
 *
 * @param uri a {@code DatasetRepository} URI string
 * @return the URIs present in the {@code DatasetRepository}
 * @throws NullPointerException if {@code uri} is null
 * @throws IllegalArgumentException if {@code uri} is not a repository URI
 */
public static Collection<URI> list(String uri) {
  final URI repositoryUri = URI.create(uri);
  return list(repositoryUri);
}
/**
 * Loads the view to use for a flow file. The URI is taken from the
 * KITE_DATASET_URI property of the process context, with attribute
 * expressions evaluated against the given flow file, and is passed to
 * Datasets.load together with Record.class.
 *
 * @param context process context that supplies the KITE_DATASET_URI property
 * @param file flow file whose attributes are used to evaluate the property
 * @return the view returned by Datasets.load for the evaluated URI
 */
private View<Record> load(ProcessContext context, FlowFile file) { String uri = context.getProperty(KITE_DATASET_URI) .evaluateAttributeExpressions(file) .getValue(); return Datasets.load(uri, Record.class); } }
private SavePolicy(Context context) { String uri = context.getString(CONFIG_KITE_ERROR_DATASET_URI); Preconditions.checkArgument(uri != null, "Must set " + CONFIG_KITE_ERROR_DATASET_URI + " when " + CONFIG_FAILURE_POLICY + "=save"); if (Datasets.exists(uri)) { dataset = Datasets.load(uri, AvroFlumeEvent.class); } else { DatasetDescriptor descriptor = new DatasetDescriptor.Builder() .schema(AvroFlumeEvent.class) .build(); dataset = Datasets.create(uri, descriptor, AvroFlumeEvent.class); } nEventsHandled = 0; }