target.getUri().toString(), timer.getDuration(TimeUnit.MILLISECONDS), true /* cannot roll back the write */);
/**
 * Returns whether the given view is backed by Hive storage, determined by
 * inspecting the scheme nested inside the view's dataset URI.
 */
private static boolean isHive(View<?> view) {
  // dataset URIs nest a storage URI in the scheme-specific part,
  // e.g. dataset:hive:/ns/name -> inner scheme "hive"
  URI storageUri = URI.create(view.getUri().getSchemeSpecificPart());
  return "hive".equals(storageUri.getScheme());
}
private static <E> boolean usePerTaskAttemptDatasets(View<E> target, Configuration conf) { // For performance reasons we should skip the intermediate task attempt and job output datasets if the // file system does not support efficient renaming (such as S3), and write to the target dataset directly. if (!FileSystemUtil.supportsRename(URI.create(target.getUri().getSchemeSpecificPart()), conf)) { return false; } // new API output committers are not called properly in Hadoop 1 return !Hadoop.isHadoop1() && target.getDataset() instanceof Mergeable; }
/**
 * Adds configuration for {@code DatasetKeyOutputFormat} to write to the
 * given {@link Dataset} or {@link View} instance.
 *
 * @param view a dataset or view
 * @return this for method chaining
 */
public ConfigBuilder writeTo(View<?> view) {
  if (view instanceof FileSystemDataset) {
    // record the target partition directory for file-system datasets
    FileSystemDataset fsDataset = (FileSystemDataset) view;
    conf.set(KITE_PARTITION_DIR,
        String.valueOf(fsDataset.getDescriptor().getLocation()));
  }
  withType(view.getType());
  return writeTo(view.getUri());
}
/**
 * Deletes each target, which may be a view URI, a dataset URI, or a bare
 * dataset name in the configured repository.
 *
 * @return 0 on success
 * @throws IOException if a delete fails
 * @throws IllegalArgumentException if no targets were given or a resolved
 *     view does not match the requested URI
 */
@Override
public int run() throws IOException {
  if (targets == null || targets.isEmpty()) {
    throw new IllegalArgumentException("No views or datasets were specified.");
  }
  for (String uriOrName : targets) {
    if (isViewUri(uriOrName)) {
      // fixed: use View<?> instead of the raw View type
      View<?> view = Datasets.load(uriOrName);
      Preconditions.checkArgument(viewMatches(view.getUri(), uriOrName),
          "Resolved view does not match requested view: " + view.getUri());
      // deletes only the records in the view, not the whole dataset
      view.deleteAll();
    } else if (isDatasetUri(uriOrName)) {
      Datasets.delete(uriOrName);
    } else {
      // bare name: delete from the configured repository/namespace
      getDatasetRepository().delete(namespace, uriOrName);
    }
    console.debug("Deleted {}", uriOrName);
  }
  return 0;
}
/**
 * Adds configuration for {@code DatasetKeyOutputFormat} to write to the
 * given {@link Dataset} or {@link View} instance.
 *
 * @param view a dataset or view
 * @return this for method chaining
 */
public ConfigBuilder writeTo(View<?> view) {
  if (view instanceof FileSystemDataset) {
    // record the target partition directory for file-system datasets
    FileSystemDataset fsDataset = (FileSystemDataset) view;
    conf.set(KITE_PARTITION_DIR,
        String.valueOf(fsDataset.getDescriptor().getLocation()));
  }
  withType(view.getType());
  return writeTo(view.getUri());
}
/**
 * Verifies that a view URI round-trips unchanged through
 * {@code Datasets.load(...).getUri()}, across several random query values.
 */
@Test
public void testURIStringEquality() {
  for (int i = 0; i < 10; i++) {
    String colorA = UUID.randomUUID().toString();
    String colorB = UUID.randomUUID().toString();
    String originalUri = "view:file:/tmp/test_name?color=" + colorA + "," + colorB;
    View<GenericRecord> view = Datasets.load(originalUri);
    String afterUri = view.getUri().toString();
    // dump context before failing so the flaky iteration can be diagnosed
    if (!afterUri.equals(originalUri)) {
      System.out.println("Iteration: " + i);
      System.out.println("Start: " + originalUri);
      System.out.println("End : " + afterUri);
    }
    Assert.assertEquals(originalUri, afterUri);
  }
}
/**
 * Runs a compaction over exactly one dataset or view.
 *
 * @return 0 if the compaction pipeline succeeded, 1 otherwise
 * @throws IOException if loading or running the task fails
 * @throws IllegalArgumentException if more than one dataset was given or the
 *     resolved view does not match the requested URI
 */
@Override
public int run() throws IOException {
  Preconditions.checkArgument(datasets.size() == 1,
      "Cannot compact multiple datasets");
  String uriOrName = datasets.get(0);
  View<Record> view = load(uriOrName, Record.class);
  if (isDatasetOrViewUri(uriOrName)) {
    Preconditions.checkArgument(viewMatches(view.getUri(), uriOrName),
        "Resolved view does not match requested view: " + view.getUri());
  }
  // fixed: declare with the generic type instead of the raw CompactionTask
  CompactionTask<Record> task = new CompactionTask<Record>(view);
  task.setConf(getConf());
  // negative values mean "use the task's default"
  if (numWriters >= 0) {
    task.setNumWriters(numWriters);
  }
  if (filesPerPartition > 0) {
    task.setFilesPerPartition(filesPerPartition);
  }
  PipelineResult result = task.run();
  if (result.succeeded()) {
    console.info("Compacted {} records in \"{}\"", task.getCount(), uriOrName);
    return 0;
  } else {
    return 1;
  }
}
/**
 * Asserts that the given URI loads the expected view, and that the loaded
 * view's own URI loads an equal view with a consistent URI.
 *
 * @param desc a short description used in assertion messages
 * @param uri the URI string under test
 * @param view the expected view
 */
public void assertViewUriEquivalent(String desc, String uri, View<GenericRecord> view) {
  View<GenericRecord> first = Datasets.load(uri);
  Assert.assertEquals("URI should produce the correct View (" + desc + ")",
      view, first);
  // round-trip: the URI reported by the loaded view must itself load correctly
  URI firstUri = first.getUri();
  View<GenericRecord> second = Datasets.load(firstUri);
  Assert.assertEquals("Loaded URI should also load correctly (" + desc + ")",
      view, second);
  Assert.assertEquals("URI should be consistent after load (" + desc + ")",
      firstUri, second.getUri());
}
}
/** * Adds configuration for {@code DatasetKeyInputFormat} to read from the * given {@link Dataset} or {@link View} instance. * * @param view a dataset or view * @return this for method chaining */ public ConfigBuilder readFrom(View<?> view) { DatasetDescriptor descriptor = view.getDataset().getDescriptor(); // if this is a partitioned dataset, add the partition location if (view instanceof FileSystemDataset) { conf.set(KITE_PARTITION_DIR, String.valueOf(descriptor.getLocation())); } // add descriptor properties to the config for (String property : descriptor.listProperties()) { conf.set(property, descriptor.getProperty(property)); } if (DataModelUtil.isGeneric(view.getType())) { Schema datasetSchema = view.getDataset().getDescriptor().getSchema(); // only set the read schema if the view is a projection if (!datasetSchema.equals(view.getSchema())) { withSchema(view.getSchema()); } } else { withType(view.getType()); } conf.set(KITE_INPUT_URI, view.getUri().toString()); return this; }
/** * Adds configuration for {@code DatasetKeyInputFormat} to read from the * given {@link Dataset} or {@link View} instance. * * @param view a dataset or view * @return this for method chaining */ public ConfigBuilder readFrom(View<?> view) { DatasetDescriptor descriptor = view.getDataset().getDescriptor(); // if this is a partitioned dataset, add the partition location if (view instanceof FileSystemDataset) { conf.set(KITE_PARTITION_DIR, String.valueOf(descriptor.getLocation())); } // add descriptor properties to the config for (String property : descriptor.listProperties()) { conf.set(property, descriptor.getProperty(property)); } if (DataModelUtil.isGeneric(view.getType())) { Schema datasetSchema = view.getDataset().getDescriptor().getSchema(); // only set the read schema if the view is a projection if (!datasetSchema.equals(view.getSchema())) { withSchema(view.getSchema()); } } else { withType(view.getType()); } conf.set(KITE_INPUT_URI, view.getUri().toString()); return this; }
target.getUri().toString(), timer.getDuration(TimeUnit.MILLISECONDS), true /* cannot roll back the write */);