private void runDistcp(Path from, Path to) throws Exception { info("sourcePath: " + from + ", destinationPath: " + to); EmbeddedGobblin embeddedGobblin = new EmbeddedGobblinDistcp(from, to).mrMode(); // Used for global throttling" embeddedGobblin.distributeJar("lib/*"); for (Map.Entry<String, String> entry : this.props.entrySet()) { if (entry.getKey() != null && (entry.getKey()).startsWith(ATTR_PREFIX)) { String key = (entry.getKey()).substring(ATTR_PREFIX.length()); embeddedGobblin.setConfiguration(key, entry.getValue()); } } JobExecutionResult result = embeddedGobblin.run(); if (!result.isSuccessful()) { throw new RuntimeException("Distcp job failed!", result.getErrorCause()); } }
public EmbeddedGobblinDistcp(Path from, Path to) throws JobTemplate.TemplateException, IOException { super("Distcp"); try { setTemplate(ResourceBasedJobTemplate.forResourcePath("templates/distcp.template")); } catch (URISyntaxException | SpecNotFoundException exc) { throw new RuntimeException("Could not instantiate an " + EmbeddedGobblinDistcp.class.getName(), exc); } this.setConfiguration("from", from.toString()); this.setConfiguration("to", to.toString()); // Infer source and target fs uris from the input paths this.setConfiguration(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, from.getFileSystem(new Configuration()).getUri().toString()); this.setConfiguration(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, to.getFileSystem(new Configuration()).getUri().toString()); // add gobblin-data-management jar to distributed jars this.distributeJar(ClassUtil.findContainingJar(CopySource.class)); }
/**
 * Creates an {@link EmbeddedGobblinDistcp} from the positional command line arguments.
 *
 * <p>Exactly two leftover arguments are expected: the source path followed by the target path.
 *
 * @param cli parsed command line
 * @return a distcp job copying the first positional argument to the second
 * @throws RuntimeException if the number of leftover arguments is not two
 */
@Override
public EmbeddedGobblin constructEmbeddedGobblin(CommandLine cli)
    throws JobTemplate.TemplateException, IOException {
  final String[] positional = cli.getArgs();
  if (positional.length == 2) {
    final Path source = new Path(positional[0]);
    final Path target = new Path(positional[1]);
    return new EmbeddedGobblinDistcp(source, target);
  }
  throw new RuntimeException("Unexpected number of arguments.");
}
/**
 * Run in simulate mode. Will log everything it would copy, but not actually copy anything.
 *
 * @return this instance, for call chaining
 */
public EmbeddedGobblinDistcp simulate() {
  final String enabled = String.valueOf(true);
  setConfiguration(CopySource.SIMULATE, enabled);
  return this;
}
/**
 * When used together with {@link #delete()}, requests that parent directories left empty by the
 * deletion are removed as well.
 *
 * @return this instance, for call chaining
 */
@CliObjectOption(description = "If deleting files on target, also delete newly empty parent directories.")
public EmbeddedGobblinDistcp deleteEmptyParentDirectories() {
  final String enabled = String.valueOf(true);
  setConfiguration(RecursiveCopyableDataset.DELETE_EMPTY_DIRECTORIES_KEY, enabled);
  return this;
}
/**
 * Requests that target files be updated when they have changed in the source. Equivalent to the
 * -update option in Hadoop distcp.
 *
 * @return this instance, for call chaining
 */
@CliObjectOption(description = "Specifies files should be updated if they're different in the source.")
public EmbeddedGobblinDistcp update() {
  final String enabled = String.valueOf(true);
  setConfiguration(RecursiveCopyableDataset.UPDATE_KEY, enabled);
  return this;
}
/**
 * Requests that target files with no counterpart in the source be deleted. Equivalent to the
 * -delete option in Hadoop distcp.
 *
 * @return this instance, for call chaining
 */
@CliObjectOption(description = "Delete files in target that don't exist on source.")
public EmbeddedGobblinDistcp delete() {
  final String enabled = String.valueOf(true);
  setConfiguration(RecursiveCopyableDataset.DELETE_KEY, enabled);
  return this;
}