/**
 * Enables simulate mode: everything that would be copied is logged, but nothing is actually copied.
 *
 * <p>Implemented by setting the {@link CopySource#SIMULATE} configuration key to {@code "true"}.
 *
 * @return this instance, so that option calls can be chained.
 */
public EmbeddedGobblinDistcp simulate() {
  final String enabled = Boolean.TRUE.toString();
  setConfiguration(CopySource.SIMULATE, enabled);
  return this;
}
/**
 * When used together with {@link #delete()}, requests that parent directories left empty by the deletion
 * are removed as well.
 *
 * <p>Implemented by setting {@link RecursiveCopyableDataset#DELETE_EMPTY_DIRECTORIES_KEY} to {@code "true"}.
 *
 * @return this instance, so that option calls can be chained.
 */
@CliObjectOption(description = "If deleting files on target, also delete newly empty parent directories.")
public EmbeddedGobblinDistcp deleteEmptyParentDirectories() {
  final String enabled = Boolean.TRUE.toString();
  setConfiguration(RecursiveCopyableDataset.DELETE_EMPTY_DIRECTORIES_KEY, enabled);
  return this;
}
/**
 * Requests that files already present in the target are updated when they have changed in the source.
 * This mirrors the {@code -update} option of Hadoop distcp.
 *
 * <p>Implemented by setting {@link RecursiveCopyableDataset#UPDATE_KEY} to {@code "true"}.
 *
 * @return this instance, so that option calls can be chained.
 */
@CliObjectOption(description = "Specifies files should be updated if they're different in the source.")
public EmbeddedGobblinDistcp update() {
  final String enabled = Boolean.TRUE.toString();
  setConfiguration(RecursiveCopyableDataset.UPDATE_KEY, enabled);
  return this;
}
/**
 * Requests that files which exist in the target but not in the source are deleted.
 * This mirrors the {@code -delete} option of Hadoop distcp.
 *
 * <p>Implemented by setting {@link RecursiveCopyableDataset#DELETE_KEY} to {@code "true"}.
 *
 * @return this instance, so that option calls can be chained.
 */
@CliObjectOption(description = "Delete files in target that don't exist on source.")
public EmbeddedGobblinDistcp delete() {
  final String enabled = Boolean.TRUE.toString();
  setConfiguration(RecursiveCopyableDataset.DELETE_KEY, enabled);
  return this;
}
public EmbeddedGobblinDistcp(Path from, Path to) throws JobTemplate.TemplateException, IOException { super("Distcp"); try { setTemplate(ResourceBasedJobTemplate.forResourcePath("templates/distcp.template")); } catch (URISyntaxException | SpecNotFoundException exc) { throw new RuntimeException("Could not instantiate an " + EmbeddedGobblinDistcp.class.getName(), exc); } this.setConfiguration("from", from.toString()); this.setConfiguration("to", to.toString()); // Infer source and target fs uris from the input paths this.setConfiguration(ConfigurationKeys.SOURCE_FILEBASED_FS_URI, from.getFileSystem(new Configuration()).getUri().toString()); this.setConfiguration(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, to.getFileSystem(new Configuration()).getUri().toString()); // add gobblin-data-management jar to distributed jars this.distributeJar(ClassUtil.findContainingJar(CopySource.class)); }