/**
 * Constructor for {@link MultiVersionCleanableDatasetBase}.
 *
 * @param fs {@link org.apache.hadoop.fs.FileSystem} where files are located.
 * @param properties {@link java.util.Properties} for object.
 * @param simulate whether to simulate deletes.
 * @param skipTrash if true, delete files and directories immediately.
 * @param deleteEmptyDirectories if true, newly empty parent directories will be deleted.
 * @param deleteAsOwner if true, all deletions will be executed as the owner of the file / directory.
 * @param isDatasetBlacklisted if true, clean will be skipped for this dataset.
 * @param log logger to use.
 *
 * @throws IOException if the underlying {@link FsCleanableHelper} cannot be constructed.
 */
public MultiVersionCleanableDatasetBase(FileSystem fs, Properties properties, boolean simulate, boolean skipTrash,
    boolean deleteEmptyDirectories, boolean deleteAsOwner, boolean isDatasetBlacklisted, Logger log)
    throws IOException {
  this.log = log;
  // Filesystem deletion mechanics are delegated to the helper; the flags are also kept locally
  // for this class's own use.
  this.fsCleanableHelper =
      new FsCleanableHelper(fs, properties, simulate, skipTrash, deleteEmptyDirectories, deleteAsOwner, log);
  this.fs = fs;
  this.simulate = simulate;
  this.skipTrash = skipTrash;
  this.deleteEmptyDirectories = deleteEmptyDirectories;
  // Share the helper's trash instance so both objects use the same trash behavior.
  this.trash = this.fsCleanableHelper.getTrash();
  this.deleteAsOwner = deleteAsOwner;
  this.isDatasetBlacklisted = isDatasetBlacklisted;
}
/**
 * Delete all {@link FileSystemDatasetVersion}s <code>deletableVersions</code> and also delete any empty parent
 * directories.
 *
 * @param fsDataset to which the version belongs.
 */
public void clean(final Collection<? extends FileSystemDatasetVersion> deletableVersions,
    final FileSystemDataset fsDataset) throws IOException {
  if (deletableVersions.isEmpty()) {
    log.warn("No deletable dataset version can be found. Ignoring.");
    return;
  }

  // Collect parent directories that might become empty while deleting, then sweep them in one pass.
  Set<Path> emptyDirCandidates = new HashSet<>();
  for (FileSystemDatasetVersion version : deletableVersions) {
    clean(version, emptyDirCandidates);
  }
  cleanEmptyDirectories(emptyDirCandidates, fsDataset);
}
Properties props = new Properties(); props.setProperty(FsCleanableHelper.SKIP_TRASH_KEY, Boolean.toString(true)); FsCleanableHelper fsCleanableHelper = new FsCleanableHelper(this.fs, props, ConfigFactory.empty(), log); FileSystemDataset fsDataset = mock(FileSystemDataset.class); Path datasetRoot = new Path(testTempPath, "dataset1"); new MockFileSystemDatasetVersion(deleted3)); fsCleanableHelper.clean(deletableVersions, fsDataset);
/**
 * Deletes the given versions of this dataset by delegating to the shared {@link FsCleanableHelper},
 * passing this dataset so the helper can also remove parent directories that become empty.
 *
 * @param deletableVersions versions of this dataset to delete.
 * @throws IOException if deletion fails.
 */
protected void cleanImpl(Collection<T> deletableVersions) throws IOException {
  this.fsCleanableHelper.clean(deletableVersions, this);
}
/**
 * Constructor for {@link CleanableHiveDataset}. Reflectively instantiates the configured
 * {@link VersionSelectionPolicy} and {@link AbstractHiveDatasetVersionFinder}, trying each supported
 * constructor signature in order.
 *
 * @param fs {@link FileSystem} where the dataset's data files are located.
 * @param clientPool pool of Hive metastore clients.
 * @param table the Hive table backing this dataset.
 * @param jobProps job-level {@link Properties}.
 * @param config dataset-scoped {@link Config}.
 * @throws IOException if the superclass constructor or the {@link FsCleanableHelper} fails.
 * @throws IllegalArgumentException if the selection policy or version finder cannot be instantiated;
 *         the reflective failure is preserved as the cause.
 */
public CleanableHiveDataset(FileSystem fs, HiveMetastoreClientPool clientPool, Table table, Properties jobProps,
    Config config) throws IOException {
  super(fs, clientPool, table, jobProps, config);

  try {
    // Try (config, jobProps), then (config), then (jobProps) constructor signatures.
    this.hiveSelectionPolicy =
        (VersionSelectionPolicy) GobblinConstructorUtils.invokeFirstConstructor(Class.forName(ConfigUtils.getString(
            this.datasetConfig, SELECTION_POLICY_CLASS_KEY, DEFAULT_SELECTION_POLICY_CLASS)),
            ImmutableList.<Object> of(this.datasetConfig, jobProps), ImmutableList.<Object> of(this.datasetConfig),
            ImmutableList.<Object> of(jobProps));

    log.info(String.format("Configured selection policy %s for dataset:%s with config %s",
        ConfigUtils.getString(this.datasetConfig, SELECTION_POLICY_CLASS_KEY, DEFAULT_SELECTION_POLICY_CLASS),
        datasetURN(), this.datasetConfig.root().render(ConfigRenderOptions.concise())));

    // Try (fs, config), then (fs, jobProps) constructor signatures.
    this.hiveDatasetVersionFinder =
        (AbstractHiveDatasetVersionFinder) GobblinConstructorUtils.invokeFirstConstructor(Class.forName(ConfigUtils
            .getString(this.datasetConfig, VERSION_FINDER_CLASS_KEY, DEFAULT_VERSION_FINDER_CLASS)),
            ImmutableList.<Object> of(this.fs, this.datasetConfig), ImmutableList.<Object> of(this.fs, jobProps));
  } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InstantiationException
      | ClassNotFoundException e) {
    log.error("Failed to instantiate CleanableHiveDataset", e);
    throw new IllegalArgumentException(e);
  }

  this.fsCleanableHelper = new FsCleanableHelper(fs, jobProps, this.datasetConfig, log);

  // parseBoolean avoids the needless Boolean boxing that Boolean.valueOf introduces.
  this.shouldDeleteData =
      Boolean.parseBoolean(jobProps.getProperty(SHOULD_DELETE_DATA_KEY, SHOULD_DELETE_DATA_DEFAULT));
  this.simulate =
      Boolean.parseBoolean(jobProps.getProperty(FsCleanableHelper.SIMULATE_KEY, FsCleanableHelper.SIMULATE_DEFAULT));
}
/**
 * Deletes the given versions of this dataset by delegating to the shared {@link FsCleanableHelper},
 * passing this dataset so the helper can also remove parent directories that become empty.
 *
 * @param deletableVersions versions of this dataset to delete.
 * @throws IOException if deletion fails.
 */
protected void cleanImpl(Collection<T> deletableVersions) throws IOException {
  this.fsCleanableHelper.clean(deletableVersions, this);
}
/**
 * Constructor for {@link CleanableHiveDataset}. Reflectively instantiates the configured
 * {@link VersionSelectionPolicy} and {@link AbstractHiveDatasetVersionFinder}, trying each supported
 * constructor signature in order.
 *
 * @param fs {@link FileSystem} where the dataset's data files are located.
 * @param clientPool pool of Hive metastore clients.
 * @param table the Hive table backing this dataset.
 * @param jobProps job-level {@link Properties}.
 * @param config dataset-scoped {@link Config}.
 * @throws IOException if the superclass constructor or the {@link FsCleanableHelper} fails.
 * @throws IllegalArgumentException if the selection policy or version finder cannot be instantiated;
 *         the reflective failure is preserved as the cause.
 */
public CleanableHiveDataset(FileSystem fs, HiveMetastoreClientPool clientPool, Table table, Properties jobProps,
    Config config) throws IOException {
  super(fs, clientPool, table, jobProps, config);

  try {
    // Try (config, jobProps), then (config), then (jobProps) constructor signatures.
    this.hiveSelectionPolicy =
        (VersionSelectionPolicy) GobblinConstructorUtils.invokeFirstConstructor(Class.forName(ConfigUtils.getString(
            this.datasetConfig, SELECTION_POLICY_CLASS_KEY, DEFAULT_SELECTION_POLICY_CLASS)),
            ImmutableList.<Object> of(this.datasetConfig, jobProps), ImmutableList.<Object> of(this.datasetConfig),
            ImmutableList.<Object> of(jobProps));

    log.info(String.format("Configured selection policy %s for dataset:%s with config %s",
        ConfigUtils.getString(this.datasetConfig, SELECTION_POLICY_CLASS_KEY, DEFAULT_SELECTION_POLICY_CLASS),
        datasetURN(), this.datasetConfig.root().render(ConfigRenderOptions.concise())));

    // Try (fs, config), then (fs, jobProps) constructor signatures.
    this.hiveDatasetVersionFinder =
        (AbstractHiveDatasetVersionFinder) GobblinConstructorUtils.invokeFirstConstructor(Class.forName(ConfigUtils
            .getString(this.datasetConfig, VERSION_FINDER_CLASS_KEY, DEFAULT_VERSION_FINDER_CLASS)),
            ImmutableList.<Object> of(this.fs, this.datasetConfig), ImmutableList.<Object> of(this.fs, jobProps));
  } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InstantiationException
      | ClassNotFoundException e) {
    log.error("Failed to instantiate CleanableHiveDataset", e);
    throw new IllegalArgumentException(e);
  }

  this.fsCleanableHelper = new FsCleanableHelper(fs, jobProps, this.datasetConfig, log);

  // parseBoolean avoids the needless Boolean boxing that Boolean.valueOf introduces.
  this.shouldDeleteData =
      Boolean.parseBoolean(jobProps.getProperty(SHOULD_DELETE_DATA_KEY, SHOULD_DELETE_DATA_DEFAULT));
  this.simulate =
      Boolean.parseBoolean(jobProps.getProperty(FsCleanableHelper.SIMULATE_KEY, FsCleanableHelper.SIMULATE_DEFAULT));
}
@Test
public void testSimulateTrash() throws Exception {
  // With simulate mode enabled, the helper should hand back the mock trash implementation.
  Properties properties = new Properties();
  properties.setProperty(FsCleanableHelper.SIMULATE_KEY, Boolean.toString(true));
  FsCleanableHelper helper = new FsCleanableHelper(this.fs, properties, ConfigFactory.empty(), log);
  assertThat(helper.getTrash(), instanceOf(MockTrash.class));
}
@Override public void clean() throws IOException { // Possible empty directories to clean for this partition (version) Set<Path> possiblyEmptyDirectories = new HashSet<>(); try (AutoReturnableObject<IMetaStoreClient> client = cleanableHiveDataset.getClientPool().getClient()) { Partition partition = hiveDatasetVersion.getPartition(); try { if (!cleanableHiveDataset.isSimulate()) { client.get().dropPartition(partition.getTable().getDbName(), partition.getTable().getTableName(), partition.getValues(), false); log.info("Successfully dropped partition " + partition.getCompleteName()); } else { log.info("Simulating drop partition " + partition.getCompleteName()); } if (cleanableHiveDataset.isShouldDeleteData()) { cleanableHiveDataset.getFsCleanableHelper().clean(hiveDatasetVersion, possiblyEmptyDirectories); } } catch (TException | IOException e) { log.warn(String.format("Failed to completely delete partition %s.", partition.getCompleteName()), e); throw new IOException(e); } } cleanableHiveDataset.getFsCleanableHelper().cleanEmptyDirectories(possiblyEmptyDirectories, cleanableHiveDataset); }
/**
 * Constructor for {@link MultiVersionCleanableDatasetBase}.
 *
 * @param fs {@link org.apache.hadoop.fs.FileSystem} where files are located.
 * @param properties {@link java.util.Properties} for object.
 * @param simulate whether to simulate deletes.
 * @param skipTrash if true, delete files and directories immediately.
 * @param deleteEmptyDirectories if true, newly empty parent directories will be deleted.
 * @param deleteAsOwner if true, all deletions will be executed as the owner of the file / directory.
 * @param isDatasetBlacklisted if true, clean will be skipped for this dataset.
 * @param log logger to use.
 *
 * @throws IOException if the underlying {@link FsCleanableHelper} cannot be constructed.
 */
public MultiVersionCleanableDatasetBase(FileSystem fs, Properties properties, boolean simulate, boolean skipTrash,
    boolean deleteEmptyDirectories, boolean deleteAsOwner, boolean isDatasetBlacklisted, Logger log)
    throws IOException {
  this.log = log;
  // Filesystem deletion mechanics are delegated to the helper; the flags are also kept locally
  // for this class's own use.
  this.fsCleanableHelper =
      new FsCleanableHelper(fs, properties, simulate, skipTrash, deleteEmptyDirectories, deleteAsOwner, log);
  this.fs = fs;
  this.simulate = simulate;
  this.skipTrash = skipTrash;
  this.deleteEmptyDirectories = deleteEmptyDirectories;
  // Share the helper's trash instance so both objects use the same trash behavior.
  this.trash = this.fsCleanableHelper.getTrash();
  this.deleteAsOwner = deleteAsOwner;
  this.isDatasetBlacklisted = isDatasetBlacklisted;
}
/**
 * Delete all {@link FileSystemDatasetVersion}s <code>deletableVersions</code> and also delete any empty parent
 * directories.
 *
 * @param fsDataset to which the version belongs.
 */
public void clean(final Collection<? extends FileSystemDatasetVersion> deletableVersions,
    final FileSystemDataset fsDataset) throws IOException {
  if (deletableVersions.isEmpty()) {
    log.warn("No deletable dataset version can be found. Ignoring.");
    return;
  }

  // Track directories that deleting versions may leave empty, then clean them all at the end.
  Set<Path> maybeEmptyDirs = new HashSet<>();
  for (FileSystemDatasetVersion datasetVersion : deletableVersions) {
    clean(datasetVersion, maybeEmptyDirs);
  }
  cleanEmptyDirectories(maybeEmptyDirs, fsDataset);
}
/**
 * Drops this version's Hive partition from the metastore (or only logs the drop in simulate mode),
 * optionally deletes the partition's data via the dataset's {@link FsCleanableHelper}, and finally cleans
 * up any parent directories left empty by the deletion.
 *
 * @throws IOException if the partition drop or data deletion fails; the underlying {@link TException} or
 *         {@link IOException} is wrapped as the cause.
 */
@Override
public void clean() throws IOException {
  // Possible empty directories to clean for this partition (version)
  Set<Path> possiblyEmptyDirectories = new HashSet<>();

  // try-with-resources returns the metastore client to the pool when done.
  try (AutoReturnableObject<IMetaStoreClient> client = cleanableHiveDataset.getClientPool().getClient()) {
    Partition partition = hiveDatasetVersion.getPartition();
    try {
      if (!cleanableHiveDataset.isSimulate()) {
        client.get().dropPartition(partition.getTable().getDbName(), partition.getTable().getTableName(),
            partition.getValues(), false);
        log.info("Successfully dropped partition " + partition.getCompleteName());
      } else {
        log.info("Simulating drop partition " + partition.getCompleteName());
      }
      // Filesystem data deletion is optional, controlled by the dataset's shouldDeleteData flag.
      if (cleanableHiveDataset.isShouldDeleteData()) {
        cleanableHiveDataset.getFsCleanableHelper().clean(hiveDatasetVersion, possiblyEmptyDirectories);
      }
    } catch (TException | IOException e) {
      log.warn(String.format("Failed to completely delete partition %s.", partition.getCompleteName()), e);
      throw new IOException(e);
    }
  }
  // Reached only on success; directory cleanup happens after the client has been released.
  cleanableHiveDataset.getFsCleanableHelper().cleanEmptyDirectories(possiblyEmptyDirectories, cleanableHiveDataset);
}