/**
 * Builds a {@link CleanableHiveDataset} for the given metastore table.
 *
 * <p>The thrift-level metastore {@code Table} is first wrapped in a ql-layer
 * {@code org.apache.hadoop.hive.ql.metadata.Table}, which is what the dataset
 * implementation consumes.
 *
 * @param table metastore API table to wrap
 * @param datasetConfig dataset-scoped configuration
 * @return a cleanable dataset backed by this finder's filesystem, client pool and job properties
 * @throws IOException if the dataset cannot be constructed
 */
protected CleanableHiveDataset createHiveDataset(Table table, Config datasetConfig) throws IOException {
  org.apache.hadoop.hive.ql.metadata.Table qlTable = new org.apache.hadoop.hive.ql.metadata.Table(table);
  return new CleanableHiveDataset(super.fs, super.clientPool, qlTable, super.properties, datasetConfig);
}
// NOTE(review): this line looks like a fragment stitched together from two separate points of a
// larger method (likely the dataset-level clean(): an early-return when no versions are found,
// followed by the "about to drop" progress log). As written, the statements after the bare
// `return;` are unreachable, so this cannot compile in isolation — recover the enclosing method
// before editing; do not treat this as complete code.
log.warn(String.format("No dataset version can be found. Ignoring %s", this.getTable().getCompleteName())); return; log.info(String.format("Cleaning dataset %s .Will drop %s out of %s partitions.", datasetURN(), deletableVersions.size(), versions.size()));
@Override public void clean() throws IOException { // Possible empty directories to clean for this partition (version) Set<Path> possiblyEmptyDirectories = new HashSet<>(); try (AutoReturnableObject<IMetaStoreClient> client = cleanableHiveDataset.getClientPool().getClient()) { Partition partition = hiveDatasetVersion.getPartition(); try { if (!cleanableHiveDataset.isSimulate()) { client.get().dropPartition(partition.getTable().getDbName(), partition.getTable().getTableName(), partition.getValues(), false); log.info("Successfully dropped partition " + partition.getCompleteName()); } else { log.info("Simulating drop partition " + partition.getCompleteName()); } if (cleanableHiveDataset.isShouldDeleteData()) { cleanableHiveDataset.getFsCleanableHelper().clean(hiveDatasetVersion, possiblyEmptyDirectories); } } catch (TException | IOException e) { log.warn(String.format("Failed to completely delete partition %s.", partition.getCompleteName()), e); throw new IOException(e); } } cleanableHiveDataset.getFsCleanableHelper().cleanEmptyDirectories(possiblyEmptyDirectories, cleanableHiveDataset); }
// NOTE(review): truncated fragment of a post-cleanup partition-swap routine. As written it cannot
// compile: `sourcePartition` is referenced inside its own initializer (the real getPartition() call
// presumably takes db/table/partition-name arguments and the replacement partition is built from
// sourcePartition's storage descriptor and parameters), `replacementPartition` is never declared
// here, and the try-with-resources block is not closed. Recover the full method before editing.
boolean shouldReplacePartition = shouldReplacePartition(cleanableHiveDataset.getDatasetConfig(), hiveDatasetVersion.getPartition().getTable().getDbName(), hiveDatasetVersion.getPartition().getTable().getTableName(), this.replacementDbName, this.replacementTableName); try (AutoReturnableObject<IMetaStoreClient> client = cleanableHiveDataset.getClientPool().getClient()) { org.apache.hadoop.hive.metastore.api.Partition sourcePartition = client.get().getPartition( this.replacementDbName.get(), sourcePartition.getSd(), sourcePartition.getParameters()); if (!cleanableHiveDataset.isSimulate()) { client.get().add_partition(replacementPartition); log.info("Successfully swapped partition " + replacementPartition);
/**
 * Constructs a cleaner for a single Hive dataset version.
 *
 * <p>Validates that the supplied dataset/version are of the Hive-specific subtypes this cleaner
 * supports, then caches the optional replacement db/table names (used after cleanup to swap in a
 * partition from another table for the one being deleted).
 *
 * @param datasetVersion must be a {@link HiveDatasetVersion}
 * @param cleanableDataset must be a {@link CleanableHiveDataset}
 * @throws IllegalArgumentException if either argument is not of the supported subtype
 */
public HiveDatasetVersionCleaner(DatasetVersion datasetVersion, CleanableDataset cleanableDataset) {
  super(datasetVersion, cleanableDataset);
  Preconditions.checkArgument(cleanableDataset instanceof CleanableHiveDataset, String.format("%s only supports %s, "
      + "found: %s", this.getClass(), CleanableHiveDataset.class, cleanableDataset.getClass()));
  // Bug fix: the error message must name the expected version type (HiveDatasetVersion), not the
  // cleaner class itself, to match the instanceof check it reports on.
  Preconditions.checkArgument(datasetVersion instanceof HiveDatasetVersion, String.format("%s only supports %s, "
      + "found: %s", this.getClass(), HiveDatasetVersion.class, datasetVersion.getClass()));

  this.cleanableHiveDataset = (CleanableHiveDataset) cleanableDataset;
  this.hiveDatasetVersion = (HiveDatasetVersion) datasetVersion;

  // For post cleanup activity:
  // Get db / table name from which a partition has to be replaced-in for the target partition
  // being deleted. Absent when no replacement is configured.
  this.replacementDbName = Optional.fromNullable(ConfigUtils.getString(cleanableHiveDataset.getDatasetConfig(),
      REPLACEMENT_HIVE_DB_NAME_KEY, null));
  this.replacementTableName = Optional.fromNullable(ConfigUtils.getString(cleanableHiveDataset.getDatasetConfig(),
      REPLACEMENT_HIVE_TABLE_NAME_KEY, null));
}
/**
 * Constructs a cleanable Hive dataset.
 *
 * <p>Reflectively instantiates the configured {@code VersionSelectionPolicy} and
 * {@code AbstractHiveDatasetVersionFinder}: {@code invokeFirstConstructor} tries each candidate
 * argument list in the order given — (datasetConfig, jobProps), then (datasetConfig), then
 * (jobProps) for the selection policy; (fs, datasetConfig), then (fs, jobProps) for the version
 * finder — so the ordering of the ImmutableList arguments below is significant.
 *
 * <p>Also builds the filesystem cleanup helper and reads the delete-data and simulate flags from
 * job properties.
 *
 * @throws IllegalArgumentException if either configured class cannot be loaded or instantiated
 *         (the reflective cause is preserved)
 */
// NOTE(review): failures are logged *and* rethrown wrapped in IllegalArgumentException; callers
// see the original reflective exception as the cause.
public CleanableHiveDataset(FileSystem fs, HiveMetastoreClientPool clientPool, Table table, Properties jobProps, Config config) throws IOException { super(fs, clientPool, table, jobProps, config); try { this.hiveSelectionPolicy = (VersionSelectionPolicy) GobblinConstructorUtils.invokeFirstConstructor(Class.forName(ConfigUtils.getString( this.datasetConfig, SELECTION_POLICY_CLASS_KEY, DEFAULT_SELECTION_POLICY_CLASS)), ImmutableList.<Object> of( this.datasetConfig, jobProps), ImmutableList.<Object> of(this.datasetConfig), ImmutableList.<Object> of(jobProps)); log.info(String.format("Configured selection policy %s for dataset:%s with config %s", ConfigUtils.getString(this.datasetConfig, SELECTION_POLICY_CLASS_KEY, DEFAULT_SELECTION_POLICY_CLASS), datasetURN(), this.datasetConfig.root().render(ConfigRenderOptions.concise()))); this.hiveDatasetVersionFinder = (AbstractHiveDatasetVersionFinder) GobblinConstructorUtils.invokeFirstConstructor(Class.forName(ConfigUtils .getString(this.datasetConfig, VERSION_FINDER_CLASS_KEY, DEFAULT_VERSION_FINDER_CLASS)), ImmutableList .<Object> of(this.fs, this.datasetConfig), ImmutableList.<Object> of(this.fs, jobProps)); } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InstantiationException | ClassNotFoundException e) { log.error("Failed to instantiate CleanableHiveDataset", e); throw new IllegalArgumentException(e); } this.fsCleanableHelper = new FsCleanableHelper(fs, jobProps, this.datasetConfig, log); this.shouldDeleteData = Boolean.valueOf(jobProps.getProperty(SHOULD_DELETE_DATA_KEY, SHOULD_DELETE_DATA_DEFAULT)); this.simulate = Boolean.valueOf(jobProps.getProperty(FsCleanableHelper.SIMULATE_KEY, FsCleanableHelper.SIMULATE_DEFAULT)); }
// NOTE(review): truncated fragment of a post-cleanup partition-swap routine (duplicate of an
// identical fragment earlier in this file). As written it cannot compile: `sourcePartition` is
// referenced inside its own initializer, `replacementPartition` is never declared here, and the
// try-with-resources block is not closed. Recover the full method before editing.
boolean shouldReplacePartition = shouldReplacePartition(cleanableHiveDataset.getDatasetConfig(), hiveDatasetVersion.getPartition().getTable().getDbName(), hiveDatasetVersion.getPartition().getTable().getTableName(), this.replacementDbName, this.replacementTableName); try (AutoReturnableObject<IMetaStoreClient> client = cleanableHiveDataset.getClientPool().getClient()) { org.apache.hadoop.hive.metastore.api.Partition sourcePartition = client.get().getPartition( this.replacementDbName.get(), sourcePartition.getSd(), sourcePartition.getParameters()); if (!cleanableHiveDataset.isSimulate()) { client.get().add_partition(replacementPartition); log.info("Successfully swapped partition " + replacementPartition);
/**
 * Constructs a cleaner for a single Hive dataset version.
 *
 * <p>Validates that the supplied dataset/version are of the Hive-specific subtypes this cleaner
 * supports, then caches the optional replacement db/table names (used after cleanup to swap in a
 * partition from another table for the one being deleted).
 *
 * @param datasetVersion must be a {@link HiveDatasetVersion}
 * @param cleanableDataset must be a {@link CleanableHiveDataset}
 * @throws IllegalArgumentException if either argument is not of the supported subtype
 */
public HiveDatasetVersionCleaner(DatasetVersion datasetVersion, CleanableDataset cleanableDataset) {
  super(datasetVersion, cleanableDataset);
  Preconditions.checkArgument(cleanableDataset instanceof CleanableHiveDataset, String.format("%s only supports %s, "
      + "found: %s", this.getClass(), CleanableHiveDataset.class, cleanableDataset.getClass()));
  // Bug fix: the error message must name the expected version type (HiveDatasetVersion), not the
  // cleaner class itself, to match the instanceof check it reports on.
  Preconditions.checkArgument(datasetVersion instanceof HiveDatasetVersion, String.format("%s only supports %s, "
      + "found: %s", this.getClass(), HiveDatasetVersion.class, datasetVersion.getClass()));

  this.cleanableHiveDataset = (CleanableHiveDataset) cleanableDataset;
  this.hiveDatasetVersion = (HiveDatasetVersion) datasetVersion;

  // For post cleanup activity:
  // Get db / table name from which a partition has to be replaced-in for the target partition
  // being deleted. Absent when no replacement is configured.
  this.replacementDbName = Optional.fromNullable(ConfigUtils.getString(cleanableHiveDataset.getDatasetConfig(),
      REPLACEMENT_HIVE_DB_NAME_KEY, null));
  this.replacementTableName = Optional.fromNullable(ConfigUtils.getString(cleanableHiveDataset.getDatasetConfig(),
      REPLACEMENT_HIVE_TABLE_NAME_KEY, null));
}
/**
 * Constructs a cleanable Hive dataset.
 *
 * <p>Reflectively instantiates the configured {@code VersionSelectionPolicy} and
 * {@code AbstractHiveDatasetVersionFinder}: {@code invokeFirstConstructor} tries each candidate
 * argument list in the order given — (datasetConfig, jobProps), then (datasetConfig), then
 * (jobProps) for the selection policy; (fs, datasetConfig), then (fs, jobProps) for the version
 * finder — so the ordering of the ImmutableList arguments below is significant.
 *
 * <p>Also builds the filesystem cleanup helper and reads the delete-data and simulate flags from
 * job properties.
 *
 * @throws IllegalArgumentException if either configured class cannot be loaded or instantiated
 *         (the reflective cause is preserved)
 */
// NOTE(review): failures are logged *and* rethrown wrapped in IllegalArgumentException; callers
// see the original reflective exception as the cause.
public CleanableHiveDataset(FileSystem fs, HiveMetastoreClientPool clientPool, Table table, Properties jobProps, Config config) throws IOException { super(fs, clientPool, table, jobProps, config); try { this.hiveSelectionPolicy = (VersionSelectionPolicy) GobblinConstructorUtils.invokeFirstConstructor(Class.forName(ConfigUtils.getString( this.datasetConfig, SELECTION_POLICY_CLASS_KEY, DEFAULT_SELECTION_POLICY_CLASS)), ImmutableList.<Object> of( this.datasetConfig, jobProps), ImmutableList.<Object> of(this.datasetConfig), ImmutableList.<Object> of(jobProps)); log.info(String.format("Configured selection policy %s for dataset:%s with config %s", ConfigUtils.getString(this.datasetConfig, SELECTION_POLICY_CLASS_KEY, DEFAULT_SELECTION_POLICY_CLASS), datasetURN(), this.datasetConfig.root().render(ConfigRenderOptions.concise()))); this.hiveDatasetVersionFinder = (AbstractHiveDatasetVersionFinder) GobblinConstructorUtils.invokeFirstConstructor(Class.forName(ConfigUtils .getString(this.datasetConfig, VERSION_FINDER_CLASS_KEY, DEFAULT_VERSION_FINDER_CLASS)), ImmutableList .<Object> of(this.fs, this.datasetConfig), ImmutableList.<Object> of(this.fs, jobProps)); } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | InstantiationException | ClassNotFoundException e) { log.error("Failed to instantiate CleanableHiveDataset", e); throw new IllegalArgumentException(e); } this.fsCleanableHelper = new FsCleanableHelper(fs, jobProps, this.datasetConfig, log); this.shouldDeleteData = Boolean.valueOf(jobProps.getProperty(SHOULD_DELETE_DATA_KEY, SHOULD_DELETE_DATA_DEFAULT)); this.simulate = Boolean.valueOf(jobProps.getProperty(FsCleanableHelper.SIMULATE_KEY, FsCleanableHelper.SIMULATE_DEFAULT)); }
/**
 * Cleans this Hive dataset version: drops its metastore partition (deleteData=false — the
 * underlying data is removed separately by FsCleanableHelper when data deletion is enabled on the
 * owning dataset), then sweeps any directories left empty by the cleanup.
 *
 * <p>In simulate mode the partition drop is only logged. Metastore/filesystem failures are logged
 * and rethrown as {@link IOException}. Note the empty-directory sweep runs after the
 * try-with-resources closes, i.e. after the metastore client is returned to the pool.
 *
 * @throws IOException if the partition or its data cannot be fully deleted
 */
@Override public void clean() throws IOException { // Possible empty directories to clean for this partition (version) Set<Path> possiblyEmptyDirectories = new HashSet<>(); try (AutoReturnableObject<IMetaStoreClient> client = cleanableHiveDataset.getClientPool().getClient()) { Partition partition = hiveDatasetVersion.getPartition(); try { if (!cleanableHiveDataset.isSimulate()) { client.get().dropPartition(partition.getTable().getDbName(), partition.getTable().getTableName(), partition.getValues(), false); log.info("Successfully dropped partition " + partition.getCompleteName()); } else { log.info("Simulating drop partition " + partition.getCompleteName()); } if (cleanableHiveDataset.isShouldDeleteData()) { cleanableHiveDataset.getFsCleanableHelper().clean(hiveDatasetVersion, possiblyEmptyDirectories); } } catch (TException | IOException e) { log.warn(String.format("Failed to completely delete partition %s.", partition.getCompleteName()), e); throw new IOException(e); } } cleanableHiveDataset.getFsCleanableHelper().cleanEmptyDirectories(possiblyEmptyDirectories, cleanableHiveDataset); }
// NOTE(review): fragment (duplicate of an identical fragment earlier in this file) — these
// statements look excerpted from two separate points of a larger method; code after the bare
// `return;` is unreachable, so this line cannot compile in isolation. Recover the enclosing
// method before editing; do not treat this as complete code.
log.warn(String.format("No dataset version can be found. Ignoring %s", this.getTable().getCompleteName())); return; log.info(String.format("Cleaning dataset %s .Will drop %s out of %s partitions.", datasetURN(), deletableVersions.size(), versions.size()));
/**
 * Creates a {@link CleanableHiveDataset} for the given metastore table by first converting it to
 * a ql-layer {@code org.apache.hadoop.hive.ql.metadata.Table}.
 *
 * @param table metastore API table to wrap
 * @param datasetConfig dataset-scoped configuration
 * @return a cleanable dataset backed by this finder's filesystem, client pool and job properties
 * @throws IOException if the dataset cannot be constructed
 */
protected CleanableHiveDataset createHiveDataset(Table table, Config datasetConfig) throws IOException {
  return new CleanableHiveDataset(
      super.fs,
      super.clientPool,
      new org.apache.hadoop.hive.ql.metadata.Table(table),
      super.properties,
      datasetConfig);
}