/**
 * Looks up the partitions of the Hive table identified by {@code completeTableName}.
 *
 * <p>The name is split with {@code At_SPLITTER} and must yield exactly two parts; they are passed,
 * in order, to {@code IMetaStoreClient#getTable} (presumably database name then table name —
 * confirm against callers). A metastore client is borrowed from {@code ComplianceRetentionJob.pool}
 * and handed back automatically when the try-with-resources block exits.
 *
 * <p>This method is best-effort: it never throws {@link IOException} or {@code TException};
 * failures are logged at WARN level and an empty list is returned instead.
 *
 * @param completeTableName the complete table name to resolve
 * @return the partitions reported by {@code HiveDataset#getPartitionsFromDataset()}, or an empty
 *     list if the name does not split into two parts or the lookup fails
 */
private static List<Partition> getPartitions(String completeTableName) {
  List<String> tableList = At_SPLITTER.splitToList(completeTableName);
  if (tableList.size() != 2) {
    log.warn("Invalid table name " + completeTableName);
    // Type-safe replacement for the raw Collections.EMPTY_LIST constant.
    return Collections.emptyList();
  }
  try (AutoReturnableObject<IMetaStoreClient> client = ComplianceRetentionJob.pool.getClient()) {
    Table table = client.get().getTable(tableList.get(0), tableList.get(1));
    // NOTE(review): the FileSystem obtained from FileSystem.newInstance(...) is not closed in this
    // method — TODO confirm whether HiveDataset takes ownership or whether this leaks a handle.
    HiveDataset dataset = new HiveDataset(
        FileSystem.newInstance(new Configuration()),
        ComplianceRetentionJob.pool,
        new org.apache.hadoop.hive.ql.metadata.Table(table),
        new Properties());
    return dataset.getPartitionsFromDataset();
  } catch (IOException | TException e) {
    // Pass the exception itself so the stack trace and cause chain are not lost.
    log.warn("Unable to get Partitions for table " + completeTableName + " " + e.getMessage(), e);
  }
  return Collections.emptyList();
}
} // closes the enclosing class, whose header lies outside this excerpt
public void initDatasetFinder(Properties properties) throws IOException { Preconditions.checkArgument(properties.containsKey(GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS), "Missing required propety " + GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS); String finderClass = properties.getProperty(GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS); this.finder = GobblinConstructorUtils.invokeConstructor(DatasetsFinder.class, finderClass, new State(properties)); Iterator<HiveDataset> datasetsIterator = new HiveDatasetFinder(FileSystem.newInstance(new Configuration()), properties).getDatasetsIterator(); while (datasetsIterator.hasNext()) { // Drop partitions from empty tables if property is set, otherwise skip the table HiveDataset hiveDataset = datasetsIterator.next(); List<Partition> partitionsFromDataset = hiveDataset.getPartitionsFromDataset(); String completeTableName = hiveDataset.getTable().getCompleteName(); if (!partitionsFromDataset.isEmpty()) { this.tableNamesList.add(completeTableName); continue; } if (!Boolean.parseBoolean(properties.getProperty(ComplianceConfigurationKeys.SHOULD_DROP_EMPTY_TABLES, ComplianceConfigurationKeys.DEFAULT_SHOULD_DROP_EMPTY_TABLES))) { continue; } if (completeTableName.contains(ComplianceConfigurationKeys.TRASH) || completeTableName .contains(ComplianceConfigurationKeys.BACKUP) || completeTableName .contains(ComplianceConfigurationKeys.STAGING)) { this.tablesToDrop.add(hiveDataset); } } }
/**
 * Builds one {@code HivePartitionDataset} per partition of every dataset in
 * {@code this.hiveDatasets} and returns the subset chosen by the configured selection policy.
 *
 * <p>The policy class name is read from {@code DATASET_SELECTION_POLICY_CLASS}, falling back to
 * {@code DEFAULT_DATASET_SELECTION_POLICY_CLASS}, and is instantiated reflectively.
 *
 * <p>NOTE(review): backup, trash and staging tables are presumably excluded when
 * {@code this.hiveDatasets} is populated; no such filtering happens in this method — confirm.
 *
 * @return the partition datasets returned by the policy's {@code selectedList}
 * @throws IOException declared by this method; presumably raised while listing partitions
 */
@Override
public List<HivePartitionDataset> findDatasets() throws IOException {
  List<HivePartitionDataset> partitionDatasets = new ArrayList<>();

  // Flatten dataset -> partitions into a single list of partition-level datasets.
  for (HiveDataset dataset : this.hiveDatasets) {
    List<Partition> partitions = dataset.getPartitionsFromDataset();
    for (Partition current : partitions) {
      HivePartitionDataset wrapped = new HivePartitionDataset(current);
      partitionDatasets.add(wrapped);
    }
  }

  // Resolve the policy only after the partitions are collected, as the original ordering did.
  String policyClassName = this.state.getProp(
      ComplianceConfigurationKeys.DATASET_SELECTION_POLICY_CLASS,
      ComplianceConfigurationKeys.DEFAULT_DATASET_SELECTION_POLICY_CLASS);
  Policy<HivePartitionDataset> policy =
      GobblinConstructorUtils.invokeConstructor(Policy.class, policyClassName);

  return policy.selectedList(partitionDatasets);
}