/**
 * Collects every dataset produced by {@code getDatasetsIterator()} into a list.
 *
 * @return a new list holding the datasets in iteration order
 * @throws IOException if obtaining the datasets iterator fails
 */
@Override
public List<HiveDataset> findDatasets() throws IOException {
  List<HiveDataset> allDatasets = Lists.newArrayList(getDatasetsIterator());
  return allDatasets;
}
public void initDatasetFinder(Properties properties) throws IOException { Preconditions.checkArgument(properties.containsKey(GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS), "Missing required propety " + GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS); String finderClass = properties.getProperty(GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS); this.finder = GobblinConstructorUtils.invokeConstructor(DatasetsFinder.class, finderClass, new State(properties)); Iterator<HiveDataset> datasetsIterator = new HiveDatasetFinder(FileSystem.newInstance(new Configuration()), properties).getDatasetsIterator(); while (datasetsIterator.hasNext()) { // Drop partitions from empty tables if property is set, otherwise skip the table HiveDataset hiveDataset = datasetsIterator.next(); List<Partition> partitionsFromDataset = hiveDataset.getPartitionsFromDataset(); String completeTableName = hiveDataset.getTable().getCompleteName(); if (!partitionsFromDataset.isEmpty()) { this.tableNamesList.add(completeTableName); continue; } if (!Boolean.parseBoolean(properties.getProperty(ComplianceConfigurationKeys.SHOULD_DROP_EMPTY_TABLES, ComplianceConfigurationKeys.DEFAULT_SHOULD_DROP_EMPTY_TABLES))) { continue; } if (completeTableName.contains(ComplianceConfigurationKeys.TRASH) || completeTableName .contains(ComplianceConfigurationKeys.BACKUP) || completeTableName .contains(ComplianceConfigurationKeys.STAGING)) { this.tablesToDrop.add(hiveDataset); } } }
/**
 * Registers three tables in {@code db1}, sets the whitelist property to an empty string and
 * expects the finder to return three datasets.
 */
@Test
public void testDatasetFinder() throws Exception {
  List<HiveDatasetFinder.DbAndTable> registeredTables = Lists.newArrayList(
      new HiveDatasetFinder.DbAndTable("db1", "table1"),
      new HiveDatasetFinder.DbAndTable("db1", "table2"),
      new HiveDatasetFinder.DbAndTable("db1", "table3"));
  HiveMetastoreClientPool clientPool = getTestPool(registeredTables);

  Properties props = new Properties();
  props.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, "");

  HiveDatasetFinder datasetFinder =
      new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), props, clientPool);
  List<HiveDataset> found = Lists.newArrayList(datasetFinder.getDatasetsIterator());

  Assert.assertEquals(found.size(), 3);
}
/**
 * Registers three tables, one of them named {@code TestHiveDatasetFinder.THROW_EXCEPTION},
 * and expects the finder to return only the other two datasets.
 */
@Test
public void testException() throws Exception {
  List<HiveDatasetFinder.DbAndTable> registeredTables = Lists.newArrayList(
      new HiveDatasetFinder.DbAndTable("db1", "table1"),
      new HiveDatasetFinder.DbAndTable("db1", TestHiveDatasetFinder.THROW_EXCEPTION),
      new HiveDatasetFinder.DbAndTable("db1", "table3"));
  HiveMetastoreClientPool clientPool = getTestPool(registeredTables);

  Properties props = new Properties();
  props.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, "");

  HiveDatasetFinder datasetFinder =
      new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), props, clientPool);
  List<HiveDataset> found = Lists.newArrayList(datasetFinder.getDatasetsIterator());

  // The table flagged with THROW_EXCEPTION must not appear in the result.
  Assert.assertEquals(found.size(), 2);
}
@Test public void testDatasetConfig() throws Exception { List<HiveDatasetFinder.DbAndTable> dbAndTables = Lists.newArrayList(); dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table1")); HiveMetastoreClientPool pool = getTestPool(dbAndTables); Properties properties = new Properties(); properties.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, ""); properties.put("hive.dataset.test.conf1", "conf1-val1"); properties.put("hive.dataset.test.conf2", "conf2-val2"); HiveDatasetFinder finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool); List<HiveDataset> datasets = Lists.newArrayList(finder.getDatasetsIterator()); Assert.assertEquals(datasets.size(), 1); HiveDataset hiveDataset = datasets.get(0); Assert.assertEquals(hiveDataset.getDatasetConfig().getString("hive.dataset.test.conf1"), "conf1-val1"); Assert.assertEquals(hiveDataset.getDatasetConfig().getString("hive.dataset.test.conf2"), "conf2-val2"); // Test scoped configs with prefix properties.put(HiveDatasetFinder.HIVE_DATASET_CONFIG_PREFIX_KEY, "hive.dataset.test"); finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool); datasets = Lists.newArrayList(finder.getDatasetsIterator()); Assert.assertEquals(datasets.size(), 1); hiveDataset = datasets.get(0); Assert.assertEquals(hiveDataset.getDatasetConfig().getString("conf1"), "conf1-val1"); Assert.assertEquals(hiveDataset.getDatasetConfig().getString("conf2"), "conf2-val2"); }
/**
 * Registers two tables in {@code db1} and one in {@code db2}, blacklists {@code db2} and
 * expects only the two {@code db1} tables to be returned.
 */
@Test
public void testBlacklist() throws Exception {
  List<HiveDatasetFinder.DbAndTable> registeredTables = Lists.newArrayList(
      new HiveDatasetFinder.DbAndTable("db1", "table1"),
      new HiveDatasetFinder.DbAndTable("db1", "table2"),
      new HiveDatasetFinder.DbAndTable("db2", "table1"));
  HiveMetastoreClientPool clientPool = getTestPool(registeredTables);

  Properties props = new Properties();
  props.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, "");
  props.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.BLACKLIST, "db2");

  HiveDatasetFinder datasetFinder =
      new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), props, clientPool);
  List<HiveDataset> found = Lists.newArrayList(datasetFinder.getDatasetsIterator());
  Assert.assertEquals(found.size(), 2);

  // Every surviving dataset must come from db1; table order is not significant.
  Set<String> foundTableNames = Sets.newHashSet();
  for (HiveDataset dataset : found) {
    Assert.assertEquals(dataset.getTable().getDbName(), "db1");
    foundTableNames.add(dataset.getTable().getTableName());
  }
  Assert.assertEquals(foundTableNames, Sets.newHashSet("table1", "table2"));
}
/**
 * Registers two tables in {@code db1} and one in {@code db2}, whitelists {@code db1} and
 * expects only the two {@code db1} tables to be returned.
 */
@Test
public void testWhitelist() throws Exception {
  List<HiveDatasetFinder.DbAndTable> registeredTables = Lists.newArrayList(
      new HiveDatasetFinder.DbAndTable("db1", "table1"),
      new HiveDatasetFinder.DbAndTable("db1", "table2"),
      new HiveDatasetFinder.DbAndTable("db2", "table1"));
  HiveMetastoreClientPool clientPool = getTestPool(registeredTables);

  Properties props = new Properties();
  props.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, "db1");

  HiveDatasetFinder datasetFinder =
      new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), props, clientPool);
  List<HiveDataset> found = Lists.newArrayList(datasetFinder.getDatasetsIterator());
  Assert.assertEquals(found.size(), 2);

  // Every returned dataset must come from db1; table order is not significant.
  Set<String> foundTableNames = Sets.newHashSet();
  for (HiveDataset dataset : found) {
    Assert.assertEquals(dataset.getTable().getDbName(), "db1");
    foundTableNames.add(dataset.getTable().getTableName());
  }
  Assert.assertEquals(foundTableNames, Sets.newHashSet("table1", "table2"));
}
/**
 * Selects datasets through {@code DB_KEY} and {@code TABLE_PATTERN_KEY} instead of a
 * whitelist: with db {@code db1} and pattern {@code table1|table2}, only those two tables
 * are expected out of the four registered.
 */
@Test
public void testTableList() throws Exception {
  List<HiveDatasetFinder.DbAndTable> registeredTables = Lists.newArrayList(
      new HiveDatasetFinder.DbAndTable("db1", "table1"),
      new HiveDatasetFinder.DbAndTable("db1", "table2"),
      new HiveDatasetFinder.DbAndTable("db1", "table3"),
      new HiveDatasetFinder.DbAndTable("db2", "table1"));
  HiveMetastoreClientPool clientPool = getTestPool(registeredTables);

  Properties props = new Properties();
  props.put(HiveDatasetFinder.DB_KEY, "db1");
  props.put(HiveDatasetFinder.TABLE_PATTERN_KEY, "table1|table2");

  HiveDatasetFinder datasetFinder =
      new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), props, clientPool);
  List<HiveDataset> found = Lists.newArrayList(datasetFinder.getDatasetsIterator());
  Assert.assertEquals(found.size(), 2);

  // Every returned dataset must come from db1; table order is not significant.
  Set<String> foundTableNames = Sets.newHashSet();
  for (HiveDataset dataset : found) {
    Assert.assertEquals(dataset.getTable().getDbName(), "db1");
    foundTableNames.add(dataset.getTable().getTableName());
  }
  Assert.assertEquals(foundTableNames, Sets.newHashSet("table1", "table2"));
}
/**
 * Drains {@code getDatasetsIterator()} into a freshly created list.
 *
 * @return a new list with all datasets, in the order the iterator yields them
 * @throws IOException if the datasets iterator cannot be obtained
 */
@Override
public List<HiveDataset> findDatasets() throws IOException {
  List<HiveDataset> collected = Lists.newArrayList(getDatasetsIterator());
  return collected;
}