@Override public List<HiveDataset> findDatasets() throws IOException { return Lists.newArrayList(getDatasetsIterator()); }
public HiveDatasetFinder(FileSystem fs, Properties properties) throws IOException { this(fs, properties, createClientPool(properties)); }
/** * @deprecated Use {@link #createHiveDataset(Table, Config)} instead */ @Deprecated protected HiveDataset createHiveDataset(Table table) throws IOException { return createHiveDataset(table, ConfigFactory.empty()); }
private static List<HiveDataset> getHiveDatasets(FileSystem fs, State state) throws IOException { Preconditions.checkArgument(state.contains(ComplianceConfigurationKeys.COMPLIANCE_DATASET_WHITELIST), "Missing required property " + ComplianceConfigurationKeys.COMPLIANCE_DATASET_WHITELIST); Properties prop = new Properties(); prop.setProperty(ComplianceConfigurationKeys.HIVE_DATASET_WHITELIST, state.getProp(ComplianceConfigurationKeys.COMPLIANCE_DATASET_WHITELIST)); HiveDatasetFinder finder = new HiveDatasetFinder(fs, prop); return finder.findDatasets(); }
public void initDatasetFinder(Properties properties) throws IOException { Preconditions.checkArgument(properties.containsKey(GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS), "Missing required propety " + GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS); String finderClass = properties.getProperty(GOBBLIN_COMPLIANCE_DATASET_FINDER_CLASS); this.finder = GobblinConstructorUtils.invokeConstructor(DatasetsFinder.class, finderClass, new State(properties)); Iterator<HiveDataset> datasetsIterator = new HiveDatasetFinder(FileSystem.newInstance(new Configuration()), properties).getDatasetsIterator(); while (datasetsIterator.hasNext()) { // Drop partitions from empty tables if property is set, otherwise skip the table HiveDataset hiveDataset = datasetsIterator.next(); List<Partition> partitionsFromDataset = hiveDataset.getPartitionsFromDataset(); String completeTableName = hiveDataset.getTable().getCompleteName(); if (!partitionsFromDataset.isEmpty()) { this.tableNamesList.add(completeTableName); continue; } if (!Boolean.parseBoolean(properties.getProperty(ComplianceConfigurationKeys.SHOULD_DROP_EMPTY_TABLES, ComplianceConfigurationKeys.DEFAULT_SHOULD_DROP_EMPTY_TABLES))) { continue; } if (completeTableName.contains(ComplianceConfigurationKeys.TRASH) || completeTableName .contains(ComplianceConfigurationKeys.BACKUP) || completeTableName .contains(ComplianceConfigurationKeys.STAGING)) { this.tablesToDrop.add(hiveDataset); } } }
private Iterator<DbAndTable> tables = getTables().iterator();
private Iterator<DbAndTable> tables = getTables().iterator();
@Test public void testDatasetFinder() throws Exception { List<HiveDatasetFinder.DbAndTable> dbAndTables = Lists.newArrayList(); dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table1")); dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table2")); dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table3")); HiveMetastoreClientPool pool = getTestPool(dbAndTables); Properties properties = new Properties(); properties.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, ""); HiveDatasetFinder finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool); List<HiveDataset> datasets = Lists.newArrayList(finder.getDatasetsIterator()); Assert.assertEquals(datasets.size(), 3); }
protected HiveDatasetFinder(FileSystem fs, Properties properties, ConfigClient configClient) throws IOException { this(fs, properties, createClientPool(properties), null, configClient); }
@Override protected HiveDataset computeNext() { while (this.tables.hasNext()) { DbAndTable dbAndTable = this.tables.next(); try (AutoReturnableObject<IMetaStoreClient> client = HiveDatasetFinder.this.clientPool.getClient()) { Table table = client.get().getTable(dbAndTable.getDb(), dbAndTable.getTable()); Config datasetConfig = getDatasetConfig(table); if (ConfigUtils.getBoolean(datasetConfig, HIVE_DATASET_IS_BLACKLISTED_KEY, DEFAULT_HIVE_DATASET_IS_BLACKLISTED_KEY)) { continue; } if (HiveDatasetFinder.this.eventSubmitter.isPresent()) { SlaEventSubmitter.builder().datasetUrn(dbAndTable.toString()) .eventSubmitter(HiveDatasetFinder.this.eventSubmitter.get()).eventName(DATASET_FOUND).build().submit(); } return createHiveDataset(table, datasetConfig); } catch (IllegalArgumentException e) { Throwables.propagate(e); } catch (Throwable t) { log.error(String.format("Failed to create HiveDataset for table %s.%s", dbAndTable.getDb(), dbAndTable.getTable()), t); if (HiveDatasetFinder.this.eventSubmitter.isPresent()) { SlaEventSubmitter.builder().datasetUrn(dbAndTable.toString()) .eventSubmitter(HiveDatasetFinder.this.eventSubmitter.get()).eventName(DATASET_ERROR) .additionalMetadata(FAILURE_CONTEXT, t.toString()).build().submit(); } } } return endOfData(); } };
@Test public void testException() throws Exception { List<HiveDatasetFinder.DbAndTable> dbAndTables = Lists.newArrayList(); dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table1")); dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", TestHiveDatasetFinder.THROW_EXCEPTION)); dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table3")); HiveMetastoreClientPool pool = getTestPool(dbAndTables); Properties properties = new Properties(); properties.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, ""); HiveDatasetFinder finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool); List<HiveDataset> datasets = Lists.newArrayList(finder.getDatasetsIterator()); Assert.assertEquals(datasets.size(), 2); }
public HiveDatasetFinder(FileSystem fs, Properties properties, EventSubmitter eventSubmitter) throws IOException { this(fs, properties, createClientPool(properties), eventSubmitter); }
/** * @deprecated Use {@link #createHiveDataset(Table, Config)} instead */ @Deprecated protected HiveDataset createHiveDataset(Table table) throws IOException { return createHiveDataset(table, ConfigFactory.empty()); }
@Test public void testDatasetConfig() throws Exception { List<HiveDatasetFinder.DbAndTable> dbAndTables = Lists.newArrayList(); dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table1")); HiveMetastoreClientPool pool = getTestPool(dbAndTables); Properties properties = new Properties(); properties.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, ""); properties.put("hive.dataset.test.conf1", "conf1-val1"); properties.put("hive.dataset.test.conf2", "conf2-val2"); HiveDatasetFinder finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool); List<HiveDataset> datasets = Lists.newArrayList(finder.getDatasetsIterator()); Assert.assertEquals(datasets.size(), 1); HiveDataset hiveDataset = datasets.get(0); Assert.assertEquals(hiveDataset.getDatasetConfig().getString("hive.dataset.test.conf1"), "conf1-val1"); Assert.assertEquals(hiveDataset.getDatasetConfig().getString("hive.dataset.test.conf2"), "conf2-val2"); // Test scoped configs with prefix properties.put(HiveDatasetFinder.HIVE_DATASET_CONFIG_PREFIX_KEY, "hive.dataset.test"); finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool); datasets = Lists.newArrayList(finder.getDatasetsIterator()); Assert.assertEquals(datasets.size(), 1); hiveDataset = datasets.get(0); Assert.assertEquals(hiveDataset.getDatasetConfig().getString("conf1"), "conf1-val1"); Assert.assertEquals(hiveDataset.getDatasetConfig().getString("conf2"), "conf2-val2"); }
public HiveDatasetFinder(FileSystem fs, Properties properties) throws IOException { this(fs, properties, createClientPool(properties)); }
@Override protected HiveDataset computeNext() { while (this.tables.hasNext()) { DbAndTable dbAndTable = this.tables.next(); try (AutoReturnableObject<IMetaStoreClient> client = HiveDatasetFinder.this.clientPool.getClient()) { Table table = client.get().getTable(dbAndTable.getDb(), dbAndTable.getTable()); Config datasetConfig = getDatasetConfig(table); if (ConfigUtils.getBoolean(datasetConfig, HIVE_DATASET_IS_BLACKLISTED_KEY, DEFAULT_HIVE_DATASET_IS_BLACKLISTED_KEY)) { continue; } if (HiveDatasetFinder.this.eventSubmitter.isPresent()) { SlaEventSubmitter.builder().datasetUrn(dbAndTable.toString()) .eventSubmitter(HiveDatasetFinder.this.eventSubmitter.get()).eventName(DATASET_FOUND).build().submit(); } return createHiveDataset(table, datasetConfig); } catch (IllegalArgumentException e) { Throwables.propagate(e); } catch (Throwable t) { log.error(String.format("Failed to create HiveDataset for table %s.%s", dbAndTable.getDb(), dbAndTable.getTable()), t); if (HiveDatasetFinder.this.eventSubmitter.isPresent()) { SlaEventSubmitter.builder().datasetUrn(dbAndTable.toString()) .eventSubmitter(HiveDatasetFinder.this.eventSubmitter.get()).eventName(DATASET_ERROR) .additionalMetadata(FAILURE_CONTEXT, t.toString()).build().submit(); } } } return endOfData(); } };
@Test public void testBlacklist() throws Exception { List<HiveDatasetFinder.DbAndTable> dbAndTables = Lists.newArrayList(); dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table1")); dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table2")); dbAndTables.add(new HiveDatasetFinder.DbAndTable("db2", "table1")); HiveMetastoreClientPool pool = getTestPool(dbAndTables); Properties properties = new Properties(); properties.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, ""); properties.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.BLACKLIST, "db2"); HiveDatasetFinder finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool); List<HiveDataset> datasets = Lists.newArrayList(finder.getDatasetsIterator()); Assert.assertEquals(datasets.size(), 2); Assert.assertEquals(datasets.get(0).getTable().getDbName(), "db1"); Assert.assertEquals(datasets.get(1).getTable().getDbName(), "db1"); Assert.assertEquals(Sets.newHashSet(datasets.get(0).getTable().getTableName(), datasets.get(1).getTable().getTableName()), Sets.newHashSet("table1", "table2")); }
protected HiveDatasetFinder(FileSystem fs, Properties properties, ConfigClient configClient) throws IOException { this(fs, properties, createClientPool(properties), null, configClient); }
@Test public void testWhitelist() throws Exception { List<HiveDatasetFinder.DbAndTable> dbAndTables = Lists.newArrayList(); dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table1")); dbAndTables.add(new HiveDatasetFinder.DbAndTable("db1", "table2")); dbAndTables.add(new HiveDatasetFinder.DbAndTable("db2", "table1")); HiveMetastoreClientPool pool = getTestPool(dbAndTables); Properties properties = new Properties(); properties.put(HiveDatasetFinder.HIVE_DATASET_PREFIX + "." + WhitelistBlacklist.WHITELIST, "db1"); HiveDatasetFinder finder = new TestHiveDatasetFinder(FileSystem.getLocal(new Configuration()), properties, pool); List<HiveDataset> datasets = Lists.newArrayList(finder.getDatasetsIterator()); Assert.assertEquals(datasets.size(), 2); Assert.assertEquals(datasets.get(0).getTable().getDbName(), "db1"); Assert.assertEquals(datasets.get(1).getTable().getDbName(), "db1"); Assert.assertEquals(Sets.newHashSet(datasets.get(0).getTable().getTableName(), datasets.get(1).getTable().getTableName()), Sets.newHashSet("table1", "table2")); }
public HiveDatasetFinder(FileSystem fs, Properties properties, EventSubmitter eventSubmitter) throws IOException { this(fs, properties, createClientPool(properties), eventSubmitter); }