/**
 * Creates the cleanable dataset rooted at <code>path</code>, configured from this finder's
 * file system and job properties.
 *
 * @param path root of the dataset
 * @return a new {@link ConfigurableCleanableDataset} for <code>path</code>
 * @throws IOException if the dataset constructor fails with an I/O error
 */
@Override
public ConfigurableCleanableDataset<FileSystemDatasetVersion> datasetAtPath(Path path) throws IOException {
  final ConfigurableCleanableDataset<FileSystemDatasetVersion> cleanableDataset =
      new ConfigurableCleanableDataset<>(this.fs, this.props, path,
          LoggerFactory.getLogger(ConfigurableCleanableDataset.class));
  return cleanableDataset;
}
}
/**
 * Registers one version finder / retention policy pair built from the class names stored in
 * <code>config</code>.
 *
 * @param config config holding both class names; also handed to the created instances
 * @param jobProps job properties handed to the created instances
 * @param retentionPolicyKey config path of the retention policy class name
 * @param versionFinderKey config path of the version finder class name
 */
private void initWithRetentionPolicy(Config config, Properties jobProps, String retentionPolicyKey,
    String versionFinderKey) {
  // The retention policy is resolved and created before the version finder is looked up.
  final String retentionPolicyClassName = config.getString(retentionPolicyKey);
  final VersionFinderAndPolicy<T> finderAndPolicy = new VersionFinderAndPolicy<>(
      createRetentionPolicy(retentionPolicyClassName, config, jobProps),
      createVersionFinder(config.getString(versionFinderKey), config, jobProps));
  this.versionFindersAndPolicies.add(finderAndPolicy);
}
/** * Creates a new ConfigurableCleanableDataset configured through gobblin-config-management. The constructor expects * {@link #VERSION_FINDER_CLASS_KEY} and {@link #RETENTION_POLICY_CLASS_KEY} to be available in the * <code>config</code> passed. */ public ConfigurableCleanableDataset(FileSystem fs, Properties jobProps, Path datasetRoot, Config config, Logger log) throws IOException { super(fs, jobProps, config, log); this.datasetRoot = datasetRoot; this.versionFindersAndPolicies = Lists.newArrayList(); if (config.hasPath(DATASET_VERSION_POLICY_ALIAS)) { initWithSelectionPolicy(config.getConfig(DATASET_VERSION_POLICY_ALIAS), jobProps); } else if (config.hasPath(VERSION_FINDER_CLASS_KEY) && config.hasPath(RETENTION_POLICY_CLASS_KEY)) { initWithRetentionPolicy(config, jobProps, RETENTION_POLICY_CLASS_KEY, VERSION_FINDER_CLASS_KEY); } else if (config.hasPath(VERSION_FINDER_CLASS_KEY)) { initWithSelectionPolicy(config.getConfig(RETENTION_CONFIGURATION_KEY), jobProps); } else if (config.hasPath(DATASET_PARTITIONS_LIST_KEY)) { List<? extends Config> versionAndPolicies = config.getConfigList(DATASET_PARTITIONS_LIST_KEY); for (Config versionAndPolicy : versionAndPolicies) { initWithSelectionPolicy(versionAndPolicy, jobProps); } } else { throw new IllegalArgumentException( String.format("Either set version finder at %s and retention policy at %s or set partitions at %s", VERSION_FINDER_CLASS_KEY, RETENTION_POLICY_CLASS_KEY, DATASET_PARTITIONS_LIST_KEY)); } }
/**
 * A dataset whose config sets <code>gobblin.retention.dataset.is.blacklisted</code> to
 * <code>true</code> must report itself as blacklisted.
 */
@Test
public void testDatasetIsBlacklisted() throws Exception {
  Map<String, String> settings = ImmutableMap.<String, String> of(
      "gobblin.retention.version.finder.class",
      "org.apache.gobblin.data.management.version.finder.WatermarkDatasetVersionFinder",
      "gobblin.retention.selection.policy.class",
      "org.apache.gobblin.data.management.policy.NewestKSelectionPolicy",
      "gobblin.retention.selection.newestK.versionsSelected", "2",
      "gobblin.retention.dataset.is.blacklisted", "true");
  Config conf = ConfigFactory.parseMap(settings);

  FileSystem localFs = FileSystem.get(new URI(ConfigurationKeys.LOCAL_FS_URI), new Configuration());
  ConfigurableCleanableDataset<FileSystemDatasetVersion> dataset =
      new ConfigurableCleanableDataset<FileSystemDatasetVersion>(localFs, new Properties(), new Path("/someroot"),
          conf, LoggerFactory.getLogger(ConfigurableCleanableDatasetTest.class));

  Assert.assertEquals(dataset.isDatasetBlacklisted(), true);
}
}
/**
 * With a version finder and a retention policy class configured, the dataset must wrap the
 * retention policy in an {@link EmbeddedRetentionSelectionPolicy}, use the configured version
 * finder, and not be blacklisted.
 */
@Test
public void testConfigureWithRetentionPolicy() throws Exception {
  Map<String, String> settings = ImmutableMap.<String, String> of(
      "gobblin.retention.version.finder.class",
      "org.apache.gobblin.data.management.version.finder.WatermarkDatasetVersionFinder",
      "gobblin.retention.retention.policy.class",
      "org.apache.gobblin.data.management.retention.policy.NewestKRetentionPolicy",
      "gobblin.retention.newestK.versions.retained", "2");
  Config conf = ConfigFactory.parseMap(settings);

  FileSystem localFs = FileSystem.get(new URI(ConfigurationKeys.LOCAL_FS_URI), new Configuration());
  ConfigurableCleanableDataset<FileSystemDatasetVersion> dataset =
      new ConfigurableCleanableDataset<FileSystemDatasetVersion>(localFs, new Properties(), new Path("/someroot"),
          conf, LoggerFactory.getLogger(ConfigurableCleanableDatasetTest.class));

  Assert.assertEquals(dataset.getVersionFindersAndPolicies().get(0).getVersionSelectionPolicy().getClass(),
      EmbeddedRetentionSelectionPolicy.class);
  Assert.assertEquals(dataset.getVersionFindersAndPolicies().get(0).getVersionFinder().getClass(),
      WatermarkDatasetVersionFinder.class);
  Assert.assertEquals(dataset.isDatasetBlacklisted(), false);
}
/**
 * Builds one {@link VersionFinderAndPolicy} from <code>config</code> and appends it to
 * <code>versionFindersAndPolicies</code>.
 *
 * <p>The keys looked up in <code>config</code> are {@link #SELECTION_POLICY_CLASS_KEY} and
 * {@link #VERSION_FINDER_CLASS_KEY} with {@link #CONFIGURATION_KEY_PREFIX} stripped. The version finder is
 * mandatory; the selection policy is only set when its key is present. A retention action is added for every
 * factory in <code>RETENTION_ACTION_TYPES</code> that reports it can be created from <code>config</code>.
 *
 * @param config config scoped to a single version finder / policy definition
 * @param jobProps job properties passed on to the created components
 * @throws IllegalArgumentException if the version finder class is missing from <code>config</code>
 * @throws RuntimeException if a retention action factory cannot be instantiated
 */
private void initWithSelectionPolicy(Config config, Properties jobProps) {

  String selectionPolicyKey = StringUtils.substringAfter(SELECTION_POLICY_CLASS_KEY, CONFIGURATION_KEY_PREFIX);
  String versionFinderKey = StringUtils.substringAfter(VERSION_FINDER_CLASS_KEY, CONFIGURATION_KEY_PREFIX);

  // Render the config into the error message only when the check fails; formatting it up front would
  // serialize the whole config on every successful call.
  if (!config.hasPath(versionFinderKey)) {
    throw new IllegalArgumentException(String.format("Version finder class is required at %s in config %s",
        versionFinderKey, config.root().render(ConfigRenderOptions.concise())));
  }

  VersionFinderAndPolicyBuilder<T> builder = VersionFinderAndPolicy.builder();
  builder.versionFinder(createVersionFinder(config.getString(versionFinderKey), config, jobProps));

  if (config.hasPath(selectionPolicyKey)) {
    // NOTE(review): the SelectNothingPolicy default looks unreachable behind the hasPath guard - confirm.
    builder.versionSelectionPolicy(createSelectionPolicy(
        ConfigUtils.getString(config, selectionPolicyKey, SelectNothingPolicy.class.getName()), config, jobProps));
  }

  for (Class<? extends RetentionActionFactory> factoryClass : RETENTION_ACTION_TYPES) {
    try {
      RetentionActionFactory factory = factoryClass.newInstance();
      if (factory.canCreateWithConfig(config)) {
        builder.retentionAction((RetentionAction) factory.createRetentionAction(config, this.fs,
            ConfigUtils.propertiesToConfig(jobProps)));
      }
    } catch (InstantiationException | IllegalAccessException e) {
      // Explicit throw makes it visible that this branch never completes normally.
      throw Throwables.propagate(e);
    }
  }

  this.versionFindersAndPolicies.add(builder.build());
}
/**
 * With a version finder and a selection policy class configured, the dataset must expose that
 * selection policy and version finder as its first pair, and not be blacklisted.
 */
@Test
public void testConfigureWithSelectionPolicy() throws Exception {
  Map<String, String> settings = ImmutableMap.<String, String> of(
      "gobblin.retention.version.finder.class",
      "org.apache.gobblin.data.management.version.finder.WatermarkDatasetVersionFinder",
      "gobblin.retention.selection.policy.class",
      "org.apache.gobblin.data.management.policy.NewestKSelectionPolicy",
      "gobblin.retention.selection.newestK.versionsSelected", "2");
  Config conf = ConfigFactory.parseMap(settings);

  FileSystem localFs = FileSystem.get(new URI(ConfigurationKeys.LOCAL_FS_URI), new Configuration());
  ConfigurableCleanableDataset<FileSystemDatasetVersion> dataset =
      new ConfigurableCleanableDataset<FileSystemDatasetVersion>(localFs, new Properties(), new Path("/someroot"),
          conf, LoggerFactory.getLogger(ConfigurableCleanableDatasetTest.class));

  Assert.assertEquals(dataset.getVersionFindersAndPolicies().get(0).getVersionSelectionPolicy().getClass(),
      NewestKSelectionPolicy.class);
  Assert.assertEquals(dataset.getVersionFindersAndPolicies().get(0).getVersionFinder().getClass(),
      WatermarkDatasetVersionFinder.class);
  Assert.assertEquals(dataset.isDatasetBlacklisted(), false);
}
/**
 * Builds one {@link VersionFinderAndPolicy} from <code>config</code> and appends it to
 * <code>versionFindersAndPolicies</code>.
 *
 * <p>The keys looked up in <code>config</code> are {@link #SELECTION_POLICY_CLASS_KEY} and
 * {@link #VERSION_FINDER_CLASS_KEY} with {@link #CONFIGURATION_KEY_PREFIX} stripped. The version finder is
 * mandatory; the selection policy is only set when its key is present. A retention action is added for every
 * factory in <code>RETENTION_ACTION_TYPES</code> that reports it can be created from <code>config</code>.
 *
 * @param config config scoped to a single version finder / policy definition
 * @param jobProps job properties passed on to the created components
 * @throws IllegalArgumentException if the version finder class is missing from <code>config</code>
 * @throws RuntimeException if a retention action factory cannot be instantiated
 */
private void initWithSelectionPolicy(Config config, Properties jobProps) {

  String selectionPolicyKey = StringUtils.substringAfter(SELECTION_POLICY_CLASS_KEY, CONFIGURATION_KEY_PREFIX);
  String versionFinderKey = StringUtils.substringAfter(VERSION_FINDER_CLASS_KEY, CONFIGURATION_KEY_PREFIX);

  // Render the config into the error message only when the check fails; formatting it up front would
  // serialize the whole config on every successful call.
  if (!config.hasPath(versionFinderKey)) {
    throw new IllegalArgumentException(String.format("Version finder class is required at %s in config %s",
        versionFinderKey, config.root().render(ConfigRenderOptions.concise())));
  }

  VersionFinderAndPolicyBuilder<T> builder = VersionFinderAndPolicy.builder();
  builder.versionFinder(createVersionFinder(config.getString(versionFinderKey), config, jobProps));

  if (config.hasPath(selectionPolicyKey)) {
    // NOTE(review): the SelectNothingPolicy default looks unreachable behind the hasPath guard - confirm.
    builder.versionSelectionPolicy(createSelectionPolicy(
        ConfigUtils.getString(config, selectionPolicyKey, SelectNothingPolicy.class.getName()), config, jobProps));
  }

  for (Class<? extends RetentionActionFactory> factoryClass : RETENTION_ACTION_TYPES) {
    try {
      RetentionActionFactory factory = factoryClass.newInstance();
      if (factory.canCreateWithConfig(config)) {
        builder.retentionAction((RetentionAction) factory.createRetentionAction(config, this.fs,
            ConfigUtils.propertiesToConfig(jobProps)));
      }
    } catch (InstantiationException | IllegalAccessException e) {
      // Explicit throw makes it visible that this branch never completes normally.
      throw Throwables.propagate(e);
    }
  }

  this.versionFindersAndPolicies.add(builder.build());
}
/**
 * A config that lists two identical partition definitions under
 * <code>gobblin.retention.dataset.partitions</code> must yield two version finder / selection
 * policy pairs, each with the configured classes, and a dataset that is not blacklisted.
 */
@Test
public void testConfigureWithMulitplePolicies() throws Exception {
  Map<String, String> partitionConf = ImmutableMap.<String, String> of(
      "version.finder.class",
      "org.apache.gobblin.data.management.version.finder.WatermarkDatasetVersionFinder",
      "selection.policy.class",
      "org.apache.gobblin.data.management.policy.NewestKSelectionPolicy",
      "selection.newestK.versionsSelected", "2");

  Map<String, List<Map<String, String>>> rootSettings = ImmutableMap.<String, List<Map<String, String>>> of(
      "gobblin.retention.dataset.partitions", ImmutableList.of(partitionConf, partitionConf));
  Config conf = ConfigFactory.parseMap(rootSettings);

  FileSystem localFs = FileSystem.get(new URI(ConfigurationKeys.LOCAL_FS_URI), new Configuration());
  ConfigurableCleanableDataset<FileSystemDatasetVersion> dataset =
      new ConfigurableCleanableDataset<FileSystemDatasetVersion>(localFs, new Properties(), new Path("/someroot"),
          conf, LoggerFactory.getLogger(ConfigurableCleanableDatasetTest.class));

  // Both partition entries are identical, so both resulting pairs are checked the same way.
  for (int partitionIndex = 0; partitionIndex < 2; partitionIndex++) {
    Assert.assertEquals(
        dataset.getVersionFindersAndPolicies().get(partitionIndex).getVersionSelectionPolicy().getClass(),
        NewestKSelectionPolicy.class);
    Assert.assertEquals(
        dataset.getVersionFindersAndPolicies().get(partitionIndex).getVersionFinder().getClass(),
        WatermarkDatasetVersionFinder.class);
  }
  Assert.assertEquals(dataset.isDatasetBlacklisted(), false);
}
@Override public Void call() throws Exception { // Process each {@link Config}, find dataset and add those into the datasets Config c = confClient.getConfig(u); Dataset datasetForConfig = new ConfigurableCleanableDataset(fileSystem, p, new Path(c.getString(DATASET_PATH)), c, log); datasets.add(datasetForConfig); return null; } };
/**
 * Registers one version finder / retention policy pair built from the class names stored in
 * <code>config</code>.
 *
 * @param config config holding both class names; also handed to the created instances
 * @param jobProps job properties handed to the created instances
 * @param retentionPolicyKey config path of the retention policy class name
 * @param versionFinderKey config path of the version finder class name
 */
private void initWithRetentionPolicy(Config config, Properties jobProps, String retentionPolicyKey,
    String versionFinderKey) {
  // The retention policy is resolved and created before the version finder is looked up.
  final String retentionPolicyClassName = config.getString(retentionPolicyKey);
  final VersionFinderAndPolicy<T> finderAndPolicy = new VersionFinderAndPolicy<>(
      createRetentionPolicy(retentionPolicyClassName, config, jobProps),
      createVersionFinder(config.getString(versionFinderKey), config, jobProps));
  this.versionFindersAndPolicies.add(finderAndPolicy);
}
/** * Creates a new ConfigurableCleanableDataset configured through gobblin-config-management. The constructor expects * {@link #VERSION_FINDER_CLASS_KEY} and {@link #RETENTION_POLICY_CLASS_KEY} to be available in the * <code>config</code> passed. */ public ConfigurableCleanableDataset(FileSystem fs, Properties jobProps, Path datasetRoot, Config config, Logger log) throws IOException { super(fs, jobProps, config, log); this.datasetRoot = datasetRoot; this.versionFindersAndPolicies = Lists.newArrayList(); if (config.hasPath(DATASET_VERSION_POLICY_ALIAS)) { initWithSelectionPolicy(config.getConfig(DATASET_VERSION_POLICY_ALIAS), jobProps); } else if (config.hasPath(VERSION_FINDER_CLASS_KEY) && config.hasPath(RETENTION_POLICY_CLASS_KEY)) { initWithRetentionPolicy(config, jobProps, RETENTION_POLICY_CLASS_KEY, VERSION_FINDER_CLASS_KEY); } else if (config.hasPath(VERSION_FINDER_CLASS_KEY)) { initWithSelectionPolicy(config.getConfig(RETENTION_CONFIGURATION_KEY), jobProps); } else if (config.hasPath(DATASET_PARTITIONS_LIST_KEY)) { List<? extends Config> versionAndPolicies = config.getConfigList(DATASET_PARTITIONS_LIST_KEY); for (Config versionAndPolicy : versionAndPolicies) { initWithSelectionPolicy(versionAndPolicy, jobProps); } } else { throw new IllegalArgumentException( String.format("Either set version finder at %s and retention policy at %s or set partitions at %s", VERSION_FINDER_CLASS_KEY, RETENTION_POLICY_CLASS_KEY, DATASET_PARTITIONS_LIST_KEY)); } }
/**
 * Creates the cleanable dataset rooted at <code>path</code>, using the file system returned by
 * <code>getFsForDataset(path)</code> and this finder's properties.
 *
 * @param path root of the dataset
 * @return a new {@link ConfigurableCleanableDataset} for <code>path</code>
 * @throws IOException if resolving the file system or constructing the dataset fails with an I/O error
 */
@Override
public Dataset datasetAtPath(Path path) throws IOException {
  final Dataset cleanableDataset =
      new ConfigurableCleanableDataset<DatasetVersion>(this.getFsForDataset(path), this.props, path);
  return cleanableDataset;
}
/**
 * Creates the cleanable dataset rooted at <code>path</code>, configured from the config client.
 *
 * <p>The config is requested at the value of the
 * {@link ConfigurationKeys#CONFIG_MANAGEMENT_STORE_URI} property concatenated with
 * <code>path</code>.
 *
 * @param path root of the dataset
 * @return a new {@link ConfigurableCleanableDataset} for <code>path</code>
 * @throws IOException if the dataset constructor fails with an I/O error
 * @throws IllegalArgumentException wrapping any config-store or URI syntax failure raised while fetching the config
 */
@Override
public ConfigurableCleanableDataset<FileSystemDatasetVersion> datasetAtPath(Path path) throws IOException {
  try {
    final String storeUri = this.props.getProperty(ConfigurationKeys.CONFIG_MANAGEMENT_STORE_URI);
    final String datasetConfigUri = storeUri + path.toString();
    return new ConfigurableCleanableDataset<>(this.fs, this.props, path, this.client.getConfig(datasetConfigUri),
        LoggerFactory.getLogger(ConfigurableCleanableDataset.class));
  } catch (VersionDoesNotExistException | ConfigStoreFactoryDoesNotExistsException | ConfigStoreCreationException
      | URISyntaxException e) {
    throw new IllegalArgumentException(e);
  }
}
}
/**
 * Creates the cleanable dataset rooted at <code>path</code>, configured from this finder's
 * file system and job properties.
 *
 * @param path root of the dataset
 * @return a new {@link ConfigurableCleanableDataset} for <code>path</code>
 * @throws IOException if the dataset constructor fails with an I/O error
 */
@Override
public ConfigurableCleanableDataset<FileSystemDatasetVersion> datasetAtPath(Path path) throws IOException {
  final ConfigurableCleanableDataset<FileSystemDatasetVersion> cleanableDataset =
      new ConfigurableCleanableDataset<>(this.fs, this.props, path,
          LoggerFactory.getLogger(ConfigurableCleanableDataset.class));
  return cleanableDataset;
}
}
/**
 * Creates the cleanable dataset rooted at <code>path</code>, configured from the config client.
 *
 * <p>The config is requested at the value of the
 * {@link ConfigurationKeys#CONFIG_MANAGEMENT_STORE_URI} property concatenated with
 * <code>path</code>.
 *
 * @param path root of the dataset
 * @return a new {@link ConfigurableCleanableDataset} for <code>path</code>
 * @throws IOException if the dataset constructor fails with an I/O error
 * @throws IllegalArgumentException wrapping any config-store or URI syntax failure raised while fetching the config
 */
@Override
public ConfigurableCleanableDataset<FileSystemDatasetVersion> datasetAtPath(Path path) throws IOException {
  try {
    final String storeUri = this.props.getProperty(ConfigurationKeys.CONFIG_MANAGEMENT_STORE_URI);
    final String datasetConfigUri = storeUri + path.toString();
    return new ConfigurableCleanableDataset<>(this.fs, this.props, path, this.client.getConfig(datasetConfigUri),
        LoggerFactory.getLogger(ConfigurableCleanableDataset.class));
  } catch (VersionDoesNotExistException | ConfigStoreFactoryDoesNotExistsException | ConfigStoreCreationException
      | URISyntaxException e) {
    throw new IllegalArgumentException(e);
  }
}
}
/**
 * Creates the cleanable dataset rooted at <code>path</code>, using the file system returned by
 * <code>getFsForDataset(path)</code> and this finder's properties.
 *
 * @param path root of the dataset
 * @return a new {@link ConfigurableCleanableDataset} for <code>path</code>
 * @throws IOException if resolving the file system or constructing the dataset fails with an I/O error
 */
@Override
public Dataset datasetAtPath(Path path) throws IOException {
  final Dataset cleanableDataset =
      new ConfigurableCleanableDataset<DatasetVersion>(this.getFsForDataset(path), this.props, path);
  return cleanableDataset;
}
@Override public Void call() throws Exception { // Process each {@link Config}, find dataset and add those into the datasets Config c = confClient.getConfig(u); Dataset datasetForConfig = new ConfigurableCleanableDataset(fileSystem, p, new Path(c.getString(DATASET_PATH)), c, log); datasets.add(datasetForConfig); return null; } };