/**
 * Builds the cleanable dataset rooted at {@code path}.
 *
 * <p>The dataset is created from this instance's {@code fs} and {@code props} and is handed the
 * logger registered for {@link ConfigurableCleanableDataset}.
 *
 * @param path root path of the dataset to create
 * @return a newly constructed {@link ConfigurableCleanableDataset} for {@code path}
 * @throws IOException as declared by the overridden method
 */
@Override
public ConfigurableCleanableDataset<FileSystemDatasetVersion> datasetAtPath(Path path)
    throws IOException {
  final ConfigurableCleanableDataset<FileSystemDatasetVersion> cleanableDataset =
      new ConfigurableCleanableDataset<>(
          this.fs,
          this.props,
          path,
          LoggerFactory.getLogger(ConfigurableCleanableDataset.class));
  return cleanableDataset;
}
} // closes the enclosing type whose header lies outside this excerpt
@Override public Void call() throws Exception { // Process each {@link Config}, find dataset and add those into the datasets Config c = confClient.getConfig(u); Dataset datasetForConfig = new ConfigurableCleanableDataset(fileSystem, p, new Path(c.getString(DATASET_PATH)), c, log); datasets.add(datasetForConfig); return null; } };
/**
 * Builds the cleanable dataset rooted at {@code path}.
 *
 * <p>The file system is obtained per dataset through {@code getFsForDataset(path)}; the
 * dataset is configured from this instance's {@code props}.
 *
 * @param path root path of the dataset to create
 * @return a newly constructed {@link ConfigurableCleanableDataset} for {@code path}
 * @throws IOException as declared by the overridden method
 */
@Override
public Dataset datasetAtPath(Path path) throws IOException {
  final ConfigurableCleanableDataset<DatasetVersion> cleanableDataset =
      new ConfigurableCleanableDataset<DatasetVersion>(
          this.getFsForDataset(path),
          this.props,
          path);
  return cleanableDataset;
}
/**
 * Builds the cleanable dataset rooted at {@code path}, configured from the config client.
 *
 * <p>The config is looked up under the value of
 * {@code ConfigurationKeys.CONFIG_MANAGEMENT_STORE_URI} from {@code props}, concatenated with
 * the string form of {@code path}.
 *
 * @param path root path of the dataset to create
 * @return a newly constructed {@link ConfigurableCleanableDataset} for {@code path}
 * @throws IOException as declared by the overridden method
 * @throws IllegalArgumentException wrapping any config-store or URI syntax failure raised
 *     while the config is fetched or the dataset is constructed
 */
@Override
public ConfigurableCleanableDataset<FileSystemDatasetVersion> datasetAtPath(Path path)
    throws IOException {
  final String storeUriPrefix =
      this.props.getProperty(ConfigurationKeys.CONFIG_MANAGEMENT_STORE_URI);
  final String datasetConfigUri = storeUriPrefix + path.toString();
  try {
    return new ConfigurableCleanableDataset<>(
        this.fs,
        this.props,
        path,
        this.client.getConfig(datasetConfigUri),
        LoggerFactory.getLogger(ConfigurableCleanableDataset.class));
  } catch (VersionDoesNotExistException
      | ConfigStoreFactoryDoesNotExistsException
      | ConfigStoreCreationException
      | URISyntaxException e) {
    // Surface lookup failures as an unchecked error while keeping the original cause.
    throw new IllegalArgumentException(e);
  }
}
} // closes the enclosing type whose header lies outside this excerpt
/**
 * Checks that a dataset whose config sets {@code gobblin.retention.dataset.is.blacklisted}
 * to {@code "true"} reports itself as blacklisted.
 */
@Test
public void testDatasetIsBlacklisted() throws Exception {
  Config retentionConfig = ConfigFactory.parseMap(ImmutableMap.<String, String> of(
      "gobblin.retention.version.finder.class",
      "org.apache.gobblin.data.management.version.finder.WatermarkDatasetVersionFinder",
      "gobblin.retention.selection.policy.class",
      "org.apache.gobblin.data.management.policy.NewestKSelectionPolicy",
      "gobblin.retention.selection.newestK.versionsSelected",
      "2",
      "gobblin.retention.dataset.is.blacklisted",
      "true"));

  FileSystem localFs = FileSystem.get(new URI(ConfigurationKeys.LOCAL_FS_URI), new Configuration());
  Properties jobProps = new Properties();
  Path datasetRoot = new Path("/someroot");

  ConfigurableCleanableDataset<FileSystemDatasetVersion> dataset =
      new ConfigurableCleanableDataset<>(
          localFs,
          jobProps,
          datasetRoot,
          retentionConfig,
          LoggerFactory.getLogger(ConfigurableCleanableDatasetTest.class));

  Assert.assertEquals(dataset.isDatasetBlacklisted(), true);
}
} // closes the enclosing test class whose header lies outside this excerpt
/**
 * Checks that configuring a dataset with a retention policy class yields a single
 * finder/policy pair whose selection policy is {@link EmbeddedRetentionSelectionPolicy} and
 * whose version finder is {@link WatermarkDatasetVersionFinder}, and that the dataset is not
 * blacklisted.
 */
@Test
public void testConfigureWithRetentionPolicy() throws Exception {
  Config retentionConfig = ConfigFactory.parseMap(ImmutableMap.<String, String> of(
      "gobblin.retention.version.finder.class",
      "org.apache.gobblin.data.management.version.finder.WatermarkDatasetVersionFinder",
      "gobblin.retention.retention.policy.class",
      "org.apache.gobblin.data.management.retention.policy.NewestKRetentionPolicy",
      "gobblin.retention.newestK.versions.retained",
      "2"));

  FileSystem localFs = FileSystem.get(new URI(ConfigurationKeys.LOCAL_FS_URI), new Configuration());
  Properties jobProps = new Properties();
  Path datasetRoot = new Path("/someroot");

  ConfigurableCleanableDataset<FileSystemDatasetVersion> dataset =
      new ConfigurableCleanableDataset<>(
          localFs,
          jobProps,
          datasetRoot,
          retentionConfig,
          LoggerFactory.getLogger(ConfigurableCleanableDatasetTest.class));

  // The first (and only configured) finder/policy pair.
  Assert.assertEquals(
      dataset.getVersionFindersAndPolicies().get(0).getVersionSelectionPolicy().getClass(),
      EmbeddedRetentionSelectionPolicy.class);
  Assert.assertEquals(
      dataset.getVersionFindersAndPolicies().get(0).getVersionFinder().getClass(),
      WatermarkDatasetVersionFinder.class);

  Assert.assertEquals(dataset.isDatasetBlacklisted(), false);
}
/**
 * Checks that configuring a dataset with a selection policy class yields a finder/policy pair
 * whose selection policy is {@link NewestKSelectionPolicy} and whose version finder is
 * {@link WatermarkDatasetVersionFinder}, and that the dataset is not blacklisted.
 */
@Test
public void testConfigureWithSelectionPolicy() throws Exception {
  Config retentionConfig = ConfigFactory.parseMap(ImmutableMap.<String, String> of(
      "gobblin.retention.version.finder.class",
      "org.apache.gobblin.data.management.version.finder.WatermarkDatasetVersionFinder",
      "gobblin.retention.selection.policy.class",
      "org.apache.gobblin.data.management.policy.NewestKSelectionPolicy",
      "gobblin.retention.selection.newestK.versionsSelected",
      "2"));

  FileSystem localFs = FileSystem.get(new URI(ConfigurationKeys.LOCAL_FS_URI), new Configuration());
  Properties jobProps = new Properties();
  Path datasetRoot = new Path("/someroot");

  ConfigurableCleanableDataset<FileSystemDatasetVersion> dataset =
      new ConfigurableCleanableDataset<>(
          localFs,
          jobProps,
          datasetRoot,
          retentionConfig,
          LoggerFactory.getLogger(ConfigurableCleanableDatasetTest.class));

  // The first finder/policy pair.
  Assert.assertEquals(
      dataset.getVersionFindersAndPolicies().get(0).getVersionSelectionPolicy().getClass(),
      NewestKSelectionPolicy.class);
  Assert.assertEquals(
      dataset.getVersionFindersAndPolicies().get(0).getVersionFinder().getClass(),
      WatermarkDatasetVersionFinder.class);

  Assert.assertEquals(dataset.isDatasetBlacklisted(), false);
}
/**
 * Checks that listing the same partition config twice under
 * {@code gobblin.retention.dataset.partitions} produces two finder/policy pairs, each using
 * {@link NewestKSelectionPolicy} and {@link WatermarkDatasetVersionFinder}, and that the
 * dataset is not blacklisted.
 */
@Test
public void testConfigureWithMulitplePolicies() throws Exception {
  Map<String, String> partitionSettings = ImmutableMap.<String, String> of(
      "version.finder.class",
      "org.apache.gobblin.data.management.version.finder.WatermarkDatasetVersionFinder",
      "selection.policy.class",
      "org.apache.gobblin.data.management.policy.NewestKSelectionPolicy",
      "selection.newestK.versionsSelected",
      "2");

  Config retentionConfig = ConfigFactory.parseMap(
      ImmutableMap.<String, List<Map<String, String>>> of(
          "gobblin.retention.dataset.partitions",
          ImmutableList.of(partitionSettings, partitionSettings)));

  FileSystem localFs = FileSystem.get(new URI(ConfigurationKeys.LOCAL_FS_URI), new Configuration());
  Properties jobProps = new Properties();
  Path datasetRoot = new Path("/someroot");

  ConfigurableCleanableDataset<FileSystemDatasetVersion> dataset =
      new ConfigurableCleanableDataset<>(
          localFs,
          jobProps,
          datasetRoot,
          retentionConfig,
          LoggerFactory.getLogger(ConfigurableCleanableDatasetTest.class));

  // First partition.
  Assert.assertEquals(
      dataset.getVersionFindersAndPolicies().get(0).getVersionSelectionPolicy().getClass(),
      NewestKSelectionPolicy.class);
  Assert.assertEquals(
      dataset.getVersionFindersAndPolicies().get(0).getVersionFinder().getClass(),
      WatermarkDatasetVersionFinder.class);

  // Second partition.
  Assert.assertEquals(
      dataset.getVersionFindersAndPolicies().get(1).getVersionSelectionPolicy().getClass(),
      NewestKSelectionPolicy.class);
  Assert.assertEquals(
      dataset.getVersionFindersAndPolicies().get(1).getVersionFinder().getClass(),
      WatermarkDatasetVersionFinder.class);

  Assert.assertEquals(dataset.isDatasetBlacklisted(), false);
}
/**
 * Builds the cleanable dataset rooted at {@code path}.
 *
 * <p>The dataset is created from this instance's {@code fs} and {@code props} and is handed the
 * logger registered for {@link ConfigurableCleanableDataset}.
 *
 * @param path root path of the dataset to create
 * @return a newly constructed {@link ConfigurableCleanableDataset} for {@code path}
 * @throws IOException as declared by the overridden method
 */
@Override
public ConfigurableCleanableDataset<FileSystemDatasetVersion> datasetAtPath(Path path)
    throws IOException {
  final ConfigurableCleanableDataset<FileSystemDatasetVersion> cleanableDataset =
      new ConfigurableCleanableDataset<>(
          this.fs,
          this.props,
          path,
          LoggerFactory.getLogger(ConfigurableCleanableDataset.class));
  return cleanableDataset;
}
} // closes the enclosing type whose header lies outside this excerpt
/**
 * Builds the cleanable dataset rooted at {@code path}, configured from the config client.
 *
 * <p>The config is looked up under the value of
 * {@code ConfigurationKeys.CONFIG_MANAGEMENT_STORE_URI} from {@code props}, concatenated with
 * the string form of {@code path}.
 *
 * @param path root path of the dataset to create
 * @return a newly constructed {@link ConfigurableCleanableDataset} for {@code path}
 * @throws IOException as declared by the overridden method
 * @throws IllegalArgumentException wrapping any config-store or URI syntax failure raised
 *     while the config is fetched or the dataset is constructed
 */
@Override
public ConfigurableCleanableDataset<FileSystemDatasetVersion> datasetAtPath(Path path)
    throws IOException {
  final String storeUriPrefix =
      this.props.getProperty(ConfigurationKeys.CONFIG_MANAGEMENT_STORE_URI);
  final String datasetConfigUri = storeUriPrefix + path.toString();
  try {
    return new ConfigurableCleanableDataset<>(
        this.fs,
        this.props,
        path,
        this.client.getConfig(datasetConfigUri),
        LoggerFactory.getLogger(ConfigurableCleanableDataset.class));
  } catch (VersionDoesNotExistException
      | ConfigStoreFactoryDoesNotExistsException
      | ConfigStoreCreationException
      | URISyntaxException e) {
    // Surface lookup failures as an unchecked error while keeping the original cause.
    throw new IllegalArgumentException(e);
  }
}
} // closes the enclosing type whose header lies outside this excerpt
/**
 * Builds the cleanable dataset rooted at {@code path}.
 *
 * <p>The file system is obtained per dataset through {@code getFsForDataset(path)}; the
 * dataset is configured from this instance's {@code props}.
 *
 * @param path root path of the dataset to create
 * @return a newly constructed {@link ConfigurableCleanableDataset} for {@code path}
 * @throws IOException as declared by the overridden method
 */
@Override
public Dataset datasetAtPath(Path path) throws IOException {
  final ConfigurableCleanableDataset<DatasetVersion> cleanableDataset =
      new ConfigurableCleanableDataset<DatasetVersion>(
          this.getFsForDataset(path),
          this.props,
          path);
  return cleanableDataset;
}
@Override public Void call() throws Exception { // Process each {@link Config}, find dataset and add those into the datasets Config c = confClient.getConfig(u); Dataset datasetForConfig = new ConfigurableCleanableDataset(fileSystem, p, new Path(c.getString(DATASET_PATH)), c, log); datasets.add(datasetForConfig); return null; } };