/**
 * Builds a dataset from a file system, a root path and an already-selected list of files.
 *
 * @param fs file system handle kept by this dataset
 * @param rootPath dataset root; it is stored with its scheme and authority removed
 * @param properties properties kept by this dataset
 * @param idenifier identifier kept by this dataset
 * @param subFiles file statuses kept as this dataset's files
 */
public SubsetFilesCopyableDataset(final FileSystem fs, Path rootPath, Properties properties, String idenifier,
    List<FileStatus> subFiles) {
  // Plain pass-through state.
  this.fs = fs;
  this.props = properties;
  this.identifier = idenifier;
  this.files = subFiles;

  // Only the path component of the root is retained.
  this.rootPath = PathUtils.getPathWithoutSchemeAndAuthority(rootPath);
}
/**
 * Compares two paths while ignoring their scheme and authority (the prefix).
 *
 * @param path1 first path to compare
 * @param path2 second path to compare
 * @return true if the two paths are equal once scheme and authority have been stripped from both
 */
public static boolean compareWithoutSchemeAndAuthority(Path path1, Path path2) { return PathUtils.getPathWithoutSchemeAndAuthority(path1).equals(getPathWithoutSchemeAndAuthority(path2)); } }
/**
 * Relativizes {@code fullPath} against {@code pathPrefix}.
 *
 * <p>Scheme and authority are stripped from both arguments first; the actual relativization is delegated to
 * {@code URI.relativize} on the resulting path URIs.
 *
 * @param fullPath path to relativize
 * @param pathPrefix prefix to relativize against
 * @return a {@link Path} built from the relativized URI
 */
public static Path relativizePath(Path fullPath, Path pathPrefix) {
  Path prefixOnly = getPathWithoutSchemeAndAuthority(pathPrefix);
  Path targetOnly = getPathWithoutSchemeAndAuthority(fullPath);
  return new Path(prefixOnly.toUri().relativize(targetOnly.toUri()));
}
/**
 * Extracts the job config file path from the given job properties.
 *
 * @param jobProps properties holding {@link ConfigurationKeys#JOB_CONFIG_FILE_PATH_KEY}
 * @return the configured path with scheme and authority removed
 */
private Path getJobPath(Properties jobProps) {
  String configFilePath = jobProps.getProperty(ConfigurationKeys.JOB_CONFIG_FILE_PATH_KEY);
  Path configuredPath = new Path(configFilePath);
  return PathUtils.getPathWithoutSchemeAndAuthority(configuredPath);
}
public RecursivePathFinder(final FileSystem fs, Path rootPath, Properties properties) { this.rootPath = PathUtils.getPathWithoutSchemeAndAuthority(rootPath); this.fs = fs; this.pathFilter = DatasetUtils.instantiatePathFilter(properties); this.includeEmptyDirectories = Boolean.parseBoolean(properties.getProperty(CopyConfiguration.INCLUDE_EMPTY_DIRECTORIES)); }
/** * @param testTempDirPath under which all test files are created on the FileSystem * @param testSetupConfPath setup config file path in classpath */ public RetentionTestDataGenerator(Path testTempDirPath, Path testSetupConfPath, FileSystem fs) { this.fs = fs; this.testTempDirPath = testTempDirPath; this.setupConfig = ConfigFactory.parseResources(PathUtils.getPathWithoutSchemeAndAuthority(testSetupConfPath).toString()); if (!this.setupConfig.hasPath(DATA_GENERATOR_KEY)) { throw new RuntimeException(String.format("Failed to load setup config at %s", testSetupConfPath.toString())); } }
/**
 * Unschedules the job registered in {@code jobNameMap} for the given path, if there is one.
 *
 * <p>The lookup and the removal both use the path with its scheme and authority stripped. When no job is
 * registered for that path, this only logs and returns. A {@link JobException} raised while unscheduling is
 * logged together with the job name and is not rethrown; in that case the map entry is left in place.
 *
 * @param path path whose job should be unscheduled
 */
private void unscheduleJobAtPath(Path path) {
  // Resolve the job name once, outside the try block, so the failure log below reports the same job that
  // was actually looked up (a lookup with the raw path does not match the key used here).
  Path pathWithoutSchemeOrAuthority = PathUtils.getPathWithoutSchemeAndAuthority(path);
  String jobName = this.jobNameMap.get(pathWithoutSchemeOrAuthority);
  if (jobName == null) {
    LOG.info("Could not find a scheduled job to unschedule with path " + pathWithoutSchemeOrAuthority);
    return;
  }

  try {
    LOG.info("Unscheduling job " + jobName);
    this.jobScheduler.unscheduleJob(jobName);
    // Only forget the mapping after the scheduler accepted the request.
    this.jobNameMap.remove(pathWithoutSchemeOrAuthority);
  } catch (JobException je) {
    // Keep the cause in the log instead of silently dropping it.
    LOG.error("Could not unschedule job " + jobName, je);
  }
}
/**
 * Creates a copyable dataset rooted at {@code rootPath}, configured from {@code properties}.
 *
 * @param fs file system handle kept by this dataset
 * @param rootPath dataset root; it is stored with its scheme and authority removed
 * @param properties source of the path filter, the copyable-file filter and the boolean flags read below
 * @param glob glob kept by this dataset
 */
public RecursiveCopyableDataset(final FileSystem fs, Path rootPath, Properties properties, Path glob) {
  // Directly supplied state.
  this.fs = fs;
  this.glob = glob;
  this.properties = properties;
  this.rootPath = PathUtils.getPathWithoutSchemeAndAuthority(rootPath);

  // Filters instantiated from the properties.
  this.pathFilter = DatasetUtils.instantiatePathFilter(properties);
  this.copyableFileFilter = DatasetUtils.instantiateCopyableFileFilter(properties);

  // Boolean switches; a missing key parses to false.
  this.update = Boolean.parseBoolean(properties.getProperty(UPDATE_KEY));
  this.delete = Boolean.parseBoolean(properties.getProperty(DELETE_KEY));
  this.deleteEmptyDirectories = Boolean.parseBoolean(properties.getProperty(DELETE_EMPTY_DIRECTORIES_KEY));
  this.includeEmptyDirectories =
      Boolean.parseBoolean(properties.getProperty(CopyConfiguration.INCLUDE_EMPTY_DIRECTORIES));
  this.applyFilterToDirectories =
      Boolean.parseBoolean(properties.getProperty(CopyConfiguration.APPLY_FILTER_TO_DIRECTORIES, "false"));
}
/**
 * Swaps the leading {@code prefixTobeReplaced} of {@code sourcePath} for {@code prefixReplacement}.
 *
 * <p>The source path is first stripped of scheme and authority; the remainder relative to the old prefix is
 * then resolved under the replacement prefix.
 *
 * @param sourcePath path whose prefix should be replaced
 * @param prefixTobeReplaced prefix expected to be an ancestor of {@code sourcePath}
 * @param prefixReplacement prefix to put in its place
 * @return the path under {@code prefixReplacement}
 * @throws IllegalArgumentException if {@code prefixTobeReplaced} is not an ancestor of the source path
 */
protected static Path replacedPrefix(Path sourcePath, Path prefixTobeReplaced, Path prefixReplacement) {
  Path strippedSource = PathUtils.getPathWithoutSchemeAndAuthority(sourcePath);

  boolean underPrefix = PathUtils.isAncestor(prefixTobeReplaced, strippedSource);
  Preconditions.checkArgument(underPrefix,
      "When replacing prefix, all locations must be descendants of the prefix. "
          + "The prefix: %s, file location: %s.",
      prefixTobeReplaced, strippedSource);

  Path suffix = PathUtils.relativizePath(strippedSource, prefixTobeReplaced);
  return new Path(prefixReplacement, suffix);
}
/**
 * Deletes empty directories starting with startPath and all ancestors up to but not including limitPath.
 *
 * <p>Walks upwards one parent at a time and stops at the first path that is not below limitPath, is
 * limitPath itself, or still has entries. A failed delete is only logged; the walk then continues with the
 * parent.
 *
 * @param fs {@link FileSystem} where paths are located.
 * @param limitPath only {@link Path}s that are strict descendants of this path will be deleted.
 * @param startPath first {@link Path} to delete. Afterwards empty ancestors will be deleted.
 * @throws IOException if listing or deleting on the file system fails
 */
public static void deleteEmptyParentDirectories(FileSystem fs, Path limitPath, Path startPath)
    throws IOException {
  Path current = startPath;
  while (PathUtils.isAncestor(limitPath, current)
      && !PathUtils.getPathWithoutSchemeAndAuthority(limitPath)
          .equals(PathUtils.getPathWithoutSchemeAndAuthority(current))
      && fs.listStatus(current).length == 0) {
    // Non-recursive delete: the directory was just observed to be empty.
    if (fs.delete(current, false)) {
      log.info("Deleted empty directory " + current);
    } else {
      log.warn("Failed to delete empty directory " + current);
    }
    current = current.getParent();
  }
}
/**
 * Loads the HOCON file at {@code path} and layers it over {@code fallback}.
 *
 * <p>Resolution order, highest priority first: the job config file path entry (the path without scheme and
 * authority), the parsed file contents, then {@code fallback}.
 *
 * @param path file to read, opened through this object's file system and decoded as UTF-8
 * @param fallback config consulted last
 * @return the layered {@link Config}
 * @throws IOException if the file cannot be opened or closed
 */
private Config loadHoconConfigWithFallback(Path path, Config fallback) throws IOException {
  try (InputStream is = fs.open(path);
      Reader reader = new InputStreamReader(is, Charsets.UTF_8)) {
    String strippedPath = PathUtils.getPathWithoutSchemeAndAuthority(path).toString();
    Config pathEntry =
        ConfigFactory.parseMap(ImmutableMap.of(ConfigurationKeys.JOB_CONFIG_FILE_PATH_KEY, strippedPath));
    Config fileContents =
        ConfigFactory.parseReader(reader, ConfigParseOptions.defaults().setSyntax(ConfigSyntax.CONF));
    return pathEntry.withFallback(fileContents).withFallback(fallback);
  }
}
public FSDatasetDescriptor(Config config) { Preconditions.checkArgument(config.hasPath(DatasetDescriptorConfigKeys.PLATFORM_KEY), "Dataset descriptor config must specify platform"); this.platform = config.getString(DatasetDescriptorConfigKeys.PLATFORM_KEY); this.path = PathUtils.getPathWithoutSchemeAndAuthority(new Path(ConfigUtils.getString(config, DatasetDescriptorConfigKeys.PATH_KEY, DatasetDescriptorConfigKeys.DATASET_DESCRIPTOR_CONFIG_ANY))).toString(); this.formatConfig = new FormatConfig(config); this.isRetentionApplied = ConfigUtils.getBoolean(config, DatasetDescriptorConfigKeys.IS_RETENTION_APPLIED_KEY, false); this.description = ConfigUtils.getString(config, DatasetDescriptorConfigKeys.DESCRIPTION_KEY, ""); this.rawConfig = config.withFallback(this.formatConfig.getRawConfig()).withFallback(DEFAULT_FALLBACK); }
/**
 * Loads the HOCON file at {@code path}.
 *
 * <p>The returned config carries the job config file path entry (the path without scheme and authority) on
 * top of the parsed file contents.
 *
 * @param path file to read, opened through this object's file system and decoded as UTF-8
 * @return the parsed {@link Config} with the path entry layered on top
 * @throws IOException if the file cannot be opened or closed
 */
private Config loadHoconConfigAtPath(Path path) throws IOException {
  try (InputStream is = fs.open(path);
      Reader reader = new InputStreamReader(is, Charsets.UTF_8)) {
    String strippedPath = PathUtils.getPathWithoutSchemeAndAuthority(path).toString();
    Config pathEntry =
        ConfigFactory.parseMap(ImmutableMap.of(ConfigurationKeys.JOB_CONFIG_FILE_PATH_KEY, strippedPath));
    Config fileContents =
        ConfigFactory.parseReader(reader, ConfigParseOptions.defaults().setSyntax(ConfigSyntax.CONF));
    return pathEntry.withFallback(fileContents);
  }
}
/**
 * Checks whether possibleAncestor is an ancestor of fullPath.
 *
 * <p>The check relativizes fullPath against possibleAncestor and reports an ancestor whenever the result
 * differs from fullPath with scheme and authority removed.
 *
 * @param possibleAncestor Possible ancestor of fullPath.
 * @param fullPath path to check.
 * @return true if possibleAncestor is an ancestor of fullPath.
 */
public static boolean isAncestor(Path possibleAncestor, Path fullPath) {
  Path relativized = relativizePath(fullPath, possibleAncestor);
  Path strippedFullPath = getPathWithoutSchemeAndAuthority(fullPath);
  // An unchanged result means the candidate was not a prefix of the full path.
  boolean unchanged = relativized.equals(strippedFullPath);
  return !unchanged;
}
/**
 * Computes where a copyable file is written under the output directory.
 *
 * <p>The file's destination, stripped of scheme, authority and leading separator, is resolved under the
 * partition output root for the given dataset and partition.
 *
 * @param file file whose destination is used
 * @param outputDir base output directory
 * @param datasetAndPartition dataset/partition used to pick the partition output root
 * @return the output path for {@code file}
 */
public static Path getOutputFilePath(CopyableFile file, Path outputDir,
    CopyEntity.DatasetAndPartition datasetAndPartition) {
  Path strippedDestination = PathUtils.getPathWithoutSchemeAndAuthority(file.getDestination());
  Path partitionRoot = getPartitionOutputRoot(outputDir, datasetAndPartition);
  Path relativeDestination = PathUtils.withoutLeadingSeparator(strippedDestination);
  return new Path(partitionRoot, relativeDestination);
}
/** Verifies that stripping scheme and authority from a qualified HDFS path yields the bare path. */
@Test
public void testGetPathWithoutSchemeAndAuthority() throws Exception {
  Path barePath = new Path("/some/path");
  Path qualifiedPath = new Path(new Path("hdfs://example.hdfs:9000/"), barePath);

  // Sanity check: the qualified path really carries the scheme.
  Assert.assertTrue(qualifiedPath.toString().startsWith("hdfs"));

  Assert.assertEquals(PathUtils.getPathWithoutSchemeAndAuthority(qualifiedPath), barePath);
}
/**
 * Load a {@link Properties} compatible path using fallback as fallback.
 *
 * <p>Resolution order, highest priority first: the job config file path entry (the path without scheme and
 * authority), the properties read from the file, then {@code fallback}.
 *
 * @param propertiesPath properties file to read, decoded as UTF-8
 * @param fallback config consulted last; also supplies the delimiter parsing flag
 * @return The {@link Config} in path with fallback as fallback.
 * @throws IOException if the file cannot be read, or wrapping a {@link ConfigurationException} from loading
 */
private Config loadJavaPropsWithFallback(Path propertiesPath, Config fallback) throws IOException {
  PropertiesConfiguration propertiesConfiguration = new PropertiesConfiguration();
  try (InputStreamReader inputStreamReader =
      new InputStreamReader(this.fs.open(propertiesPath), Charsets.UTF_8)) {
    // NOTE(review): the value read under the "...ENABLED" key is passed to setDelimiterParsingDisabled as-is;
    // this looks inverted but is kept unchanged here - confirm the intended semantics.
    boolean delimiterFlag = ConfigUtils.getBoolean(fallback, PROPERTY_DELIMITER_PARSING_ENABLED_KEY,
        DEFAULT_PROPERTY_DELIMITER_PARSING_ENABLED_KEY);
    propertiesConfiguration.setDelimiterParsingDisabled(delimiterFlag);
    propertiesConfiguration.load(inputStreamReader);

    Config configFromProps =
        ConfigUtils.propertiesToConfig(ConfigurationConverter.getProperties(propertiesConfiguration));

    String strippedPath = PathUtils.getPathWithoutSchemeAndAuthority(propertiesPath).toString();
    Config pathEntry =
        ConfigFactory.parseMap(ImmutableMap.of(ConfigurationKeys.JOB_CONFIG_FILE_PATH_KEY, strippedPath));
    return pathEntry.withFallback(configFromProps).withFallback(fallback);
  } catch (ConfigurationException ce) {
    throw new IOException(ce);
  }
}
/**
 * Builds the {@link CopyableFile} for a single source file.
 *
 * @param singleFile status of the source file
 * @param targetPath destination of the copy
 * @param timestampFromPath value used for both the origin and the upstream timestamp
 * @param locationToCopy location whose scheme-less path string becomes the file set
 * @return the built {@link CopyableFile}
 * @throws IOException declared by the underlying builder calls
 */
@VisibleForTesting
protected CopyableFile generateCopyableFile(FileStatus singleFile, Path targetPath, long timestampFromPath,
    Path locationToCopy) throws IOException {
  String fileSetName = PathUtils.getPathWithoutSchemeAndAuthority(locationToCopy).toString();
  return CopyableFile.fromOriginAndDestination(srcFs, singleFile, targetPath, configuration)
      .originTimestamp(timestampFromPath)
      .upstreamTimestamp(timestampFromPath)
      .fileSet(fileSetName)
      .build();
}
/**
 * Checks version class, glob pattern, parsed date-time (in the PST zone) and the path to delete when only
 * the date-time pattern is configured.
 */
@Test
public void testVersionParser() throws Exception {
  Properties props = new Properties();
  props.put(DateTimeDatasetVersionFinder.RETENTION_DATE_TIME_PATTERN_KEY, "yyyy/MM/dd/hh/mm");
  DateTimeDatasetVersionFinder parser = new DateTimeDatasetVersionFinder(this.fs, props);

  Assert.assertEquals(parser.versionClass(), TimestampedDatasetVersion.class);
  Assert.assertEquals(parser.globVersionPattern(), new Path("*/*/*/*/*"));

  Path versionPath = new Path("2015/06/01/10/12");
  DateTimeZone expectedZone = DateTimeZone.forID(ConfigurationKeys.PST_TIMEZONE_NAME);

  DateTime version =
      parser.getDatasetVersion(versionPath, this.fs.getFileStatus(testDataPathDummyPath)).getDateTime();
  Assert.assertEquals(version.getZone(), expectedZone);
  Assert.assertEquals(version, new DateTime(2015, 6, 1, 10, 12, 0, 0, expectedZone));

  // The first path to delete must be the dummy data path, ignoring scheme and authority.
  Path firstPathToDelete = parser
      .getDatasetVersion(versionPath, this.fs.getFileStatus(testDataPathDummyPath))
      .getPathsToDelete().iterator().next();
  Assert.assertEquals(PathUtils.getPathWithoutSchemeAndAuthority(firstPathToDelete),
      PathUtils.getPathWithoutSchemeAndAuthority(this.testDataPathDummyPath));
}
/**
 * Checks version class, glob pattern, parsed date-time and the path to delete when the pattern time zone is
 * explicitly configured as UTC.
 */
@Test
public void testVersionParserWithTimeZone() throws Exception {
  Properties props = new Properties();
  props.put(DateTimeDatasetVersionFinder.RETENTION_DATE_TIME_PATTERN_KEY, "yyyy/MM/dd/hh/mm");
  props.put(DateTimeDatasetVersionFinder.RETENTION_DATE_TIME_PATTERN_TIMEZONE_KEY, "UTC");
  DateTimeDatasetVersionFinder parser = new DateTimeDatasetVersionFinder(this.fs, props);

  Assert.assertEquals(parser.versionClass(), TimestampedDatasetVersion.class);
  Assert.assertEquals(parser.globVersionPattern(), new Path("*/*/*/*/*"));

  Path versionPath = new Path("2015/06/01/10/12");
  DateTimeZone expectedZone = DateTimeZone.forID("UTC");

  DateTime version =
      parser.getDatasetVersion(versionPath, this.fs.getFileStatus(testDataPathDummyPath)).getDateTime();
  Assert.assertEquals(version.getZone(), expectedZone);
  Assert.assertEquals(version, new DateTime(2015, 6, 1, 10, 12, 0, 0, expectedZone));

  // The first path to delete must be the dummy data path, ignoring scheme and authority.
  Path firstPathToDelete = parser
      .getDatasetVersion(versionPath, this.fs.getFileStatus(testDataPathDummyPath))
      .getPathsToDelete().iterator().next();
  Assert.assertEquals(PathUtils.getPathWithoutSchemeAndAuthority(firstPathToDelete),
      PathUtils.getPathWithoutSchemeAndAuthority(this.testDataPathDummyPath));
}