/**
 * Lists files under {@code f} by delegating to the wrapped filesystem,
 * after translating the path's scheme from this wrapper's replacement
 * scheme back to the underlying filesystem's scheme.
 */
public RemoteIterator<LocatedFileStatus> listFiles(Path f, boolean recursive)
    throws java.io.IOException {
  Path translated = replaceScheme(f, this.replacementScheme, this.underlyingScheme);
  return this.underlyingFs.listFiles(translated, recursive);
}
/**
 * Recursively lists every file under {@code filePath} and rewrites each path
 * into a resource path: the filesystem prefix (the URI path of
 * {@code filePath}) is replaced by {@code resPathPrefix}. Returned sorted.
 *
 * @throws IllegalStateException if a listed file does not start with the
 *         expected filesystem prefix
 */
TreeSet<String> getAllFilePath(Path filePath, String resPathPrefix) throws IOException {
  String fsPathPrefix = filePath.toUri().getPath();
  TreeSet<String> result = new TreeSet<>();
  RemoteIterator<LocatedFileStatus> files = fs.listFiles(filePath, true);
  while (files.hasNext()) {
    String fsPath = files.next().getPath().toUri().getPath();
    if (!fsPath.startsWith(fsPathPrefix)) {
      throw new IllegalStateException(
          "File path " + fsPath + " is supposed to start with " + fsPathPrefix);
    }
    // +1 skips the '/' separator between the prefix and the relative part
    result.add(resPathPrefix + fsPath.substring(fsPathPrefix.length() + 1));
  }
  return result;
}
/**
 * Recursively walks {@code fileBackupDir} and returns the paths of all files
 * that are in HFile format.
 */
private List<Path> getFilesRecursively(String fileBackupDir)
    throws IllegalArgumentException, IOException {
  Path root = new Path(fileBackupDir);
  FileSystem fs = FileSystem.get(root.toUri(), new Configuration());
  List<Path> hfiles = new ArrayList<>();
  for (RemoteIterator<LocatedFileStatus> it = fs.listFiles(root, true); it.hasNext(); ) {
    Path candidate = it.next().getPath();
    if (HFile.isHFileFormat(fs, candidate)) {
      hfiles.add(candidate);
    }
  }
  return hfiles;
}
public static List<String> listPaths(JavaSparkContext sc, String path, boolean recursive) throws IOException { if (path.endsWith(".blob.core.windows.net/") || path.endsWith(".blob.core.windows.net")) { //Azure library bug: seems that we get an infinite loop if we try to list paths on the // root directory, for some versions of the Azure Hadoop library - deadlocks on fileIter.hasNext() throw new IllegalStateException("Cannot list paths from root directory due to Azure library bug"); } List<String> paths = new ArrayList<>(); Configuration config = new Configuration(); FileSystem hdfs = FileSystem.get(URI.create(path), config); RemoteIterator fileIter = hdfs.listFiles(new Path(path), recursive); while (fileIter.hasNext()) { String filePath = ((LocatedFileStatus) fileIter.next()).getPath().toString(); paths.add(filePath); } return paths; }
/**
 * Lists all files under {@code fileUri}, returning their URIs as strings.
 *
 * @throws IllegalArgumentException if the path does not exist
 */
@Override
public String[] listFiles(URI fileUri, boolean recursive) throws IOException {
  Path root = new Path(fileUri);
  if (!_hadoopFS.exists(root)) {
    throw new IllegalArgumentException("segmentUri is not valid");
  }
  List<String> collected = new ArrayList<>();
  RemoteIterator<LocatedFileStatus> files = _hadoopFS.listFiles(root, recursive);
  while (files != null && files.hasNext()) {
    collected.add(files.next().getPath().toUri().toString());
  }
  return collected.toArray(new String[0]);
}
/**
 * Lists the files directly under {@code directory} (non-recursive), sorted by
 * modification time, keeping only those not modified since {@code olderThan}.
 * If {@code olderThan} is {@code <= 0} the age filter is disabled and every
 * file is returned.
 */
public static ArrayList<Path> listFilesByModificationTime(FileSystem fs, Path directory, long olderThan)
    throws IOException {
  ArrayList<LocatedFileStatus> statuses = new ArrayList<>();
  RemoteIterator<LocatedFileStatus> entries = fs.listFiles(directory, false);
  while (entries.hasNext()) {
    LocatedFileStatus entry = entries.next();
    // olderThan <= 0 disables filtering; otherwise keep files at or before the cutoff
    boolean keep = olderThan <= 0 || entry.getModificationTime() <= olderThan;
    if (keep) {
      statuses.add(entry);
    }
  }
  Collections.sort(statuses, new ModifTimeComparator());
  ArrayList<Path> sortedPaths = new ArrayList<>(statuses.size());
  for (LocatedFileStatus status : statuses) {
    sortedPaths.add(status.getPath());
  }
  return sortedPaths;
}
private JavaRDD<String> listPathsSubset(JavaSparkContext sc, String path, int max, int rngSeed) throws IOException { Configuration config = new Configuration(); FileSystem hdfs = FileSystem.get(URI.create(path), config); RemoteIterator<LocatedFileStatus> fileIter = hdfs.listFiles(new org.apache.hadoop.fs.Path(path), true); List<String> paths = new ArrayList<>(); while (fileIter.hasNext()) { String filePath = fileIter.next().getPath().toString(); paths.add(filePath); } //Now, get a consistent random subset - assuming here that file listing isn't consistent Collections.sort(paths); int[] arr = new int[paths.size()]; for( int i=0; i<arr.length ; i++){ arr[i] = i; } MathUtils.shuffleArray(arr, rngSeed); List<String> out = new ArrayList<>(); for( int i=0; i<arr.length && i < max; i++ ){ out.add(paths.get(arr[i])); } return sc.parallelize(out); } }
/** * Calls fs.listFiles() to get FileStatus and BlockLocations together for reducing rpc call * * @param fs file system * @param dir directory * @return LocatedFileStatus list */ public static List<LocatedFileStatus> listLocatedStatus(final FileSystem fs, final Path dir) throws IOException { List<LocatedFileStatus> status = null; try { RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator = fs .listFiles(dir, false); while (locatedFileStatusRemoteIterator.hasNext()) { if (status == null) { status = Lists.newArrayList(); } status.add(locatedFileStatusRemoteIterator.next()); } } catch (FileNotFoundException fnfe) { // if directory doesn't exist, return null if (LOG.isTraceEnabled()) { LOG.trace(dir + " doesn't exist"); } } return status; }
public static BackupInfo loadBackupInfo(Path backupRootPath, String backupId, FileSystem fs) throws IOException { Path backupPath = new Path(backupRootPath, backupId); RemoteIterator<LocatedFileStatus> it = fs.listFiles(backupPath, true); while (it.hasNext()) { LocatedFileStatus lfs = it.next(); if (lfs.getPath().getName().equals(BackupManifest.MANIFEST_FILE_NAME)) { // Load BackupManifest BackupManifest manifest = new BackupManifest(fs, lfs.getPath().getParent()); BackupInfo info = manifest.toBackupInfo(); return info; } } return null; }
private void getSortedFileList(Path eventPath, List<LocatedFileStatus> fileStatuses, FileSystem fileSystem) throws IOException { //Add all the files in this directory. No need to sort. RemoteIterator<LocatedFileStatus> iteratorNext = fileSystem.listFiles(eventPath, false); while (iteratorNext.hasNext()) { LocatedFileStatus status = iteratorNext.next(); LOG.info(" files added at getSortedFileList" + status.getPath()); fileStatuses.add(status); } // get all the directories in this path and sort them FileStatus[] eventDirs = fileSystem.listStatus(eventPath, EximUtil.getDirectoryFilter(fileSystem)); if (eventDirs.length == 0) { return; } Arrays.sort(eventDirs, new EventDumpDirComparator()); // add files recursively for each directory for (FileStatus fs : eventDirs) { getSortedFileList(fs.getPath(), fileStatuses, fileSystem); } }
/**
 * Note that this method copies within a cluster. If you want to copy outside the cluster, you will
 * need to create a new configuration and filesystem. Keeps files if copy/move is partial.
 *
 * @param srcUri source location; every file under it (recursively) is processed
 * @param dstUri destination directory
 * @return false as soon as any single file copy fails, true otherwise
 */
@Override
public boolean copy(URI srcUri, URI dstUri) throws IOException {
  Path source = new Path(srcUri);
  Path target = new Path(dstUri);
  RemoteIterator<LocatedFileStatus> sourceFiles = _hadoopFS.listFiles(source, true);
  if (sourceFiles != null) {
    while (sourceFiles.hasNext()) {
      // NOTE(review): the 5th argument to FileUtil.copy is deleteSource=true,
      // so each source file is deleted after being copied — this behaves like a
      // move despite the method being named copy(). Confirm this is intended
      // (callers may rely on it), or pass false to make this a true copy.
      boolean succeeded = FileUtil.copy(_hadoopFS, sourceFiles.next().getPath(), _hadoopFS, target, true, _hadoopConf);
      if (!succeeded) {
        return false;
      }
    }
  }
  return true;
}
/** * Copy meta data to of a backup session * @param fs file system * @param tmpBackupDir temp backup directory, where meta is locaed * @param backupDirPath new path for backup * @throws IOException exception */ protected void copyMetaData(FileSystem fs, Path tmpBackupDir, Path backupDirPath) throws IOException { RemoteIterator<LocatedFileStatus> it = fs.listFiles(tmpBackupDir, true); List<Path> toKeep = new ArrayList<Path>(); while (it.hasNext()) { Path p = it.next().getPath(); if (fs.isDirectory(p)) { continue; } // Keep meta String fileName = p.toString(); if (fileName.indexOf(FSTableDescriptors.TABLEINFO_DIR) > 0 || fileName.indexOf(HRegionFileSystem.REGION_INFO_FILE) > 0) { toKeep.add(p); } } // Copy meta to destination for (Path p : toKeep) { Path newPath = convertToDest(p, backupDirPath); copyFile(fs, p, newPath); } }
public static List<String> getFiles(FileSystem fs, Path rootDir, List<String> files, PathFilter filter) throws IOException { RemoteIterator<LocatedFileStatus> it = fs.listFiles(rootDir, true); while (it.hasNext()) { LocatedFileStatus lfs = it.next(); if (lfs.isDirectory()) { continue; } // apply filter if (filter.accept(lfs.getPath())) { files.add(lfs.getPath().toString()); } } return files; }
private long getPathLength(JobConf conf, Path path, Class<? extends InputFormat> clazz, long threshold) throws IOException { if (ContentSummaryInputFormat.class.isAssignableFrom(clazz)) { InputFormat input = HiveInputFormat.getInputFormatFromCache(clazz, conf); return ((ContentSummaryInputFormat)input).getContentSummary(path, conf).getLength(); } else { FileSystem fs = path.getFileSystem(conf); try { long length = 0; RemoteIterator<LocatedFileStatus> results = fs.listFiles(path, true); // No need to iterate more, when threshold is reached // (beneficial especially for object stores) while (length <= threshold && results.hasNext()) { length += results.next().getLen(); } LOG.trace("length=" + length + ", threshold=" + threshold); return length; } catch (FileNotFoundException e) { return 0; } } } }
protected void dumpBackupDir() throws IOException { // Dump Backup Dir FileSystem fs = FileSystem.get(conf1); RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(BACKUP_ROOT_DIR), true); while (it.hasNext()) { LOG.debug(Objects.toString(it.next().getPath())); } } }
/**
 * Reads the keys to search for from the files under the configured input
 * directory, skipping job marker files (names starting with "_").
 *
 * @throws FileNotFoundException if the input directory does not exist
 * @throws UnsupportedOperationException if the input path is a single file —
 *         that branch is an unimplemented stub (see TODO below)
 */
static SortedSet<byte []> readKeysToSearch(final Configuration conf)
    throws IOException, InterruptedException {
  Path keysInputDir = new Path(conf.get(SEARCHER_INPUTDIR_KEY));
  FileSystem fs = FileSystem.get(conf);
  SortedSet<byte []> result = new TreeSet<>(Bytes.BYTES_COMPARATOR);
  if (!fs.exists(keysInputDir)) {
    throw new FileNotFoundException(keysInputDir.toString());
  }
  if (!fs.isDirectory(keysInputDir)) {
    // NOTE(review): single-file input is not handled — a sibling variant of this
    // method reads the file via readFileToSearch(conf, fs, fileStatus, result);
    // consider implementing this branch the same way if single-file input is valid.
    throw new UnsupportedOperationException("TODO");
  } else {
    RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(keysInputDir, false);
    while(iterator.hasNext()) {
      LocatedFileStatus keyFileStatus = iterator.next();
      // Skip "_SUCCESS" file.
      if (keyFileStatus.getPath().getName().startsWith("_")) continue;
      result.addAll(readFileToSearch(conf, fs, keyFileStatus));
    }
  }
  return result;
}
/**
 * Recursively collects all WAL files under the WAL root's region log
 * directory, excluding meta WAL files; each kept file is also logged.
 */
private List<FileStatus> getListOfWALFiles(Configuration c) throws IOException {
  Path logRoot = new Path(CommonFSUtils.getWALRootDir(c), HConstants.HREGION_LOGDIR_NAME);
  FileSystem fs = logRoot.getFileSystem(c);
  List<FileStatus> walFiles = new ArrayList<FileStatus>();
  RemoteIterator<LocatedFileStatus> entries = fs.listFiles(logRoot, true);
  while (entries.hasNext()) {
    LocatedFileStatus entry = entries.next();
    // Keep plain WAL files only; meta WALs are excluded.
    boolean isWal = entry.isFile() && !AbstractFSWALProvider.isMetaFile(entry.getPath());
    if (isWal) {
      walFiles.add(entry);
      LOG.info(Objects.toString(entry));
    }
  }
  return walFiles;
}
static SortedSet<byte []> readKeysToSearch(final Configuration conf) throws IOException, InterruptedException { Path keysInputDir = new Path(conf.get(SEARCHER_INPUTDIR_KEY)); FileSystem fs = FileSystem.get(conf); SortedSet<byte []> result = new TreeSet<>(Bytes.BYTES_COMPARATOR); if (!fs.exists(keysInputDir)) { throw new FileNotFoundException(keysInputDir.toString()); } if (!fs.isDirectory(keysInputDir)) { FileStatus keyFileStatus = fs.getFileStatus(keysInputDir); readFileToSearch(conf, fs, keyFileStatus, result); } else { RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(keysInputDir, false); while(iterator.hasNext()) { LocatedFileStatus keyFileStatus = iterator.next(); // Skip "_SUCCESS" file. if (keyFileStatus.getPath().getName().startsWith("_")) continue; readFileToSearch(conf, fs, keyFileStatus, result); } } return result; }
/**
 * Will assert that actual files match expected.
 * @param expectedFiles - suffixes of expected Paths. Must be the same length
 * @param rootPath - table or partition root where to start looking for actual files, recursively
 */
void assertExpectedFileSet(Set<String> expectedFiles, String rootPath) throws Exception {
  // All expected suffixes must share one length, so each actual path can be
  // trimmed to a suffix of exactly that length before comparing.
  int suffixLength = 0;
  for(String s : expectedFiles) {
    if(suffixLength > 0) {
      assert suffixLength == s.length() : "all entries must be the same length. current: " + s;
    }
    suffixLength = s.length();
  }
  FileSystem fs = FileSystem.get(hiveConf);
  Set<String> actualFiles = new HashSet<>();
  RemoteIterator<LocatedFileStatus> remoteIterator = fs.listFiles(new Path(rootPath), true);
  while (remoteIterator.hasNext()) {
    LocatedFileStatus lfs = remoteIterator.next();
    // Only visible (non-hidden) files count toward the actual set.
    if(!lfs.isDirectory() && org.apache.hadoop.hive.common.FileUtils.HIDDEN_FILES_PATH_FILTER.accept(lfs.getPath())) {
      String p = lfs.getPath().toString();
      // Compare by trailing suffix so the absolute root prefix is irrelevant.
      actualFiles.add(p.substring(p.length() - suffixLength, p.length()));
    }
  }
  Assert.assertEquals("Unexpected file list", expectedFiles, actualFiles);
}
void checkExpected(List<String> rs, String[][] expected, String msg, Logger LOG, boolean checkFileName) {
/**
 * Returns true when any file under {@code tableDir} (searched recursively)
 * is an HFile link; false when none is found or the directory is missing.
 */
private boolean hasHFileLink(Path tableDir) throws IOException {
  if (!fs.exists(tableDir)) {
    return false;
  }
  RemoteIterator<LocatedFileStatus> entries = fs.listFiles(tableDir, true);
  while (entries.hasNext()) {
    LocatedFileStatus entry = entries.next();
    if (entry.isFile() && HFileLink.isHFileLink(entry.getPath())) {
      return true;
    }
  }
  return false;
}