/**
 * Probes whether a decompressor is registered for the given file's path and, if so,
 * marks this input as unsplittable (compressed files must be read as one unit).
 *
 * @param pathFile the file status to probe
 * @return true if the file was detected as compressed and marked unsplittable
 */
protected boolean testForUnsplittable(FileStatus pathFile) {
	final boolean compressed = getInflaterInputStreamFactory(pathFile.getPath()) != null;
	if (compressed) {
		unsplittable = true;
	}
	return compressed;
}
/**
 * A simple hook to filter files and directories from the input.
 * The method may be overridden. Hadoop's FileInputFormat has a similar mechanism and applies the
 * same filters by default.
 *
 * @param fileStatus The file status to check.
 * @return true, if the given file or directory is accepted
 */
public boolean acceptFile(FileStatus fileStatus) {
	final Path filePath = fileStatus.getPath();
	final String name = filePath.getName();
	// reject hidden/underscore-prefixed entries first, then apply the configured filter
	if (name.startsWith("_") || name.startsWith(".")) {
		return false;
	}
	return !filesFilter.filterPath(filePath);
}
/**
 * Recursively copies the contents of a source directory into the target directory,
 * creating the target first and delegating each child entry to {@code copy}.
 *
 * @param sourcePath directory to copy from
 * @param targetPath directory to copy into (created if absent)
 * @param executable whether copied files should be marked executable
 * @param sFS file system of the source
 * @param tFS file system of the target
 * @throws IOException if listing or copying fails
 */
private static void internalCopyDirectory(Path sourcePath, Path targetPath, boolean executable, FileSystem sFS, FileSystem tFS) throws IOException {
	tFS.mkdirs(targetPath);
	for (FileStatus entry : sFS.listStatus(sourcePath)) {
		String entryPath = entry.getPath().toString();
		// directories may carry a trailing slash; drop it so the name extraction below works
		if (entry.isDir() && entryPath.endsWith("/")) {
			entryPath = entryPath.substring(0, entryPath.length() - 1);
		}
		final String entryName = entryPath.substring(entryPath.lastIndexOf("/"));
		copy(entry.getPath(), new Path(targetPath + entryName), executable);
	}
}
/**
 * Recursively collects one {@link FileCopyTask} per regular file reachable under the
 * given path, recording each file's path relative to the traversal root.
 *
 * @param p path to enumerate (file or directory)
 * @param rel relative path prefix accumulated so far (ends with "/" for directories)
 * @param tasks output list the discovered copy tasks are appended to
 * @throws IOException if the path's file system cannot be listed
 */
private static void getCopyTasks(Path p, String rel, List<FileCopyTask> tasks) throws IOException {
	final FileStatus[] children = p.getFileSystem().listStatus(p);
	if (children == null) {
		// path does not exist (or is not listable) — nothing to collect
		return;
	}
	for (FileStatus child : children) {
		final Path childPath = child.getPath();
		if (child.isDir()) {
			getCopyTasks(childPath, rel + childPath.getName() + "/", tasks);
		} else {
			tasks.add(new FileCopyTask(childPath, rel + childPath.getName()));
		}
	}
}
}
/** * This recreates the new working directory of the recovered RocksDB instance and links/copies the contents from * a local state. */ private void restoreInstanceDirectoryFromPath(Path source) throws IOException { FileSystem fileSystem = source.getFileSystem(); final FileStatus[] fileStatuses = fileSystem.listStatus(source); if (fileStatuses == null) { throw new IOException("Cannot list file statues. Directory " + source + " does not exist."); } for (FileStatus fileStatus : fileStatuses) { final Path filePath = fileStatus.getPath(); final String fileName = filePath.getName(); File restoreFile = new File(source.getPath(), fileName); File targetFile = new File(stateBackend.instanceRocksDBPath.getPath(), fileName); if (fileName.endsWith(SST_FILE_SUFFIX)) { // hardlink'ing the immutable sst-files. Files.createLink(targetFile.toPath(), restoreFile.toPath()); } else { // true copy for all other files. Files.copy(restoreFile.toPath(), targetFile.toPath(), StandardCopyOption.REPLACE_EXISTING); } } }
/**
 * Creates input splits of roughly one block each for all input files, padding with
 * empty splits when fewer real splits exist than the requested minimum.
 *
 * @param minNumSplits the minimum number of splits to return
 * @return the generated splits (at least {@code minNumSplits} entries)
 * @throws IOException if file metadata or block locations cannot be obtained
 */
@Override public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException {
	final List<FileStatus> files = this.getFiles();
	final List<FileInputSplit> inputSplits = new ArrayList<FileInputSplit>(minNumSplits);
	for (FileStatus file : files) {
		final FileSystem fs = file.getPath().getFileSystem();
		// NATIVE_BLOCK_SIZE means "use whatever the file system reports"
		final long blockSize = this.blockSize == NATIVE_BLOCK_SIZE ? fs.getDefaultBlockSize() : this.blockSize;
		// one split per block; the last split may be shorter than a full block
		for (long pos = 0, length = file.getLen(); pos < length; pos += blockSize) {
			long remainingLength = Math.min(pos + blockSize, length) - pos;
			// get the block locations and make sure they are in order with respect to their offset
			final BlockLocation[] blocks = fs.getFileBlockLocations(file, pos, remainingLength);
			Arrays.sort(blocks);
			// only the first (offset-lowest) block's hosts are used as the split's locality hint
			inputSplits.add(new FileInputSplit(inputSplits.size(), file.getPath(), pos, remainingLength, blocks[0].getHosts()));
		}
	}
	if (inputSplits.size() < minNumSplits) {
		// fewer real splits than requested: pad with zero-length splits at the end of the last file
		LOG.warn(String.format(
			"With the given block size %d, the files %s cannot be split into %d blocks. Filling up with empty splits...",
			blockSize, Arrays.toString(getFilePaths()), minNumSplits));
		// NOTE(review): assumes files is non-empty here — confirm callers guarantee at least one input file
		FileStatus last = files.get(files.size() - 1);
		final BlockLocation[] blocks = last.getPath().getFileSystem().getFileBlockLocations(last, 0, last.getLen());
		for (int index = files.size(); index < minNumSplits; index++) {
			// zero-length split positioned at EOF of the last file
			inputSplits.add(new FileInputSplit(index, last.getPath(), last.getLen(), 0, blocks[0].getHosts()));
		}
	}
	return inputSplits.toArray(new FileInputSplit[inputSplits.size()]);
}
/**
 * Recursively adds a file or directory to the zip stream, naming each entry by its
 * path relative to the given root directory.
 *
 * @param fileOrDirectory the path to add
 * @param fs the file system the path lives on
 * @param rootDir the root against which entry names are relativized
 * @param out the zip stream to write entries into
 * @throws IOException if reading the file system or writing the archive fails
 */
private static void addToZip(Path fileOrDirectory, FileSystem fs, Path rootDir, ZipOutputStream out) throws IOException {
	final String relativePath = fileOrDirectory.getPath().replace(rootDir.getPath() + '/', "");
	final boolean isDirectory = fs.getFileStatus(fileOrDirectory).isDir();
	if (isDirectory) {
		// directory entries are denoted by a trailing slash
		out.putNextEntry(new ZipEntry(relativePath + '/'));
		for (FileStatus child : fs.listStatus(fileOrDirectory)) {
			addToZip(child.getPath(), fs, rootDir, out);
		}
	} else {
		out.putNextEntry(new ZipEntry(relativePath));
		try (FSDataInputStream in = fs.open(fileOrDirectory)) {
			IOUtils.copyBytes(in, out, false);
		}
		out.closeEntry();
	}
}
/**
 * Lists the files under the configured path that have not been seen yet
 * (per {@code isFiltered}), recording their modification times as a side effect.
 *
 * @param fileSystem the file system to list from
 * @return paths of the newly discovered files (empty if the path does not exist)
 * @throws IOException if listing fails
 */
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
	final List<String> newFiles = new ArrayList<String>();
	final FileStatus[] statuses = fileSystem.listStatus(new Path(path));
	if (statuses == null) {
		LOG.warn("Path does not exist: {}", path);
		return newFiles;
	}
	for (FileStatus status : statuses) {
		final Path filePath = status.getPath();
		final String fileName = filePath.getName();
		final long modificationTime = status.getModificationTime();
		if (isFiltered(fileName, modificationTime)) {
			continue;
		}
		newFiles.add(filePath.toString());
		// remember the modification time so the file is filtered on the next listing
		modificationTimes.put(fileName, modificationTime);
	}
	return newFiles;
}
if (dir.isDir()) { if (acceptFile(dir) && enumerateNestedFiles) { length += addFilesInDir(dir.getPath(), files, logExcludedFiles); } else { if (logExcludedFiles && LOG.isDebugEnabled()) { LOG.debug("Directory "+dir.getPath().toString()+" did not pass the file-filter and is excluded."); } else { if (logExcludedFiles && LOG.isDebugEnabled()) { LOG.debug("Directory "+dir.getPath().toString()+" did not pass the file-filter and is excluded.");
/**
 * Reads every file directly under the given directory and returns its complete
 * contents, decoded as UTF-8, keyed by the file's path.
 *
 * @param directory the directory whose files are read
 * @return map from file path to that file's UTF-8 decoded contents
 * @throws Exception if listing or reading fails
 */
private Map<Path, String> getFileContentByPath(Path directory) throws Exception {
	Map<Path, String> contents = new HashMap<>();
	final FileStatus[] filesInBucket = getFileSystem().listStatus(directory);
	for (FileStatus file : filesInBucket) {
		final int fileLength = (int) file.getLen();
		final byte[] serContents = new byte[fileLength];
		// The stream must be closed, and a single read() call is not guaranteed to
		// fill the buffer — loop until all bytes are read.
		try (FSDataInputStream stream = getFileSystem().open(file.getPath())) {
			int offset = 0;
			while (offset < fileLength) {
				final int read = stream.read(serContents, offset, fileLength - offset);
				if (read == -1) {
					throw new java.io.EOFException("Unexpected end of file: " + file.getPath());
				}
				offset += read;
			}
		}
		contents.put(file.getPath(), new String(serContents, StandardCharsets.UTF_8));
	}
	return contents;
}
int splitNum = 0; for (final FileStatus file : files) { final FileSystem fs = file.getPath().getFileSystem(); final BlockLocation[] blocks = fs.getFileBlockLocations(file, 0, file.getLen()); Set<String> hosts = new HashSet<String>(); len = READ_WHOLE_SPLIT_FLAG; FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), 0, len, hosts.toArray(new String[hosts.size()])); inputSplits.add(fis); for (final FileStatus file : files) { final FileSystem fs = file.getPath().getFileSystem(); final long len = file.getLen(); final long blockSize = file.getBlockSize(); FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), position, splitSize, blocks[blockIndex].getHosts()); inputSplits.add(fis); final FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), position, bytesUnassigned, blocks[blockIndex].getHosts()); inputSplits.add(fis); final FileInputSplit fis = new FileInputSplit(splitNum++, file.getPath(), 0, 0, hosts); inputSplits.add(fis);
protected FileBaseStatistics getFileStats(FileBaseStatistics cachedStats, Path filePath, FileSystem fs, ArrayList<FileStatus> files) throws IOException { // get the file info and check whether the cached statistics are still valid. final FileStatus file = fs.getFileStatus(filePath); long totalLength = 0; // enumerate all files if (file.isDir()) { totalLength += addFilesInDir(file.getPath(), files, false); } else { files.add(file); testForUnsplittable(file); totalLength += file.getLen(); } // check the modification time stamp long latestModTime = 0; for (FileStatus f : files) { latestModTime = Math.max(f.getModificationTime(), latestModTime); } // check whether the cached statistics are still valid, if we have any if (cachedStats != null && latestModTime <= cachedStats.getLastModificationTime()) { return cachedStats; } // sanity check if (totalLength <= 0) { totalLength = BaseStatistics.SIZE_UNKNOWN; } return new FileBaseStatistics(latestModTime, totalLength, BaseStatistics.AVG_RECORD_BYTES_UNKNOWN); }
FileInputSplit split = new FileInputSplit(0, file.getPath(), offset, file.getLen() - offset, null);
Set<String> paths = new HashSet<>(filestatuses.length); for (FileStatus filestatus : filestatuses) { paths.add(filestatus.getPath().getPath());
Set<String> paths = new HashSet<>(filestatuses.length); for (FileStatus filestatus : filestatuses) { paths.add(filestatus.getPath().getPath());
@Test public void testSerializationOnlyInProgress() throws IOException { final File testFolder = tempFolder.newFolder(); final FileSystem fs = FileSystem.get(testFolder.toURI()); final Path testBucket = new Path(testFolder.getPath(), "test"); final RecoverableWriter writer = fs.createRecoverableWriter(); final RecoverableFsDataOutputStream stream = writer.open(testBucket); stream.write(IN_PROGRESS_CONTENT.getBytes(Charset.forName("UTF-8"))); final RecoverableWriter.ResumeRecoverable current = stream.persist(); final BucketState<String> bucketState = new BucketState<>( "test", testBucket, Long.MAX_VALUE, current, new HashMap<>()); final SimpleVersionedSerializer<BucketState<String>> serializer = new BucketStateSerializer<>( writer.getResumeRecoverableSerializer(), writer.getCommitRecoverableSerializer(), SimpleVersionedStringSerializer.INSTANCE ); final byte[] bytes = SimpleVersionedSerialization.writeVersionAndSerialize(serializer, bucketState); // to simulate that everything is over for file. stream.close(); final BucketState<String> recoveredState = SimpleVersionedSerialization.readVersionAndDeSerialize(serializer, bytes); Assert.assertEquals(testBucket, recoveredState.getBucketPath()); FileStatus[] statuses = fs.listStatus(testBucket.getParent()); Assert.assertEquals(1L, statuses.length); Assert.assertTrue( statuses[0].getPath().getPath().startsWith( (new Path(testBucket.getParent(), ".test.inprogress")).toString()) ); }
// NOTE(review): this definition is byte-identical to an earlier testForUnsplittable
// in this file — presumably copied from another class; confirm and deduplicate.
/**
 * Marks this input as unsplittable when a decompressor is registered for the file's
 * path (i.e. the file appears to be compressed).
 *
 * @param pathFile the file status to probe
 * @return true if the file was detected as compressed and marked unsplittable
 */
protected boolean testForUnsplittable(FileStatus pathFile) {
	if(getInflaterInputStreamFactory(pathFile.getPath()) != null) {
		unsplittable = true;
		return true;
	}
	return false;
}