/** * Returns the number of blocks this file/directory consists of * assuming the file system's standard block size. * * @param file * the file * @return the number of block's the file/directory consists of * @throws IOException */ public int getNumberOfBlocks(final FileStatus file) throws IOException { int numberOfBlocks = 0; if (file == null) { return 0; } // For a file, this is easy if (!file.isDir()) { return getNumberOfBlocks(file.getLen(), file.getBlockSize()); } // file is a directory final FileStatus[] files = this.listStatus(file.getPath()); for (int i = 0; i < files.length; i++) { if (!files[i].isDir()) { numberOfBlocks += getNumberOfBlocks(files[i].getLen(), files[i].getBlockSize()); } } return numberOfBlocks; }
long latestModTime = file.getModificationTime(); if (file.isDir()) { FileStatus[] fss = fs.listStatus(filePath); files.ensureCapacity(fss.length); if (!s.isDir()) { files.add(s); latestModTime = Math.max(s.getModificationTime(), latestModTime); testForUnsplittable(s); len += s.getLen();
@Override public FileInputSplit[] createInputSplits(final int minNumSplits) throws IOException { final List<FileStatus> files = this.getFiles(); final FileSystem fs = this.filePath.getFileSystem(); final long blockSize = this.blockSize == NATIVE_BLOCK_SIZE ? fs.getDefaultBlockSize() : this.blockSize; final List<FileInputSplit> inputSplits = new ArrayList<FileInputSplit>(minNumSplits); for (final FileStatus file : files) { final long splitSize = blockSize; for (long pos = 0, length = file.getLen(); pos < length; pos += splitSize) { final long remainingLength = Math.min(pos + splitSize, length) - pos; // get the block locations and make sure they are in order with respect to their offset final BlockLocation[] blocks = fs.getFileBlockLocations(file, pos, remainingLength); Arrays.sort(blocks); inputSplits.add(new FileInputSplit(inputSplits.size(), file.getPath(), pos, remainingLength, blocks[0].getHosts())); } } if (inputSplits.size() < minNumSplits) { LOG.warn(String.format( "With the given block size %d, the file %s cannot be split into %d blocks. Filling up with empty splits...", blockSize, this.filePath, minNumSplits)); final FileStatus last = files.get(files.size() - 1); final BlockLocation[] blocks = fs.getFileBlockLocations(last, 0, last.getLen()); for (int index = files.size(); index < minNumSplits; index++) inputSplits.add(new FileInputSplit(index, last.getPath(), last.getLen(), 0, blocks[0].getHosts())); } return inputSplits.toArray(new FileInputSplit[0]); }
private List<FileStatus> getFiles() throws IOException { // get all the files that are involved in the splits final List<FileStatus> files = new ArrayList<FileStatus>(); final FileSystem fs = this.filePath.getFileSystem(); final FileStatus pathFile = fs.getFileStatus(this.filePath); if (pathFile.isDir()) { // input is directory. list all contained files final FileStatus[] partials = fs.listStatus(this.filePath); for (int i = 0; i < partials.length; i++) if (!partials[i].isDir()) files.add(partials[i]); } else files.add(pathFile); return files; }
/**
 * Copies a file or, recursively, a whole directory from <code>sourcePath</code>
 * to <code>targetPath</code>. Nothing is copied if the target already exists.
 *
 * @param sourcePath
 *        the file or directory to copy
 * @param targetPath
 *        the destination of the copy
 * @param executable
 *        the executable flag applied to every copied file via {@link File#setExecutable(boolean)}
 * @throws IOException
 *         thrown if the file systems cannot be accessed or a file cannot be copied
 */
public static void copy(Path sourcePath, Path targetPath, boolean executable) throws IOException {
    FileSystem sFS = sourcePath.getFileSystem();
    FileSystem tFS = targetPath.getFileSystem();

    if (tFS.exists(targetPath)) {
        // an existing target is left untouched
        return;
    }

    if (sFS.getFileStatus(sourcePath).isDir()) {
        tFS.mkdirs(targetPath);
        for (FileStatus content : sFS.listStatus(sourcePath)) {
            String distPath = content.getPath().toString();
            // drop a trailing slash of directories so that the last path segment can be extracted
            if (content.isDir() && distPath.endsWith("/")) {
                distPath = distPath.substring(0, distPath.length() - 1);
            }
            // the substring starts at the last '/', i.e. it is "/<name>"
            String localPath = targetPath.toString() + distPath.substring(distPath.lastIndexOf("/"));
            copy(content.getPath(), new Path(localPath), executable);
        }
        return;
    }

    FSDataOutputStream lfsOutput = null;
    FSDataInputStream fsInput = null;
    try {
        lfsOutput = tFS.create(targetPath, false);
        fsInput = sFS.open(sourcePath);
        IOUtils.copyBytes(fsInput, lfsOutput);
    } catch (IOException ioe) {
        // report the failure to the caller instead of silently leaving a missing or partial target
        throw new IOException("Could not copy " + sourcePath + " to " + targetPath, ioe);
    } finally {
        // Make sure both streams are released even if open() or the copy failed.
        // Closing a stream that is already closed has no effect (java.io.Closeable contract).
        if (fsInput != null) {
            try {
                fsInput.close();
            } catch (IOException ignored) {
                // nothing sensible can be done about a failing close at this point
            }
        }
        if (lfsOutput != null) {
            try {
                lfsOutput.close();
            } catch (IOException ignored) {
                // nothing sensible can be done about a failing close at this point
            }
        }
    }

    new File(targetPath.toString()).setExecutable(executable);
}
/**
 * Filter hook deciding whether a file or directory belongs to the input.
 * Entries whose name starts with an underscore or a dot are rejected.
 * Subclasses may override this method to apply a different filter.
 *
 * @param fileStatus
 *        the status of the file or directory to check
 * @return true, if the given file or directory is accepted
 */
protected boolean acceptFile(FileStatus fileStatus) {
    final String fileName = fileStatus.getPath().getName();
    if (fileName.startsWith("_")) {
        return false;
    }
    return !fileName.startsWith(".");
}
@Override public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException { List<FileStatus> files = this.getFiles(); final FileSystem fs = this.filePath.getFileSystem(); final long blockSize = this.blockSize == NATIVE_BLOCK_SIZE ? fs.getDefaultBlockSize() : this.blockSize; final List<FileInputSplit> inputSplits = new ArrayList<FileInputSplit>(minNumSplits); for (FileStatus file : files) { long splitSize = blockSize; for (long pos = 0, length = file.getLen(); pos < length; pos += splitSize) { long remainingLength = Math.min(pos + splitSize, length) - pos; // get the block locations and make sure they are in order with respect to their offset final BlockLocation[] blocks = fs.getFileBlockLocations(file, pos, remainingLength); Arrays.sort(blocks); inputSplits.add(new FileInputSplit(inputSplits.size(), file.getPath(), pos, remainingLength, blocks[0].getHosts())); } } if (inputSplits.size() < minNumSplits) { LOG.warn(String.format( "With the given block size %d, the file %s cannot be split into %d blocks. Filling up with empty splits...", blockSize, this.filePath, minNumSplits)); FileStatus last = files.get(files.size() - 1); final BlockLocation[] blocks = fs.getFileBlockLocations(last, 0, last.getLen()); for (int index = files.size(); index < minNumSplits; index++) { inputSplits.add(new FileInputSplit(index, last.getPath(), last.getLen(), 0, blocks[0].getHosts())); } } return inputSplits.toArray(new FileInputSplit[0]); }
protected List<FileStatus> getFiles() throws IOException { // get all the files that are involved in the splits List<FileStatus> files = new ArrayList<FileStatus>(); final FileSystem fs = this.filePath.getFileSystem(); final FileStatus pathFile = fs.getFileStatus(this.filePath); if (pathFile.isDir()) { // input is directory. list all contained files final FileStatus[] partials = fs.listStatus(this.filePath); for (int i = 0; i < partials.length; i++) { if (!partials[i].isDir()) { files.add(partials[i]); } } } else { files.add(pathFile); } return files; }
FileSystem fs = nephelePath.getFileSystem(); FileStatus fileStatus = fs.getFileStatus(nephelePath); if (!fileStatus.isDir()) { return Arrays.asList(openInput(inputFormatClass, path, configuration)); List<F> formats = new ArrayList<F>(); for (int index = 0; index < list.length; index++) { formats.add(openInput(inputFormatClass, list[index].getPath().toString(), configuration));
/**
 * Filter hook deciding whether a file or directory belongs to the input.
 * Entries whose name starts with an underscore or a dot are rejected.
 * Subclasses may override this method to apply a different filter.
 *
 * @param fileStatus
 *        the status of the file or directory to check
 * @return true, if the given file or directory is accepted
 */
protected boolean acceptFile(FileStatus fileStatus) {
    final String fileName = fileStatus.getPath().getName();
    final boolean hiddenByUnderscore = fileName.startsWith("_");
    final boolean rejected = hiddenByUnderscore || fileName.startsWith(".");
    return !rejected;
}
/** * Returns the number of blocks this file/directory consists of * assuming the file system's standard block size. * * @param file * the file * @return the number of block's the file/directory consists of * @throws IOException */ public int getNumberOfBlocks(final FileStatus file) throws IOException { int numberOfBlocks = 0; if (file == null) { return 0; } // For a file, this is easy if (!file.isDir()) { return getNumberOfBlocks(file.getLen(), file.getBlockSize()); } // file is a directory final FileStatus[] files = this.listStatus(file.getPath()); for (int i = 0; i < files.length; i++) { if (!files[i].isDir()) { numberOfBlocks += getNumberOfBlocks(files[i].getLen(), files[i].getBlockSize()); } } return numberOfBlocks; }
@Override public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException { List<FileStatus> files = this.getFiles(); final FileSystem fs = this.filePath.getFileSystem(); final long blockSize = this.blockSize == NATIVE_BLOCK_SIZE ? fs.getDefaultBlockSize() : this.blockSize; final List<FileInputSplit> inputSplits = new ArrayList<FileInputSplit>(minNumSplits); for (FileStatus file : files) { long splitSize = blockSize; for (long pos = 0, length = file.getLen(); pos < length; pos += splitSize) { long remainingLength = Math.min(pos + splitSize, length) - pos; // get the block locations and make sure they are in order with respect to their offset final BlockLocation[] blocks = fs.getFileBlockLocations(file, pos, remainingLength); Arrays.sort(blocks); inputSplits.add(new FileInputSplit(inputSplits.size(), file.getPath(), pos, remainingLength, blocks[0].getHosts())); } } if (inputSplits.size() < minNumSplits) { LOG.warn(String.format( "With the given block size %d, the file %s cannot be split into %d blocks. Filling up with empty splits...", blockSize, this.filePath, minNumSplits)); FileStatus last = files.get(files.size() - 1); final BlockLocation[] blocks = fs.getFileBlockLocations(last, 0, last.getLen()); for (int index = files.size(); index < minNumSplits; index++) { inputSplits.add(new FileInputSplit(index, last.getPath(), last.getLen(), 0, blocks[0].getHosts())); } } return inputSplits.toArray(new FileInputSplit[0]); }
protected List<FileStatus> getFiles() throws IOException { // get all the files that are involved in the splits List<FileStatus> files = new ArrayList<FileStatus>(); final FileSystem fs = this.filePath.getFileSystem(); final FileStatus pathFile = fs.getFileStatus(this.filePath); if (pathFile.isDir()) { // input is directory. list all contained files final FileStatus[] partials = fs.listStatus(this.filePath); for (int i = 0; i < partials.length; i++) { if (!partials[i].isDir()) { files.add(partials[i]); } } } else { files.add(pathFile); } return files; }
FileSystem fs = nephelePath.getFileSystem(); FileStatus fileStatus = fs.getFileStatus(nephelePath); if (!fileStatus.isDir()) { return Arrays.asList(openInput(inputFormatClass, path, configuration)); List<F> formats = new ArrayList<F>(); for (int index = 0; index < list.length; index++) { formats.add(openInput(inputFormatClass, list[index].getPath().toString(), configuration));
long latestModTime = file.getModificationTime(); if (file.isDir()) { FileStatus[] fss = fs.listStatus(filePath); files.ensureCapacity(fss.length); if (!s.isDir()) { files.add(s); latestModTime = Math.max(s.getModificationTime(), latestModTime); testForUnsplittable(s); len += s.getLen();
/**
 * Returns the block locations of the given file. The result always consists of
 * exactly one location, created from this file system's host name and the
 * length of the file; <code>start</code> and <code>len</code> are not consulted.
 *
 * @param file
 *        the file whose block locations are requested
 * @param start
 *        the start offset of the requested range (unused)
 * @param len
 *        the length of the requested range (unused)
 * @return an array holding the single block location of the file
 * @throws IOException
 *         declared by the method signature
 */
@Override
public BlockLocation[] getFileBlockLocations(final FileStatus file, final long start, final long len)
        throws IOException {
    return new BlockLocation[] { new LocalBlockLocation(this.hostName, file.getLen()) };
}
/**
 * Checks whether the name of the given file ends with {@code DEFLATE_SUFFIX}.
 * If it does, the <code>unsplittable</code> flag is set; the flag is never reset here.
 *
 * @param pathFile
 *        the file to check
 * @return true, if the file name ends with {@code DEFLATE_SUFFIX}, false otherwise
 */
private boolean testForUnsplittable(FileStatus pathFile) {
    final boolean hasDeflateSuffix = pathFile.getPath().getName().endsWith(DEFLATE_SUFFIX);
    if (hasDeflateSuffix) {
        unsplittable = true;
    }
    return hasDeflateSuffix;
}