@Override public BlockLocation[] getFileBlockLocations(final FileStatus file, final long start, final long len) throws IOException { final BlockLocation[] blockLocations = new BlockLocation[1]; blockLocations[0] = new LocalBlockLocation(this.hostName, file.getLen()); return blockLocations; }
@Override public BlockLocation[] getFileBlockLocations(final FileStatus file, final long start, final long len) throws IOException { final BlockLocation[] blockLocations = new BlockLocation[1]; blockLocations[0] = new LocalBlockLocation(this.hostName, file.getLen()); return blockLocations; }
@Override public FileInputSplit[] createInputSplits(final int minNumSplits) throws IOException { final List<FileStatus> files = this.getFiles(); final FileSystem fs = this.filePath.getFileSystem(); final long blockSize = this.blockSize == NATIVE_BLOCK_SIZE ? fs.getDefaultBlockSize() : this.blockSize; final List<FileInputSplit> inputSplits = new ArrayList<FileInputSplit>(minNumSplits); for (final FileStatus file : files) { final long splitSize = blockSize; for (long pos = 0, length = file.getLen(); pos < length; pos += splitSize) { final long remainingLength = Math.min(pos + splitSize, length) - pos; // get the block locations and make sure they are in order with respect to their offset final BlockLocation[] blocks = fs.getFileBlockLocations(file, pos, remainingLength); Arrays.sort(blocks); inputSplits.add(new FileInputSplit(inputSplits.size(), file.getPath(), pos, remainingLength, blocks[0].getHosts())); } } if (inputSplits.size() < minNumSplits) { LOG.warn(String.format( "With the given block size %d, the file %s cannot be split into %d blocks. Filling up with empty splits...", blockSize, this.filePath, minNumSplits)); final FileStatus last = files.get(files.size() - 1); final BlockLocation[] blocks = fs.getFileBlockLocations(last, 0, last.getLen()); for (int index = files.size(); index < minNumSplits; index++) inputSplits.add(new FileInputSplit(index, last.getPath(), last.getLen(), 0, blocks[0].getHosts())); } return inputSplits.toArray(new FileInputSplit[0]); }
@Override public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException { List<FileStatus> files = this.getFiles(); final FileSystem fs = this.filePath.getFileSystem(); final long blockSize = this.blockSize == NATIVE_BLOCK_SIZE ? fs.getDefaultBlockSize() : this.blockSize; final List<FileInputSplit> inputSplits = new ArrayList<FileInputSplit>(minNumSplits); for (FileStatus file : files) { long splitSize = blockSize; for (long pos = 0, length = file.getLen(); pos < length; pos += splitSize) { long remainingLength = Math.min(pos + splitSize, length) - pos; // get the block locations and make sure they are in order with respect to their offset final BlockLocation[] blocks = fs.getFileBlockLocations(file, pos, remainingLength); Arrays.sort(blocks); inputSplits.add(new FileInputSplit(inputSplits.size(), file.getPath(), pos, remainingLength, blocks[0].getHosts())); } } if (inputSplits.size() < minNumSplits) { LOG.warn(String.format( "With the given block size %d, the file %s cannot be split into %d blocks. Filling up with empty splits...", blockSize, this.filePath, minNumSplits)); FileStatus last = files.get(files.size() - 1); final BlockLocation[] blocks = fs.getFileBlockLocations(last, 0, last.getLen()); for (int index = files.size(); index < minNumSplits; index++) { inputSplits.add(new FileInputSplit(index, last.getPath(), last.getLen(), 0, blocks[0].getHosts())); } } return inputSplits.toArray(new FileInputSplit[0]); }
@Override public FileInputSplit[] createInputSplits(int minNumSplits) throws IOException { List<FileStatus> files = this.getFiles(); final FileSystem fs = this.filePath.getFileSystem(); final long blockSize = this.blockSize == NATIVE_BLOCK_SIZE ? fs.getDefaultBlockSize() : this.blockSize; final List<FileInputSplit> inputSplits = new ArrayList<FileInputSplit>(minNumSplits); for (FileStatus file : files) { long splitSize = blockSize; for (long pos = 0, length = file.getLen(); pos < length; pos += splitSize) { long remainingLength = Math.min(pos + splitSize, length) - pos; // get the block locations and make sure they are in order with respect to their offset final BlockLocation[] blocks = fs.getFileBlockLocations(file, pos, remainingLength); Arrays.sort(blocks); inputSplits.add(new FileInputSplit(inputSplits.size(), file.getPath(), pos, remainingLength, blocks[0].getHosts())); } } if (inputSplits.size() < minNumSplits) { LOG.warn(String.format( "With the given block size %d, the file %s cannot be split into %d blocks. Filling up with empty splits...", blockSize, this.filePath, minNumSplits)); FileStatus last = files.get(files.size() - 1); final BlockLocation[] blocks = fs.getFileBlockLocations(last, 0, last.getLen()); for (int index = files.size(); index < minNumSplits; index++) { inputSplits.add(new FileInputSplit(index, last.getPath(), last.getLen(), 0, blocks[0].getHosts())); } } return inputSplits.toArray(new FileInputSplit[0]); }
out.writeLong(file.getLen());
out.writeLong(file.getLen());
out.writeLong(status.getLen());
out.writeLong(status.getLen());
/** * Returns the number of blocks this file/directory consists of * assuming the file system's standard block size. * * @param file * the file * @return the number of block's the file/directory consists of * @throws IOException */ public int getNumberOfBlocks(final FileStatus file) throws IOException { int numberOfBlocks = 0; if (file == null) { return 0; } // For a file, this is easy if (!file.isDir()) { return getNumberOfBlocks(file.getLen(), file.getBlockSize()); } // file is a directory final FileStatus[] files = this.listStatus(file.getPath()); for (int i = 0; i < files.length; i++) { if (!files[i].isDir()) { numberOfBlocks += getNumberOfBlocks(files[i].getLen(), files[i].getBlockSize()); } } return numberOfBlocks; }
/** * Returns the number of blocks this file/directory consists of * assuming the file system's standard block size. * * @param file * the file * @return the number of block's the file/directory consists of * @throws IOException */ public int getNumberOfBlocks(final FileStatus file) throws IOException { int numberOfBlocks = 0; if (file == null) { return 0; } // For a file, this is easy if (!file.isDir()) { return getNumberOfBlocks(file.getLen(), file.getBlockSize()); } // file is a directory final FileStatus[] files = this.listStatus(file.getPath()); for (int i = 0; i < files.length; i++) { if (!files[i].isDir()) { numberOfBlocks += getNumberOfBlocks(files[i].getLen(), files[i].getBlockSize()); } } return numberOfBlocks; }
len += s.getLen();
len += s.getLen();
/** * Fill in the statistics. The last modification time and the total input size are prefilled. * * @param files * The files that are associated with this block input format. * @param stats * The pre-filled statistics. */ protected SequentialStatistics createStatistics(final List<FileStatus> files, final FileBaseStatistics stats) throws IOException { if (files.isEmpty()) return null; final BlockInfo blockInfo = this.createBlockInfo(); long totalCount = 0; for (final FileStatus file : files) { // invalid file if (file.getLen() < blockInfo.getInfoSize()) continue; final FSDataInputStream fdis = file.getPath().getFileSystem().open(file.getPath(), blockInfo.getInfoSize()); fdis.seek(file.getLen() - blockInfo.getInfoSize()); final DataInputStream input = new DataInputStream(fdis); blockInfo.read(input); totalCount += blockInfo.getAccumulatedRecordCount(); } final float avgWidth = totalCount == 0 ? 0 : (float) stats.getTotalInputSize() / totalCount; return new SequentialStatistics(stats.getLastModificationTime(), stats.getTotalInputSize(), avgWidth, totalCount); }
/** * Fill in the statistics. The last modification time and the total input size are prefilled. * * @param files * The files that are associated with this block input format. * @param stats * The pre-filled statistics. */ protected SequentialStatistics createStatistics(List<FileStatus> files, FileBaseStatistics stats) throws IOException { if (files.isEmpty()) { return null; } BlockInfo blockInfo = this.createBlockInfo(); long totalCount = 0; for (FileStatus file : files) { // invalid file if (file.getLen() < blockInfo.getInfoSize()) { continue; } FSDataInputStream fdis = file.getPath().getFileSystem().open(file.getPath(), blockInfo.getInfoSize()); fdis.seek(file.getLen() - blockInfo.getInfoSize()); DataInputStream input = new DataInputStream(fdis); blockInfo.read(input); totalCount += blockInfo.getAccumulatedRecordCount(); } final float avgWidth = totalCount == 0 ? 0 : ((float) stats.getTotalInputSize() / totalCount); return new SequentialStatistics(stats.getLastModificationTime(), stats.getTotalInputSize(), avgWidth, totalCount); }
/** * Fill in the statistics. The last modification time and the total input size are prefilled. * * @param files * The files that are associated with this block input format. * @param stats * The pre-filled statistics. */ protected SequentialStatistics createStatistics(List<FileStatus> files, FileBaseStatistics stats) throws IOException { if (files.isEmpty()) { return null; } BlockInfo blockInfo = this.createBlockInfo(); long totalCount = 0; for (FileStatus file : files) { // invalid file if (file.getLen() < blockInfo.getInfoSize()) { continue; } FSDataInputStream fdis = file.getPath().getFileSystem().open(file.getPath(), blockInfo.getInfoSize()); fdis.seek(file.getLen() - blockInfo.getInfoSize()); DataInputStream input = new DataInputStream(fdis); blockInfo.read(input); totalCount += blockInfo.getAccumulatedRecordCount(); } final float avgWidth = totalCount == 0 ? 0 : ((float) stats.getTotalInputSize() / totalCount); return new SequentialStatistics(stats.getLastModificationTime(), stats.getTotalInputSize(), avgWidth, totalCount); }