/**
 * Reports the info size by delegating to {@code block}.
 *
 * @return whatever {@code block.getInfoSize()} returns
 */
@Override
protected int getInfoSize() {
    return block.getInfoSize();
}
/**
 * Reports the info size by delegating to {@code info}.
 *
 * @return whatever {@code info.getInfoSize()} returns
 */
@Override
protected int getInfoSize() {
    return info.getInfoSize();
}
private BlockInfo createAndReadBlockInfo() throws IOException { BlockInfo blockInfo = new BlockInfo(); if (this.splitLength > blockInfo.getInfoSize()) { // At first we go and read the block info containing the recordCount, the accumulatedRecordCount // and the firstRecordStart offset in the current block. This is written at the end of the block and // is of fixed size, currently 3 * Long.SIZE. // TODO: seek not supported by compressed streams. Will throw exception this.stream.seek(this.splitStart + this.splitLength - blockInfo.getInfoSize()); blockInfo.read(new DataInputViewStreamWrapper(this.stream)); } return blockInfo; }
for (FileStatus file : files) { if (file.getLen() < blockInfo.getInfoSize()) { continue; try (FSDataInputStream fdis = fs.open(file.getPath(), blockInfo.getInfoSize())) { fdis.seek(file.getLen() - blockInfo.getInfoSize());
/**
 * Checks that the statistics of an input format configured with two file paths report a total
 * input size of {@code blockSize * (3 + 5)}, i.e. the sizes requested for both files combined.
 */
@Test
public void testGetStatisticsMultiplePaths() throws IOException {
    // One block is the block info plus 8 more.
    final int blockSize = new BlockInfo().getInfoSize() + 8;
    final int firstBlockCount = 3;
    final int secondBlockCount = 5;
    final int expectedTotalSize = blockSize * (firstBlockCount + secondBlockCount);

    // Presumably each helper call writes a file of blockSize * blockCount bytes - see createBinaryInputFile.
    final File firstFile = createBinaryInputFile("binary_input_format_test", blockSize, firstBlockCount);
    final File secondFile = createBinaryInputFile("binary_input_format_test_2", blockSize, secondBlockCount);

    final BinaryInputFormat<Record> format = new MyBinaryInputFormat();
    format.setFilePaths(firstFile.toURI().toString(), secondFile.toURI().toString());
    format.setBlockSize(blockSize);

    final BaseStatistics statistics = format.getStatistics(null);

    Assert.assertEquals("The file size statistics is wrong", expectedTotalSize, statistics.getTotalInputSize());
}
@Test public void testCreateInputSplitsWithOneFile() throws IOException { // create temporary file with 3 blocks final File tempFile = File.createTempFile("binary_input_format_test", "tmp"); tempFile.deleteOnExit(); final int blockInfoSize = new BlockInfo().getInfoSize(); final int blockSize = blockInfoSize + 8; final int numBlocks = 3; FileOutputStream fileOutputStream = new FileOutputStream(tempFile); for(int i = 0; i < blockSize * numBlocks; i++) { fileOutputStream.write(new byte[]{1}); } fileOutputStream.close(); final Configuration config = new Configuration(); config.setLong("input.block_size", blockSize + 10); final BinaryInputFormat<Record> inputFormat = new MyBinaryInputFormat(); inputFormat.setFilePath(tempFile.toURI().toString()); inputFormat.setBlockSize(blockSize); inputFormat.configure(config); FileInputSplit[] inputSplits = inputFormat.createInputSplits(numBlocks); Assert.assertEquals("Returns requested numbers of splits.", numBlocks, inputSplits.length); Assert.assertEquals("1. split has block size length.", blockSize, inputSplits[0].getLength()); Assert.assertEquals("2. split has block size length.", blockSize, inputSplits[1].getLength()); Assert.assertEquals("3. split has block size length.", blockSize, inputSplits[2].getLength()); }
@Test public void testCreateInputSplitsWithMulitpleFiles() throws IOException { final int blockInfoSize = new BlockInfo().getInfoSize(); final int blockSize = blockInfoSize + 8; final int numBlocks1 = 3;
private BlockInfo createAndReadBlockInfo() throws IOException { BlockInfo blockInfo = new BlockInfo(); if (this.splitLength > blockInfo.getInfoSize()) { // At first we go and read the block info containing the recordCount, the accumulatedRecordCount // and the firstRecordStart offset in the current block. This is written at the end of the block and // is of fixed size, currently 3 * Long.SIZE. // TODO: seek not supported by compressed streams. Will throw exception this.stream.seek(this.splitStart + this.splitLength - blockInfo.getInfoSize()); blockInfo.read(new DataInputViewStreamWrapper(this.stream)); } return blockInfo; }
private BlockInfo createAndReadBlockInfo() throws IOException { BlockInfo blockInfo = new BlockInfo(); if (this.splitLength > blockInfo.getInfoSize()) { // At first we go and read the block info containing the recordCount, the accumulatedRecordCount // and the firstRecordStart offset in the current block. This is written at the end of the block and // is of fixed size, currently 3 * Long.SIZE. // TODO: seek not supported by compressed streams. Will throw exception this.stream.seek(this.splitStart + this.splitLength - blockInfo.getInfoSize()); blockInfo.read(new DataInputViewStreamWrapper(this.stream)); } return blockInfo; }
for (FileStatus file : files) { if (file.getLen() < blockInfo.getInfoSize()) { continue; try (FSDataInputStream fdis = fs.open(file.getPath(), blockInfo.getInfoSize())) { fdis.seek(file.getLen() - blockInfo.getInfoSize());
for (FileStatus file : files) { if (file.getLen() < blockInfo.getInfoSize()) { continue; try (FSDataInputStream fdis = fs.open(file.getPath(), blockInfo.getInfoSize())) { fdis.seek(file.getLen() - blockInfo.getInfoSize());