public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); start = split.getStart(); end = start + split.getLength(); final Path file = split.getPath(); // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(job); fileIn = fs.open(split.getPath()); reader = new Reader(fileIn, fs.getFileStatus(file).getLen(), job); scanner = reader.createScannerByByteRange(start, split.getLength()); }
/**
 * Reads the file at {@code path} back as ten consecutive byte-range splits and verifies the
 * result.
 *
 * <p>Each split must yield at least one entry, and the entries counted across all splits must
 * add up to {@code reader.getEntryCount()}.
 *
 * @throws IOException if any of the file system, reader or scanner calls made here throws it
 */
void readFile() throws IOException {
  final long fileLength = fs.getFileStatus(path).getLen();
  final int numSplit = 10;
  // The +1 rounds up, so numSplit ranges of splitSize bytes always cover the whole file.
  final long splitSize = fileLength / numSplit + 1;

  // NOTE(review): the stream returned by fs.open(path) is handed straight to the Reader and is
  // never closed here directly - confirm that Reader.close() releases it.
  final Reader reader = new Reader(fs.open(path), fileLength, conf);
  try {
    long rowCount = 0;
    long offset = 0;
    for (int i = 0; i < numSplit; ++i, offset += splitSize) {
      final Scanner scanner = reader.createScannerByByteRange(offset, splitSize);
      final BytesWritable key = new BytesWritable();
      final BytesWritable value = new BytesWritable();
      int count = 0;
      try {
        while (!scanner.atEnd()) {
          scanner.entry().get(key, value);
          ++count;
          scanner.advance();
        }
      } finally {
        // Close the scanner even if reading an entry fails part-way through the split.
        scanner.close();
      }
      Assert.assertTrue(count > 0);
      rowCount += count;
    }
    // Expected value first, so that a failure message reports the two numbers the right way round.
    Assert.assertEquals(reader.getEntryCount(), rowCount);
  } finally {
    // Close the reader even when an assertion above fails.
    reader.close();
  }
}
/**
 * Reads the file at {@code path} back as ten consecutive byte-range splits and verifies the
 * result.
 *
 * <p>Each split must yield at least one entry, and the entries counted across all splits must
 * add up to {@code reader.getEntryCount()}.
 *
 * @throws IOException if any of the file system, reader or scanner calls made here throws it
 */
void readFile() throws IOException {
  final long fileLength = fs.getFileStatus(path).getLen();
  final int numSplit = 10;
  // The +1 rounds up, so numSplit ranges of splitSize bytes always cover the whole file.
  final long splitSize = fileLength / numSplit + 1;

  // NOTE(review): the stream returned by fs.open(path) is handed straight to the Reader and is
  // never closed here directly - confirm that Reader.close() releases it.
  final Reader reader = new Reader(fs.open(path), fileLength, conf);
  try {
    long rowCount = 0;
    long offset = 0;
    for (int i = 0; i < numSplit; ++i, offset += splitSize) {
      final Scanner scanner = reader.createScannerByByteRange(offset, splitSize);
      final BytesWritable key = new BytesWritable();
      final BytesWritable value = new BytesWritable();
      int count = 0;
      try {
        while (!scanner.atEnd()) {
          scanner.entry().get(key, value);
          ++count;
          scanner.advance();
        }
      } finally {
        // Close the scanner even if reading an entry fails part-way through the split.
        scanner.close();
      }
      Assert.assertTrue(count > 0);
      rowCount += count;
    }
    // Expected value first, so that a failure message reports the two numbers the right way round.
    Assert.assertEquals(reader.getEntryCount(), rowCount);
  } finally {
    // Close the reader even when an assertion above fails.
    reader.close();
  }
}
/**
 * Reads the file at {@code path} back as ten consecutive byte-range splits and verifies the
 * result.
 *
 * <p>Each split must yield at least one entry, and the entries counted across all splits must
 * add up to {@code reader.getEntryCount()}.
 *
 * @throws IOException if any of the file system, reader or scanner calls made here throws it
 */
void readFile() throws IOException {
  final long fileLength = fs.getFileStatus(path).getLen();
  final int numSplit = 10;
  // The +1 rounds up, so numSplit ranges of splitSize bytes always cover the whole file.
  final long splitSize = fileLength / numSplit + 1;

  // NOTE(review): the stream returned by fs.open(path) is handed straight to the Reader and is
  // never closed here directly - confirm that Reader.close() releases it.
  final Reader reader = new Reader(fs.open(path), fileLength, conf);
  try {
    long rowCount = 0;
    long offset = 0;
    for (int i = 0; i < numSplit; ++i, offset += splitSize) {
      final Scanner scanner = reader.createScannerByByteRange(offset, splitSize);
      final BytesWritable key = new BytesWritable();
      final BytesWritable value = new BytesWritable();
      int count = 0;
      try {
        while (!scanner.atEnd()) {
          scanner.entry().get(key, value);
          ++count;
          scanner.advance();
        }
      } finally {
        // Close the scanner even if reading an entry fails part-way through the split.
        scanner.close();
      }
      Assert.assertTrue(count > 0);
      rowCount += count;
    }
    // Expected value first, so that a failure message reports the two numbers the right way round.
    Assert.assertEquals(reader.getEntryCount(), rowCount);
  } finally {
    // Close the reader even when an assertion above fails.
    reader.close();
  }
}