private boolean checkCompatibility(OrcFileKeyWrapper k) { // check compatibility with subsequent files if (!fileSchema.equals(k.getFileSchema())) { LOG.warn("Incompatible ORC file merge! Schema mismatch for " + k.getInputPath()); return false; } if (!k.getCompression().equals(compression)) { LOG.warn("Incompatible ORC file merge! Compression codec mismatch for " + k.getInputPath()); return false; } if (k.getCompressBufferSize() != compressBuffSize) { LOG.warn("Incompatible ORC file merge! Compression buffer size mismatch for " + k.getInputPath()); return false; } if (!k.getFileVersion().equals(fileVersion)) { LOG.warn("Incompatible ORC file merge! File version mismatch for " + k.getInputPath()); return false; } if (!k.getWriterVersion().equals(writerVersion)) { LOG.warn("Incompatible ORC file merge! Writer version mismatch for " + k.getInputPath()); return false; } if (k.getRowIndexStride() != rowIndexStride) { LOG.warn("Incompatible ORC file merge! Row index stride mismatch for " + k.getInputPath()); return false; } return true; }
private boolean checkCompatibility(OrcFileKeyWrapper k) { // check compatibility with subsequent files if ((k.getTypes().get(0).getSubtypesCount() != columnCount)) { LOG.warn("Incompatible ORC file merge! Column counts mismatch for " + k.getInputPath()); return false; } if (!k.getCompression().equals(compression)) { LOG.warn("Incompatible ORC file merge! Compression codec mismatch for " + k.getInputPath()); return false; } if (k.getCompressBufferSize() != compressBuffSize) { LOG.warn("Incompatible ORC file merge! Compression buffer size mismatch for " + k.getInputPath()); return false; } if (!k.getVersion().equals(version)) { LOG.warn("Incompatible ORC file merge! Version mismatch for " + k.getInputPath()); return false; } if (k.getRowIndexStride() != rowIndexStride) { LOG.warn("Incompatible ORC file merge! Row index stride mismatch for " + k.getInputPath()); return false; } return true; }
LOG.warn("Incompatible ORC file merge! Stripe statistics is missing. " + k.getInputPath()); addIncompatibleFile(k.getInputPath()); return; filePath = k.getInputPath().toUri().getPath(); fixTmpPath(k.getInputPath().getParent()); prevPath = k.getInputPath(); reader = OrcFile.createReader(fs, k.getInputPath()); if (LOG.isInfoEnabled()) { LOG.info("ORC merge file input path: " + k.getInputPath()); addIncompatibleFile(k.getInputPath()); return; if (!k.getInputPath().equals(prevPath)) { reader = OrcFile.createReader(fs, k.getInputPath()); fdis = fs.open(k.getInputPath()); fdis.readFully(v.getStripeInformation().getOffset(), buffer, 0, (int) v.getStripeInformation().getLength()); LOG.info("Merged stripe from file " + k.getInputPath() + " [ offset : " + v.getStripeInformation().getOffset() + " length: " + v.getStripeInformation().getLength() + " row: "
LOG.warn("Incompatible ORC file merge! Stripe statistics is missing. " + k.getInputPath()); incompatFileSet.add(k.getInputPath()); return; filePath = k.getInputPath().toUri().getPath(); fixTmpPath(k.getInputPath().getParent()); prevPath = k.getInputPath(); reader = OrcFile.createReader(fs, k.getInputPath()); if (isLogInfoEnabled) { LOG.info("ORC merge file input path: " + k.getInputPath()); incompatFileSet.add(k.getInputPath()); return; if (!k.getInputPath().equals(prevPath)) { reader = OrcFile.createReader(fs, k.getInputPath()); fdis = fs.open(k.getInputPath()); fdis.readFully(v.getStripeInformation().getOffset(), buffer, 0, (int) v.getStripeInformation().getLength()); LOG.info("Merged stripe from file " + k.getInputPath() + " [ offset : " + v.getStripeInformation().getOffset() + " length: " + v.getStripeInformation().getLength() + " row: "
/**
 * Reads one record through an {@code OrcFileStripeMergeRecordReader} built over a split
 * whose start offset is half the file length, then checks the key's input path and the
 * number of values reported by the first column statistic of the returned stripe.
 *
 * <p>The reader is closed in a {@code finally} block so that it is released even when
 * {@code next} throws or one of the assertions fails.
 *
 * @throws IOException if creating the file, reading from it, or closing the reader fails
 */
@Test
public void testSplitStartsWithOffset() throws IOException {
  createOrcFile(DEFAULT_STRIPE_SIZE, DEFAULT_STRIPE_SIZE + 1);
  FileStatus fileStatus = fs.getFileStatus(tmpPath);
  long length = fileStatus.getLen();
  // Start the split in the middle of the file rather than at offset 0.
  long offset = length / 2;
  FileSplit split = new FileSplit(tmpPath, offset, length, (String[]) null);
  OrcFileStripeMergeRecordReader reader = new OrcFileStripeMergeRecordReader(conf, split);
  try {
    reader.next(key, value);
    Assert.assertEquals("InputPath", tmpPath, key.getInputPath());
    Assert.assertEquals("NumberOfValues", 1L,
        value.getStripeStatistics().getColStats(0).getNumberOfValues());
  } finally {
    // Previously skipped on any failure above, leaking the reader.
    reader.close();
  }
}
private boolean checkCompatibility(OrcFileKeyWrapper k) { // check compatibility with subsequent files if ((k.getTypes().get(0).getSubtypesCount() != columnCount)) { LOG.warn("Incompatible ORC file merge! Column counts mismatch for " + k.getInputPath()); return false; } if (!k.getCompression().equals(compression)) { LOG.warn("Incompatible ORC file merge! Compression codec mismatch for " + k.getInputPath()); return false; } if (k.getCompressBufferSize() != compressBuffSize) { LOG.warn("Incompatible ORC file merge! Compression buffer size mismatch for " + k.getInputPath()); return false; } if (!k.getVersion().equals(version)) { LOG.warn("Incompatible ORC file merge! Version mismatch for " + k.getInputPath()); return false; } if (k.getRowIndexStride() != rowIndexStride) { LOG.warn("Incompatible ORC file merge! Row index stride mismatch for " + k.getInputPath()); return false; } return true; }
LOG.warn("Incompatible ORC file merge! Stripe statistics is missing. " + k.getInputPath()); incompatFileSet.add(k.getInputPath()); return; filePath = k.getInputPath().toUri().getPath(); fixTmpPath(k.getInputPath().getParent()); prevPath = k.getInputPath(); reader = OrcFile.createReader(fs, k.getInputPath()); if (isLogInfoEnabled) { LOG.info("ORC merge file input path: " + k.getInputPath()); incompatFileSet.add(k.getInputPath()); return; if (!k.getInputPath().equals(prevPath)) { reader = OrcFile.createReader(fs, k.getInputPath()); fdis = fs.open(k.getInputPath()); fdis.readFully(v.getStripeInformation().getOffset(), buffer, 0, (int) v.getStripeInformation().getLength()); LOG.info("Merged stripe from file " + k.getInputPath() + " [ offset : " + v.getStripeInformation().getOffset() + " length: " + v.getStripeInformation().getLength() + " row: "