/**
 * Creates a new, empty key object.
 *
 * @return a newly constructed {@link OrcFileKeyWrapper}
 */
public OrcFileKeyWrapper createKey() {
  final OrcFileKeyWrapper freshKey = new OrcFileKeyWrapper();
  return freshKey;
}
private boolean checkCompatibility(OrcFileKeyWrapper k) { // check compatibility with subsequent files if ((k.getTypes().get(0).getSubtypesCount() != columnCount)) { LOG.warn("Incompatible ORC file merge! Column counts mismatch for " + k.getInputPath()); return false; } if (!k.getCompression().equals(compression)) { LOG.warn("Incompatible ORC file merge! Compression codec mismatch for " + k.getInputPath()); return false; } if (k.getCompressBufferSize() != compressBuffSize) { LOG.warn("Incompatible ORC file merge! Compression buffer size mismatch for " + k.getInputPath()); return false; } if (!k.getVersion().equals(version)) { LOG.warn("Incompatible ORC file merge! Version mismatch for " + k.getInputPath()); return false; } if (k.getRowIndexStride() != rowIndexStride) { LOG.warn("Incompatible ORC file merge! Row index stride mismatch for " + k.getInputPath()); return false; } return true; }
private boolean checkCompatibility(OrcFileKeyWrapper k) { // check compatibility with subsequent files if (!fileSchema.equals(k.getFileSchema())) { LOG.warn("Incompatible ORC file merge! Schema mismatch for " + k.getInputPath()); return false; } if (!k.getCompression().equals(compression)) { LOG.warn("Incompatible ORC file merge! Compression codec mismatch for " + k.getInputPath()); return false; } if (k.getCompressBufferSize() != compressBuffSize) { LOG.warn("Incompatible ORC file merge! Compression buffer size mismatch for " + k.getInputPath()); return false; } if (!k.getFileVersion().equals(fileVersion)) { LOG.warn("Incompatible ORC file merge! File version mismatch for " + k.getInputPath()); return false; } if (!k.getWriterVersion().equals(writerVersion)) { LOG.warn("Incompatible ORC file merge! Writer version mismatch for " + k.getInputPath()); return false; } if (k.getRowIndexStride() != rowIndexStride) { LOG.warn("Incompatible ORC file merge! Row index stride mismatch for " + k.getInputPath()); return false; } return true; }
keyWrapper.setInputPath(path); keyWrapper.setIsIncompatFile(true); skipFile = true; return true; valueWrapper.setUserMetadata(((ReaderImpl) reader).getOrcProtoUserMetadata()); keyWrapper.setInputPath(path); keyWrapper.setCompression(reader.getCompressionKind()); keyWrapper.setCompressBufferSize(reader.getCompressionSize()); keyWrapper.setVersion(reader.getFileVersion()); keyWrapper.setRowIndexStride(reader.getRowIndexStride()); keyWrapper.setTypes(reader.getTypes()); } else { stripeIdx++;
keyWrapper.setInputPath(path); keyWrapper.setIsIncompatFile(true); skipFile = true; return true; valueWrapper.setUserMetadata(((ReaderImpl) reader).getOrcProtoUserMetadata()); keyWrapper.setInputPath(path); keyWrapper.setCompression(reader.getCompressionKind()); keyWrapper.setCompressBufferSize(reader.getCompressionSize()); keyWrapper.setFileVersion(reader.getFileVersion()); keyWrapper.setWriterVersion(reader.getWriterVersion()); keyWrapper.setRowIndexStride(reader.getRowIndexStride()); keyWrapper.setFileSchema(reader.getSchema()); } else { stripeIdx++;
/**
 * Reads one record through an {@link OrcFileStripeMergeRecordReader} whose
 * split starts at half the file length, then checks the input path stored in
 * the key and the number of values reported by the first column's stripe
 * statistics.
 *
 * @throws IOException if creating the file, reading its status, or reading
 *         from / closing the record reader fails
 */
@Test
public void testSplitStartsWithOffset() throws IOException {
  // NOTE(review): the meaning of the two createOrcFile arguments is defined by
  // the helper elsewhere in this class; presumably stripe size and row/batch
  // size - confirm against the helper.
  createOrcFile(DEFAULT_STRIPE_SIZE, DEFAULT_STRIPE_SIZE + 1);

  FileStatus fileStatus = fs.getFileStatus(tmpPath);
  long length = fileStatus.getLen();
  // Start the split in the middle of the file rather than at offset 0.
  long offset = length / 2;
  FileSplit split = new FileSplit(tmpPath, offset, length, (String[]) null);

  OrcFileStripeMergeRecordReader reader = new OrcFileStripeMergeRecordReader(conf, split);
  try {
    reader.next(key, value);
    Assert.assertEquals("InputPath", tmpPath, key.getInputPath());
    Assert.assertEquals("NumberOfValues", 1L,
        value.getStripeStatistics().getColStats(0).getNumberOfValues());
  } finally {
    // Close even when next() throws or an assertion fails, so the reader is
    // not leaked into later tests.
    reader.close();
  }
}
if (k.isIncompatFile()) { LOG.warn("Incompatible ORC file merge! Stripe statistics is missing. " + k.getInputPath()); addIncompatibleFile(k.getInputPath()); return; filePath = k.getInputPath().toUri().getPath(); fixTmpPath(k.getInputPath().getParent()); prevPath = k.getInputPath(); reader = OrcFile.createReader(fs, k.getInputPath()); if (LOG.isInfoEnabled()) { LOG.info("ORC merge file input path: " + k.getInputPath()); compression = k.getCompression(); compressBuffSize = k.getCompressBufferSize(); fileVersion = k.getFileVersion(); writerVersion = k.getWriterVersion(); fileSchema = k.getFileSchema(); rowIndexStride = k.getRowIndexStride(); addIncompatibleFile(k.getInputPath()); return; if (!k.getInputPath().equals(prevPath)) { reader = OrcFile.createReader(fs, k.getInputPath()); fdis = fs.open(k.getInputPath()); fdis.readFully(v.getStripeInformation().getOffset(), buffer, 0,
keyWrapper.setInputPath(path); keyWrapper.setIsIncompatFile(true); skipFile = true; return true; valueWrapper.setUserMetadata(((ReaderImpl) reader).getOrcProtoUserMetadata()); keyWrapper.setInputPath(path); keyWrapper.setCompression(reader.getCompression()); keyWrapper.setCompressBufferSize(reader.getCompressionSize()); keyWrapper.setVersion(reader.getFileVersion()); keyWrapper.setRowIndexStride(reader.getRowIndexStride()); keyWrapper.setTypes(reader.getTypes()); } else { stripeIdx++;
if (k.isIncompatFile()) { LOG.warn("Incompatible ORC file merge! Stripe statistics is missing. " + k.getInputPath()); incompatFileSet.add(k.getInputPath()); return; filePath = k.getInputPath().toUri().getPath(); fixTmpPath(k.getInputPath().getParent()); prevPath = k.getInputPath(); reader = OrcFile.createReader(fs, k.getInputPath()); if (isLogInfoEnabled) { LOG.info("ORC merge file input path: " + k.getInputPath()); compression = k.getCompression(); compressBuffSize = k.getCompressBufferSize(); version = k.getVersion(); columnCount = k.getTypes().get(0).getSubtypesCount(); rowIndexStride = k.getRowIndexStride(); incompatFileSet.add(k.getInputPath()); return; if (!k.getInputPath().equals(prevPath)) { reader = OrcFile.createReader(fs, k.getInputPath()); fdis = fs.open(k.getInputPath()); fdis.readFully(v.getStripeInformation().getOffset(), buffer, 0, (int) v.getStripeInformation().getLength());
/**
 * Creates a new, empty key object.
 *
 * @return a newly constructed {@link OrcFileKeyWrapper}
 */
public OrcFileKeyWrapper createKey() {
  final OrcFileKeyWrapper freshKey = new OrcFileKeyWrapper();
  return freshKey;
}
private boolean checkCompatibility(OrcFileKeyWrapper k) { // check compatibility with subsequent files if ((k.getTypes().get(0).getSubtypesCount() != columnCount)) { LOG.warn("Incompatible ORC file merge! Column counts mismatch for " + k.getInputPath()); return false; } if (!k.getCompression().equals(compression)) { LOG.warn("Incompatible ORC file merge! Compression codec mismatch for " + k.getInputPath()); return false; } if (k.getCompressBufferSize() != compressBuffSize) { LOG.warn("Incompatible ORC file merge! Compression buffer size mismatch for " + k.getInputPath()); return false; } if (!k.getVersion().equals(version)) { LOG.warn("Incompatible ORC file merge! Version mismatch for " + k.getInputPath()); return false; } if (k.getRowIndexStride() != rowIndexStride) { LOG.warn("Incompatible ORC file merge! Row index stride mismatch for " + k.getInputPath()); return false; } return true; }
/**
 * Prepares the fixture before each test: a new configuration, the local file
 * system obtained from it, empty key/value wrappers and a temporary path.
 *
 * @throws IOException if the local file system cannot be obtained or the
 *         temporary path cannot be prepared
 */
@Before
public void setup() throws IOException {
  final Configuration freshConf = new Configuration();
  conf = freshConf;
  fs = FileSystem.getLocal(freshConf);

  // Wrappers that the record reader fills in on each next() call.
  key = new OrcFileKeyWrapper();
  value = new OrcFileValueWrapper();

  tmpPath = prepareTmpPath();
}
if (k.isIncompatFile()) { LOG.warn("Incompatible ORC file merge! Stripe statistics is missing. " + k.getInputPath()); incompatFileSet.add(k.getInputPath()); return; filePath = k.getInputPath().toUri().getPath(); fixTmpPath(k.getInputPath().getParent()); prevPath = k.getInputPath(); reader = OrcFile.createReader(fs, k.getInputPath()); if (isLogInfoEnabled) { LOG.info("ORC merge file input path: " + k.getInputPath()); compression = k.getCompression(); compressBuffSize = k.getCompressBufferSize(); version = k.getVersion(); columnCount = k.getTypes().get(0).getSubtypesCount(); rowIndexStride = k.getRowIndexStride(); incompatFileSet.add(k.getInputPath()); return; if (!k.getInputPath().equals(prevPath)) { reader = OrcFile.createReader(fs, k.getInputPath()); fdis = fs.open(k.getInputPath()); fdis.readFully(v.getStripeInformation().getOffset(), buffer, 0, (int) v.getStripeInformation().getLength());
/**
 * Creates a new, empty key object.
 *
 * @return a newly constructed {@link OrcFileKeyWrapper}
 */
public OrcFileKeyWrapper createKey() {
  final OrcFileKeyWrapper freshKey = new OrcFileKeyWrapper();
  return freshKey;
}