if (ppdResult != FooterCache.NO_SPLIT_AFTER_PPD && file.getFileStatus().getLen() > 0) {
  result.add(new SplitInfo(context, dir.fs, file, orcTail, readerTypes, isOriginal,
      deltas, true, dir.dir, covered, ppdResult));
}

if (file.getFileStatus().getLen() > 0) {
  result.add(new SplitInfo(context, dir.fs, file, null, readerTypes, isOriginal,
      deltas, true, dir.dir, covered, null));
}
for (HdfsFileStatusWithId file : fileStatuses) {
  if (isOriginal && isAcid && vectorMode) {
    offsetAndBucket = VectorizedOrcAcidRowBatchReader.computeOffsetAndBucket(
        file.getFileStatus(), dir, isOriginal, !deltas.isEmpty(), conf);
  }
  FileStatus fileStatus = file.getFileStatus();
  long logicalLen = AcidUtils.getLogicalLength(fs, fileStatus);
  if (logicalLen != 0) {
    Object fileKey = isDefaultFs ? file.getFileId() : null;
    if (fileKey == null && allowSyntheticFileIds) {
      fileKey = new SyntheticFileId(fileStatus);
    }
  }
}
FileStatus fileStatus = origFile.getFileStatus();
if (fileStatus != null) {
  compactionMetaInfo.numberOfBytes += fileStatus.getLen();
}
if (childrenWithId != null) {
  for (HdfsFileStatusWithId child : childrenWithId) {
    getChildState(child.getFileStatus(), child, txnList, working, originalDirectories,
        original, obsolete, bestBase, ignoreEmptyFiles);
  }
}

obsolete.add(fswid.getFileStatus());
if (childrenWithId != null) {
  for (HdfsFileStatusWithId child : childrenWithId) {
    getChildState(child.getFileStatus(), child, writeIdList, working, originalDirectories,
        original, obsolete, bestBase, ignoreEmptyFiles, abortedDirectories, tblproperties,
        fs, validTxnList);
  }
}

obsolete.add(fswid.getFileStatus());

  return o1.getFileStatus().compareTo(o2.getFileStatus());
});
Path path = stat.getFileStatus().getPath();
dirsToSearch.add(path);
LOG.debug("Adding original file " + path + " to dirs to search");
Path path = stat.getFileStatus().getPath();
    = AcidUtils.getAcidState(mergerOptions.getRootPath(), conf, validWriteIdList, false, true);
for (HadoopShims.HdfsFileStatusWithId f : directoryState.getOriginalFiles()) {
  int bucketIdFromPath = AcidUtils.parseBucketId(f.getFileStatus().getPath());
  if (bucketIdFromPath != bucketId) {
    continue; //todo: HIVE-16952
  }
  break; // the condition guarding this break is not shown in this excerpt
  if (f.getFileStatus().getPath().equals(mergerOptions.getBucketPath())) {
    continue;
  }
  Reader copyReader = OrcFile.createReader(f.getFileStatus().getPath(),
      OrcFile.readerOptions(conf));
  rowIdOffsetTmp += copyReader.getNumberOfRows();
}
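// A minimal, self-contained sketch of the row-id-offset idea the excerpt above performs:
// sum getNumberOfRows() over the original bucket files that come before the one being read.
// It uses only the standalone ORC and Hadoop APIs (org.apache.orc.OrcFile / Reader), not
// Hive's internal classes, and parseBucketIdFromName() is a hypothetical, simplified
// stand-in for AcidUtils.parseBucketId(); treat it as an illustration, not Hive's code.
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;

final class RowIdOffsetSketch {
  static long rowIdOffset(List<FileStatus> originals, FileStatus current,
      int bucketId, Configuration conf) throws IOException {
    long offset = 0;
    for (FileStatus f : originals) {
      if (parseBucketIdFromName(f.getPath().getName()) != bucketId) {
        continue; // files of other buckets do not contribute to this reader's offset
      }
      if (f.getPath().equals(current.getPath())) {
        break; // reached the file being read; the preceding files define its offset
      }
      Reader reader = OrcFile.createReader(f.getPath(), OrcFile.readerOptions(conf));
      offset += reader.getNumberOfRows();
    }
    return offset;
  }

  // Hypothetical helper: extract the bucket id from names like "000001_0".
  static int parseBucketIdFromName(String name) {
    int underscore = name.indexOf('_');
    return Integer.parseInt(underscore < 0 ? name : name.substring(0, underscore));
  }
}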
baseSize += origStat.getFileStatus().getLen();
long totalFileSize = 0;
for (HdfsFileStatusWithId child : baseFiles) {
  totalFileSize += child.getFileStatus().getLen();
  AcidOutputFormat.Options opts = AcidUtils.parseBaseOrDeltaBucketFilename(
      child.getFileStatus().getPath(), context.conf);
  opts.writingBase(true);
  int b = opts.getBucket();
}
if (child.getFileStatus().isDir()) {
  findOriginals(fs, child.getFileStatus(), original, useFileIds);
} else {
  original.add(child);
}
if (child.getFileStatus().isDirectory()) {
  if (recursive) {
    findOriginals(fs, child.getFileStatus(), original, useFileIds, ignoreEmptyFiles, true);
  }
} else {
  if (!ignoreEmptyFiles || child.getFileStatus().getLen() > 0) {
    original.add(child);
  }
}
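// A minimal sketch of the same recursive traversal against the plain Hadoop FileSystem
// API, collecting FileStatus objects rather than Hive's HdfsFileStatusWithId: directories
// are descended only when `recursive` is set, and zero-length files are skipped unless
// `ignoreEmptyFiles` is false. Names and signature here are illustrative, not Hive's.
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class FindOriginalsSketch {
  static void findOriginals(FileSystem fs, Path dir, List<FileStatus> original,
      boolean ignoreEmptyFiles, boolean recursive) throws IOException {
    for (FileStatus child : fs.listStatus(dir)) {
      if (child.isDirectory()) {
        if (recursive) {
          findOriginals(fs, child.getPath(), original, ignoreEmptyFiles, true);
        }
      } else if (!ignoreEmptyFiles || child.getLen() > 0) {
        original.add(child);
      }
    }
  }
}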
    validWriteIdList, false, true);
for (HadoopShims.HdfsFileStatusWithId f : directoryState.getOriginalFiles()) {
  int bucketIdFromPath = AcidUtils.parseBucketId(f.getFileStatus().getPath());
  if (bucketIdFromPath != bucketId) {
    continue; //HIVE-16952
  }
  if (f.getFileStatus().getPath().equals(file.getPath())) {
    break; // 'f' is the file this split comes from; stop accumulating
  }
  Reader reader = OrcFile.createReader(f.getFileStatus().getPath(), OrcFile.readerOptions(conf));
  rowIdOffset += reader.getNumberOfRows();
}
List<HdfsFileStatusWithId> result = dir.getOriginalFiles();
assertEquals(5, result.size());
assertEquals("mock:/tbl/part1/000000_0", result.get(0).getFileStatus().getPath().toString());
assertEquals("mock:/tbl/part1/000001_1", result.get(1).getFileStatus().getPath().toString());
assertEquals("mock:/tbl/part1/000002_0", result.get(2).getFileStatus().getPath().toString());
assertEquals("mock:/tbl/part1/random", result.get(3).getFileStatus().getPath().toString());
assertEquals("mock:/tbl/part1/subdir/000000_0", result.get(4).getFileStatus().getPath().toString());
List<AcidUtils.ParsedDelta> deltas = dir.getCurrentDirectories();
assertEquals(2, deltas.size());
@Override
public void getAndValidate(final List<HadoopShims.HdfsFileStatusWithId> files,
    final boolean isOriginal, final OrcTail[] result, final ByteBuffer[] ppdResult)
    throws IOException, HiveException {
  // TODO: should local cache also be by fileId? Preserve the original logic for now.
  assert result.length == files.size();
  int i = -1;
  for (HadoopShims.HdfsFileStatusWithId fileWithId : files) {
    ++i;
    FileStatus file = fileWithId.getFileStatus();
    Path path = file.getPath();
    TailAndFileData tfd = cache.getIfPresent(path);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Serialized tail " + (tfd == null ? "not " : "") + "cached for path: " + path);
    }
    if (tfd == null) continue;
    if (file.getLen() == tfd.fileLength && file.getModificationTime() == tfd.fileModTime) {
      result[i] = ReaderImpl.extractFileTail(tfd.bb.duplicate(), tfd.fileLength, tfd.fileModTime);
      continue;
    }
    // Invalidate
    cache.invalidate(path);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Meta-Info for : " + path + " changed. CachedModificationTime: "
          + tfd.fileModTime + ", CurrentModificationTime: " + file.getModificationTime()
          + ", CachedLength: " + tfd.fileLength + ", CurrentLength: " + file.getLen());
    }
  }
}
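// A minimal sketch of the same cache-validation pattern with a Guava cache keyed by Path:
// a cached entry is trusted only if both the file length and the modification time still
// match the current FileStatus; otherwise it is invalidated. CachedTail is a hypothetical
// holder standing in for TailAndFileData; this is an illustration, not Hive's implementation.
import java.nio.ByteBuffer;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;

final class TailCacheSketch {
  static final class CachedTail {
    final ByteBuffer serializedTail;
    final long fileLength;
    final long fileModTime;
    CachedTail(ByteBuffer serializedTail, long fileLength, long fileModTime) {
      this.serializedTail = serializedTail;
      this.fileLength = fileLength;
      this.fileModTime = fileModTime;
    }
  }

  private final Cache<Path, CachedTail> cache =
      CacheBuilder.newBuilder().maximumSize(1000).build();

  /** Returns the cached tail bytes if still valid for the current file status, else null. */
  ByteBuffer getIfValid(FileStatus file) {
    CachedTail tfd = cache.getIfPresent(file.getPath());
    if (tfd == null) {
      return null;
    }
    if (file.getLen() == tfd.fileLength && file.getModificationTime() == tfd.fileModTime) {
      return tfd.serializedTail.duplicate(); // duplicate so callers cannot disturb the cached position
    }
    cache.invalidate(file.getPath()); // length or modification time changed; drop the stale entry
    return null;
  }

  void put(FileStatus file, ByteBuffer serializedTail) {
    cache.put(file.getPath(),
        new CachedTail(serializedTail, file.getLen(), file.getModificationTime()));
  }
}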
long totalFileSize = 0;
for (HdfsFileStatusWithId child : baseFiles) {
  totalFileSize += child.getFileStatus().getLen();
  int b = AcidUtils.parseBucketId(child.getFileStatus().getPath());
}
fileList.add(hfs.getFileStatus());