/**
 * Looks up the result-array index previously recorded for {@code fileId} in
 * {@code posMap}, asserting that the slot is still empty and that it really
 * corresponds to the file with that ID.
 *
 * @param posMap maps file ID to its position in {@code files}/{@code result}
 * @param files the files being processed, parallel to {@code result}
 * @param result footer slots, parallel to {@code files}; the slot must be unfilled
 * @param fileId the ID to resolve; must be non-null and present in {@code posMap}
 * @return the index for {@code fileId}
 */
private int getAndVerifyIndex(HashMap<Long, Integer> posMap,
    List<HdfsFileStatusWithId> files, OrcTail[] result, Long fileId) {
  // Check the key before using it: unboxing posMap.get(null) (or a missing key)
  // straight into an int would otherwise surface as an opaque NPE with asserts on.
  assert fileId != null;
  Integer ix = posMap.get(fileId);
  // Fail with a clear message rather than an unboxing NPE when the map has no entry.
  assert ix != null : "No position recorded for file ID " + fileId;
  assert result[ix] == null;
  assert fileId.equals(files.get(ix).getFileId());
  return ix;
}
/**
 * Returns the index into {@code files}/{@code result} that {@code posMap}
 * records for {@code fileId}, asserting the slot is still unfilled and that
 * it matches the file's ID.
 *
 * NOTE(review): {@code posMap.get(fileId)} is unboxed before the
 * {@code fileId != null} assert runs, so a null or unmapped fileId surfaces
 * here as a NullPointerException rather than an assertion failure.
 */
private int getAndVerifyIndex(HashMap<Long, Integer> posMap,
    List<HdfsFileStatusWithId> files, OrcTail[] result, Long fileId) {
  int ix = posMap.get(fileId); // unboxing NPE if fileId has no entry
  assert result[ix] == null;
  assert fileId != null && fileId.equals(files.get(ix).getFileId());
  return ix;
}
private static OrcTail createOrcTailFromMs( HdfsFileStatusWithId file, ByteBuffer bb) throws IOException { if (bb == null) { return null; } FileStatus fs = file.getFileStatus(); ByteBuffer copy = bb.duplicate(); try { OrcTail orcTail = ReaderImpl.extractFileTail(copy, fs.getLen(), fs.getModificationTime()); // trigger lazy read of metadata to make sure serialized data is not corrupted and readable orcTail.getStripeStatistics(); return orcTail; } catch (Exception ex) { byte[] data = new byte[bb.remaining()]; System.arraycopy(bb.array(), bb.arrayOffset() + bb.position(), data, 0, data.length); String msg = "Failed to parse the footer stored in cache for file ID " + file.getFileId() + " " + bb + " [ " + Hex.encodeHexString(data) + " ]"; LOG.error(msg, ex); return null; } }
long logicalLen = AcidUtils.getLogicalLength(fs, fileStatus); if (logicalLen != 0) { Object fileKey = isDefaultFs ? file.getFileId() : null; if (fileKey == null && allowSyntheticFileIds) { fileKey = new SyntheticFileId(fileStatus);
@Override public List<OrcSplit> getSplits() throws IOException { List<OrcSplit> splits = Lists.newArrayList(); for (HdfsFileStatusWithId file : fileStatuses) { FileStatus fileStatus = file.getFileStatus(); if (fileStatus.getLen() != 0) { Object fileKey = file.getFileId(); if (fileKey == null && allowSyntheticFileIds) { fileKey = new SyntheticFileId(fileStatus); } TreeMap<Long, BlockLocation> blockOffsets = SHIMS.getLocationsWithOffset(fs, fileStatus); for (Map.Entry<Long, BlockLocation> entry : blockOffsets.entrySet()) { OrcSplit orcSplit = new OrcSplit(fileStatus.getPath(), fileKey, entry.getKey(), entry.getValue().getLength(), entry.getValue().getHosts(), null, isOriginal, true, deltas, -1, fileStatus.getLen()); splits.add(orcSplit); } } } // add uncovered ACID delta splits splits.addAll(super.getSplits()); return splits; }
/**
 * Collects the IDs of files whose footers are not yet present in
 * {@code result}, recording each ID's position in {@code posMap}.
 * Files without an ID are skipped (warned once) outside of tests;
 * in tests a synthetic ID is generated instead.
 *
 * @param files the files being processed, parallel to {@code result}
 * @param result footer slots; a non-null slot means the footer is already known
 * @param posMap out-parameter mapping each queried file ID to its index
 * @return the file IDs that still need to be fetched from the metastore
 */
private List<Long> determineFileIdsToQuery(
    List<HdfsFileStatusWithId> files, OrcTail[] result, HashMap<Long, Integer> posMap) {
  for (int i = 0; i < result.length; ++i) {
    if (result[i] != null) {
      continue; // footer already available for this file
    }
    HdfsFileStatusWithId file = files.get(i);
    final FileStatus fs = file.getFileStatus();
    Long fileId = file.getFileId();
    if (fileId == null) {
      if (!isInTest) {
        // Warn once at info level, but always when debug logging is on; query
        // the logger directly (matching the other variant of this method in the
        // file) rather than relying on a separately-maintained flag.
        if (!isWarnLogged || LOG.isDebugEnabled()) {
          LOG.warn("Not using metastore cache because fileId is missing: " + fs.getPath());
          isWarnLogged = true;
        }
        continue;
      }
      fileId = generateTestFileId(fs, files, i);
      LOG.info("Generated file ID " + fileId + " at " + i);
    }
    posMap.put(fileId, i);
  }
  return Lists.newArrayList(posMap.keySet());
}
/**
 * Builds a split generator for a single base file plus its ACID deltas,
 * copying the precomputed state out of {@code splitInfo}.
 *
 * @param splitInfo per-file context: file status, cached footer, deltas, etc.
 * @param ugi user to run split generation as; may carry credentials
 * @param allowSyntheticFileIds whether to fabricate file IDs when the
 *        filesystem does not supply one
 * @throws IOException if fetching block locations fails
 */
public SplitGenerator(SplitInfo splitInfo, UserGroupInformation ugi,
    boolean allowSyntheticFileIds) throws IOException {
  this.ugi = ugi;
  this.context = splitInfo.context;
  this.fs = splitInfo.fs;
  // Must be assigned before blockSize/locations below, which read this.file.
  this.file = splitInfo.fileWithId.getFileStatus();
  this.fsFileId = splitInfo.fileWithId.getFileId();
  this.blockSize = this.file.getBlockSize();
  this.orcTail = splitInfo.orcTail;
  this.readerTypes = splitInfo.readerTypes;
  // TODO: potential DFS call
  this.locations = SHIMS.getLocationsWithOffset(fs, file);
  this.isOriginal = splitInfo.isOriginal;
  this.deltas = splitInfo.deltas;
  this.hasBase = splitInfo.hasBase;
  // -1 signals "not yet computed"; filled in later during split estimation.
  this.projColsUncompressedSize = -1;
  this.deltaSplits = splitInfo.getSplits();
  this.allowSyntheticFileIds = allowSyntheticFileIds;
  this.ppdResult = splitInfo.ppdResult;
}
private static OrcTail createOrcTailFromMs( HdfsFileStatusWithId file, ByteBuffer bb) throws IOException { if (bb == null) return null; FileStatus fs = file.getFileStatus(); ByteBuffer copy = bb.duplicate(); try { OrcTail orcTail = ReaderImpl.extractFileTail(copy, fs.getLen(), fs.getModificationTime()); // trigger lazy read of metadata to make sure serialized data is not corrupted and readable orcTail.getStripeStatistics(); return orcTail; } catch (Exception ex) { byte[] data = new byte[bb.remaining()]; System.arraycopy(bb.array(), bb.arrayOffset() + bb.position(), data, 0, data.length); String msg = "Failed to parse the footer stored in cache for file ID " + file.getFileId() + " " + bb + " [ " + Hex.encodeHexString(data) + " ]"; LOG.error(msg, ex); return null; } }
/**
 * Walks the files whose footer slot in {@code result} is still empty and
 * gathers their IDs for a metastore footer query, recording each ID's index
 * in {@code posMap}. ID-less files are skipped with a (mostly) one-time
 * warning in production; in tests a synthetic ID is generated instead.
 */
private List<Long> determineFileIdsToQuery(
    List<HdfsFileStatusWithId> files, OrcTail[] result, HashMap<Long, Integer> posMap) {
  for (int ix = 0; ix < result.length; ++ix) {
    if (result[ix] != null) {
      continue; // this footer is already cached locally
    }
    HdfsFileStatusWithId fileWithId = files.get(ix);
    final FileStatus status = fileWithId.getFileStatus();
    Long id = fileWithId.getFileId();
    if (id == null) {
      if (isInTest) {
        id = generateTestFileId(status, files, ix);
        LOG.info("Generated file ID " + id + " at " + ix);
      } else {
        // warn once, or every time when debug logging is enabled
        if (!isWarnLogged || LOG.isDebugEnabled()) {
          LOG.warn("Not using metastore cache because fileId is missing: " + status.getPath());
          isWarnLogged = true;
        }
        continue;
      }
    }
    posMap.put(id, ix);
  }
  return Lists.newArrayList(posMap.keySet());
}
/**
 * Builds a split generator for a single base file plus its ACID deltas,
 * copying the precomputed state out of {@code splitInfo}.
 *
 * @param splitInfo per-file context: file status, cached footer, deltas, etc.
 * @param ugi user to run split generation as; may carry credentials
 * @param allowSyntheticFileIds whether to fabricate file IDs when the
 *        filesystem does not supply one
 * @param isDefaultFs whether the file lives on the default (HDFS) filesystem;
 *        native file IDs are only trusted there
 * @throws IOException if fetching block locations fails
 */
public SplitGenerator(SplitInfo splitInfo, UserGroupInformation ugi,
    boolean allowSyntheticFileIds, boolean isDefaultFs) throws IOException {
  this.ugi = ugi;
  this.context = splitInfo.context;
  this.fs = splitInfo.fs;
  // Must be assigned before blockSize/locations below, which read this.file.
  this.file = splitInfo.fileWithId.getFileStatus();
  // Only use the native file ID on the default FS; elsewhere a synthetic ID
  // may be generated later if allowSyntheticFileIds is set.
  this.fsFileId = isDefaultFs ? splitInfo.fileWithId.getFileId() : null;
  this.blockSize = this.file.getBlockSize();
  this.orcTail = splitInfo.orcTail;
  this.readerTypes = splitInfo.readerTypes;
  // TODO: potential DFS call
  this.locations = SHIMS.getLocationsWithOffset(fs, file);
  this.isOriginal = splitInfo.isOriginal;
  this.deltas = splitInfo.deltas;
  this.hasBase = splitInfo.hasBase;
  this.rootDir = splitInfo.dir;
  // -1 signals "not yet computed"; filled in later during split estimation.
  this.projColsUncompressedSize = -1;
  this.deltaSplits = splitInfo.getSplits();
  this.allowSyntheticFileIds = allowSyntheticFileIds;
  this.ppdResult = splitInfo.ppdResult;
}