public static <T> T getFromPathRecursively(Map<Path, T> pathToPartitionInfo, Path dir,
    Map<Map<Path, T>, Map<Path, T>> cacheMap) throws IOException {
  return getFromPathRecursively(pathToPartitionInfo, dir, cacheMap, false);
}

public static <T> T getFromPathRecursively(Map<Path, T> pathToPartitionInfo, Path dir,
    Map<Map<Path, T>, Map<Path, T>> cacheMap, boolean ignoreSchema) throws IOException {
  return getFromPathRecursively(pathToPartitionInfo, dir, cacheMap, ignoreSchema, false);
}
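// For context, a minimal sketch of the lookup semantics these overloads delegate to,
// assuming the map is keyed by partition directories: a file path is matched by walking
// up its parents until a key hits. The helper name and simplified signature below are
// illustrative only, not the actual Hive implementation, which additionally normalizes
// URI schemes (see the file:///, hdfs:///, and bare-path tests further down) and
// memoizes results in the caller-supplied cacheMap.
private static <T> T lookupByWalkingUp(Map<Path, T> pathToPartitionInfo, Path dir) {
  for (Path p = dir; p != null; p = p.getParent()) {
    T info = pathToPartitionInfo.get(p);
    if (info != null) {
      return info; // nearest enclosing partition directory wins
    }
  }
  return null; // no enclosing key matched
}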
private PartitionDesc extractSinglePartSpec(CombineHiveInputSplit hsplit) throws IOException {
  PartitionDesc part = null;
  Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache = new HashMap<>();
  for (Path path : hsplit.getPaths()) {
    PartitionDesc otherPart = HiveFileFormatUtils.getFromPathRecursively(
        pathToPartInfo, path, cache);
    LOG.debug("Found spec for " + path + " " + otherPart + " from " + pathToPartInfo);
    if (part == null) {
      part = otherPart;
    } else if (otherPart != part) {
      // Assume we should have the exact same object.
      // TODO: we could also compare the schema and SerDe, and pass only those to the call
      //       instead; most of the time these would be the same and LLAP IO can handle that.
      LOG.warn("Multiple partitions found; not going to pass a part spec to LLAP IO: {"
          + part.getPartSpec() + "} and {" + otherPart.getPartSpec() + "}");
      return null;
    }
  }
  return part;
}
public JobConf pushProjectionsAndFilters(JobConf jobConf, Path path) throws IOException {
  updateMrWork(jobConf); // TODO: refactor this in HIVE-6366
  final JobConf cloneJobConf = new JobConf(jobConf);
  final PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(
      pathToPartitionInfo, path, null, false, true);
  try {
    if ((part != null) && (part.getTableDesc() != null)) {
      Utilities.copyTableJobPropertiesToConf(part.getTableDesc(), cloneJobConf);
    }
  } catch (Exception e) {
    throw new IOException(e);
  }
  pushProjectionsAndFilters(cloneJobConf, path.toString(), path.toUri().getPath());
  return cloneJobConf;
}
private boolean schemaEvolved(InputSplit s, InputSplit prevSplit, boolean groupAcrossFiles,
    MapWork work) throws IOException {
  boolean retval = false;
  Path path = ((FileSplit) s).getPath();
  PartitionDesc pd = HiveFileFormatUtils.getFromPathRecursively(
      work.getPathToPartitionInfo(), path, cache);
  String currentDeserializerClass = pd.getDeserializerClassName();
  Class<?> currentInputFormatClass = pd.getInputFileFormatClass();

  Class<?> previousInputFormatClass = null;
  String previousDeserializerClass = null;
  if (prevSplit != null) {
    Path prevPath = ((FileSplit) prevSplit).getPath();
    if (!groupAcrossFiles) {
      return !path.equals(prevPath);
    }
    PartitionDesc prevPD = HiveFileFormatUtils.getFromPathRecursively(
        work.getPathToPartitionInfo(), prevPath, cache);
    previousDeserializerClass = prevPD.getDeserializerClassName();
    previousInputFormatClass = prevPD.getInputFileFormatClass();
  }

  if ((currentInputFormatClass != previousInputFormatClass)
      || (!currentDeserializerClass.equals(previousDeserializerClass))) {
    retval = true;
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("Adding split " + path + " to src new group? " + retval);
  }
  return retval;
}
public static void getPartitionValues(VectorizedRowBatchCtx vrbCtx, MapWork mapWork,
    FileSplit split, Object[] partitionValues) throws IOException {
  Map<Path, PartitionDesc> pathToPartitionInfo = mapWork.getPathToPartitionInfo();
  PartitionDesc partDesc = HiveFileFormatUtils.getFromPathRecursively(
      pathToPartitionInfo, split.getPath(), IOPrepareCache.get().getPartitionDescMap());
  getPartitionValues(vrbCtx, partDesc, partitionValues);
}
PartitionDesc partDesc = HiveFileFormatUtils.getFromPathRecursively(parts, path, null);
if (partDesc == null) {
  LlapIoImpl.LOG.info("Not using VectorDeserializeOrcWriter: no partition desc for " + path);
public CombineHiveInputSplit(JobConf job, CombineFileSplit inputSplitShim,
    Map<Path, PartitionDesc> pathToPartitionInfo) throws IOException {
  this.inputSplitShim = inputSplitShim;
  this.pathToPartitionInfo = pathToPartitionInfo;
  if (job != null) {
    if (this.pathToPartitionInfo == null) {
      this.pathToPartitionInfo = Utilities.getMapWork(job).getPathToPartitionInfo();
    }

    // extract all the inputFormatClass names for each chunk in the
    // CombinedSplit.
    Path[] ipaths = inputSplitShim.getPaths();
    if (ipaths.length > 0) {
      PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(
          this.pathToPartitionInfo, ipaths[0], IOPrepareCache.get().getPartitionDescMap());
      inputFormatClassName = part.getInputFileFormatClass().getName();
    }
  }
}
ArrayList<String> a = HiveFileFormatUtils.getFromPathRecursively(
    mapWork.getPathToAliases(), new Path(splitPath), null, false, true);
if (a != null) {
/**
 * Writable interface.
 */
@Override
public void write(DataOutput out) throws IOException {
  inputSplitShim.write(out);
  if (inputFormatClassName == null) {
    if (pathToPartitionInfo == null) {
      pathToPartitionInfo = Utilities.getMapWork(getJob()).getPathToPartitionInfo();
    }

    // extract all the inputFormatClass names for each chunk in the
    // CombinedSplit.
    PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(pathToPartitionInfo,
        inputSplitShim.getPath(0), IOPrepareCache.get().getPartitionDescMap());

    // create a new InputFormat instance if this is the first time to see
    // this class
    inputFormatClassName = part.getInputFileFormatClass().getName();
  }
  out.writeUTF(inputFormatClassName);
}
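// A hedged usage sketch: round-tripping the split through Hadoop's buffer classes,
// assuming CombineHiveInputSplit has the no-arg constructor and readFields()
// counterpart that the Writable contract requires (readFields is not shown above).
DataOutputBuffer outBuf = new DataOutputBuffer();
split.write(outBuf);

DataInputBuffer inBuf = new DataInputBuffer();
inBuf.reset(outBuf.getData(), outBuf.getLength());

CombineHiveInputSplit copy = new CombineHiveInputSplit();
copy.readFields(inBuf); // restores inputSplitShim and reads back inputFormatClassName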
PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(
    pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap());
TableDesc tableDesc = part.getTableDesc();
PartitionDesc part = HiveFileFormatUtils.getFromPathRecursively(
    pathToPartitionInfo, hsplit.getPath(), null);
if (LOG.isDebugEnabled()) {
ret = HiveFileFormatUtils.getFromPathRecursively(
    pathToPartitionInfo, new Path("file:///tbl/par1/part2/part3"),
    IOPrepareCache.get().allocatePartitionDescMap());
assertEquals("file:///tbl/par1/part2/part3 not found.", partDesc_3, ret);

ret = HiveFileFormatUtils.getFromPathRecursively(
    pathToPartitionInfo, new Path("/tbl/par1/part2/part3"),
    IOPrepareCache.get().allocatePartitionDescMap());

ret = HiveFileFormatUtils.getFromPathRecursively(
    pathToPartitionInfo, new Path("hdfs:///tbl/par1/part2/part3"),
    IOPrepareCache.get().allocatePartitionDescMap());

ret = HiveFileFormatUtils.getFromPathRecursively(
    pathToPartitionInfo, new Path("file:///tbl/par1/part2/part4"),
    IOPrepareCache.get().allocatePartitionDescMap());
assertEquals("file:///tbl/par1/part2/part4 not found.", partDesc_4, ret);

ret = HiveFileFormatUtils.getFromPathRecursively(
    pathToPartitionInfo, new Path("/tbl/par1/part2/part4"),
    IOPrepareCache.get().allocatePartitionDescMap());
assertEquals("/tbl/par1/part2/part4 not found.", partDesc_4, ret);

ret = HiveFileFormatUtils.getFromPathRecursively(
    pathToPartitionInfo, new Path("hdfs:///tbl/par1/part2/part4"),
    IOPrepareCache.get().allocatePartitionDescMap());

ret = HiveFileFormatUtils.getFromPathRecursively(
    pathToPartitionInfo, new Path("file:///tbl/par1/part2/part5"),
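// The assertions above imply a fixture whose keys are stored without a URI scheme,
// which is why file:///, hdfs:///, and bare paths all resolve to the same entry.
// A minimal sketch of that setup (partDesc_3/partDesc_4 are the names used in the
// assertions; the map construction itself is assumed, not shown in the source):
Map<Path, PartitionDesc> pathToPartitionInfo = new LinkedHashMap<>();
pathToPartitionInfo.put(new Path("/tbl/par1/part2/part3"), partDesc_3);
pathToPartitionInfo.put(new Path("/tbl/par1/part2/part4"), partDesc_4);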
    .getFromPathRecursively(pathToPartitionInfo, filePath,
        IOPrepareCache.get().getPartitionDescMap());
} catch (AssertionError ae) {