@Override
public int hashCode() {
  final int prime = 31;
  int result = 1;
  result = result * prime +
      (getInputFileFormatClass() == null ? 0 : getInputFileFormatClass().hashCode());
  result = result * prime +
      (getOutputFileFormatClass() == null ? 0 : getOutputFileFormatClass().hashCode());
  result = result * prime + (getProperties() == null ? 0 : getProperties().hashCode());
  result = result * prime + (getTableDesc() == null ? 0 : getTableDesc().hashCode());
  result = result * prime + (getPartSpec() == null ? 0 : getPartSpec().hashCode());
  result = result * prime +
      (getVectorPartitionDesc() == null ? 0 : getVectorPartitionDesc().hashCode());
  return result;
}
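// Added illustration, not from the original source: hashCode() above folds six fields,
// so any equals() must compare the same six fields to preserve the equals/hashCode
// contract. A minimal sketch of a consistent equals(), assuming java.util.Objects is
// imported; the real class may implement it differently.
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (!(o instanceof PartitionDesc)) {
    return false;
  }
  PartitionDesc other = (PartitionDesc) o;
  return Objects.equals(getInputFileFormatClass(), other.getInputFileFormatClass())
      && Objects.equals(getOutputFileFormatClass(), other.getOutputFileFormatClass())
      && Objects.equals(getProperties(), other.getProperties())
      && Objects.equals(getTableDesc(), other.getTableDesc())
      && Objects.equals(getPartSpec(), other.getPartSpec())
      && Objects.equals(getVectorPartitionDesc(), other.getVectorPartitionDesc());
}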
public static PartitionDesc getPartitionDesc(Partition part, TableDesc tableDesc)
    throws HiveException {
  return new PartitionDesc(part, tableDesc);
}
/**
 * Derive additional attributes to be rendered by EXPLAIN.
 * TODO: this method is relied upon by custom input formats to set jobconf properties.
 * This is madness? - This is Hive Storage Handlers!
 */
public void deriveExplainAttributes() {
  if (pathToPartitionInfo != null) {
    for (Map.Entry<Path, PartitionDesc> entry : pathToPartitionInfo.entrySet()) {
      entry.getValue().deriveBaseFileName(entry.getKey());
    }
  }
  MapredLocalWork mapLocalWork = getMapRedLocalWork();
  if (mapLocalWork != null) {
    mapLocalWork.deriveExplainAttributes();
  }
}
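// Hedged illustration, not from the original source. Assumption: deriveBaseFileName
// records the last component of the partition path as the base file name rendered by
// EXPLAIN; exact behavior may vary by Hive version. `partDesc` is assumed in scope.
Path partPath = new Path("/warehouse/sales/ds=2021-01-01");
partDesc.deriveBaseFileName(partPath);
// Expected: partDesc.getBaseFileName() equals partPath.getName(), i.e. "ds=2021-01-01".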
@Override
public PartitionDesc read(Kryo kryo, Input input, Class<PartitionDesc> type) {
  PartitionDesc partitionDesc = super.read(kryo, input, type);
  // The set methods in PartitionDesc intern any duplicate strings, which is why
  // we call them during de-serialization.
  partitionDesc.setBaseFileName(partitionDesc.getBaseFileName());
  partitionDesc.setPartSpec(partitionDesc.getPartSpec());
  partitionDesc.setInputFileFormatClass(partitionDesc.getInputFileFormatClass());
  partitionDesc.setOutputFileFormatClass(partitionDesc.getOutputFileFormatClass());
  return partitionDesc;
}
public ColumnTruncateWork(List<Integer> droppedColumns, Path inputDir, Path outputDir,
    boolean hasDynamicPartitions, DynamicPartitionCtx dynPartCtx) {
  super();
  this.droppedColumns = droppedColumns;
  this.inputDir = inputDir;
  this.outputDir = outputDir;
  this.hasDynamicPartitions = hasDynamicPartitions;
  this.dynPartCtx = dynPartCtx;
  PartitionDesc partDesc = new PartitionDesc();
  partDesc.setInputFileFormatClass(RCFileBlockMergeInputFormat.class);
  this.addPathToPartitionInfo(inputDir, partDesc);
}
public PartitionDesc(final Partition part) throws HiveException {
  this(part, getTableDesc(part.getTable()));
}
private VectorPartitionContext(PartitionDesc partDesc) {
  this.partDesc = partDesc;
  TableDesc td = partDesc.getTableDesc();
  // Use table properties in case of unpartitioned tables,
  // and the union of table properties and partition properties, with partition
  // taking precedence, in the case of partitioned tables.
  Properties overlayedProps =
      SerDeUtils.createOverlayedProperties(td.getProperties(), partDesc.getProperties());
  Map<String, String> partSpec = partDesc.getPartSpec();
  tableName = String.valueOf(overlayedProps.getProperty("name"));
  partName = String.valueOf(partSpec);
}
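// Added sketch of the overlay semantics the comment above relies on: partition
// properties are layered over table properties, so partition values win and
// table-only keys survive. The property values below are made up for illustration.
Properties tableProps = new Properties();
tableProps.setProperty("name", "default.sales");
tableProps.setProperty("serialization.format", "1");
Properties partProps = new Properties();
partProps.setProperty("serialization.format", "2");
Properties overlayed = SerDeUtils.createOverlayedProperties(tableProps, partProps);
// overlayed.getProperty("serialization.format") -> "2" (partition wins)
// overlayed.getProperty("name") -> "default.sales" (table-only key is kept)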
@SuppressWarnings("rawtypes") private static Path createDummyFileForEmptyPartition(Path path, JobConf job, PartitionDesc partDesc, Path hiveScratchDir) throws Exception { String strPath = path.toString(); // The input file does not exist, replace it by a empty file if (partDesc.getTableDesc().isNonNative()) { // if this isn't a hive table we can't create an empty file for it. return path; } Properties props = SerDeUtils.createOverlayedProperties( partDesc.getTableDesc().getProperties(), partDesc.getProperties()); HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, partDesc); boolean oneRow = partDesc.getInputFileFormatClass() == OneNullRowInputFormat.class; Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, props, oneRow); LOG.info("Changed input file {} to empty file {} ({})", strPath, newPath, oneRow); return newPath; }
when(mockTableDesc.getProperties()).thenReturn(new Properties());
when(mockPartitionDesc.getProperties()).thenReturn(new Properties());
when(mockPartitionDesc.getTableDesc()).thenReturn(mockTableDesc);
doReturn(HiveSequenceFileOutputFormat.class)
    .when(mockPartitionDesc).getOutputFileFormatClass();
public void initEmptyInputChildren(List<Operator<?>> children, Configuration hconf)
    throws SerDeException, Exception {
  setChildOperators(children);
  Map<String, Configuration> tableNameToConf = cloneConfsForNestedColPruning(hconf);
  for (Operator<?> child : children) {
    TableScanOperator tsOp = (TableScanOperator) child;
    StructObjectInspector soi = null;
    PartitionDesc partDesc = conf.getAliasToPartnInfo().get(tsOp.getConf().getAlias());
    Configuration newConf = tableNameToConf.get(partDesc.getTableDesc().getTableName());
    Deserializer serde = partDesc.getTableDesc().getDeserializer();
    partDesc.setProperties(partDesc.getProperties());
    MapOpCtx opCtx = new MapOpCtx(tsOp.getConf().getAlias(), child, partDesc);
    StructObjectInspector tableRowOI = (StructObjectInspector) serde.getObjectInspector();
    initObjectInspector(newConf, opCtx, tableRowOI);
    soi = opCtx.rowObjectInspector;
    child.getParentOperators().add(this);
    childrenOpToOI.put(child, soi);
  }
}
/**
 * Return a deserializer object corresponding to the partitionDesc.
 */
public Deserializer getDeserializer(Configuration conf) throws Exception {
  Properties schema = getProperties();
  String clazzName = getDeserializerClassName();
  Deserializer deserializer = ReflectionUtil.newInstance(
      conf.getClassByName(clazzName).asSubclass(Deserializer.class), conf);
  SerDeUtils.initializeSerDe(deserializer, conf, getTableDesc().getProperties(), schema);
  return deserializer;
}
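// Hedged usage sketch, not from the original source: resolving a partition's SerDe
// from its descriptor. `conf` and `partDesc` are assumed in scope, and the caller
// must handle or declare the Exception thrown by getDeserializer.
Deserializer serde = partDesc.getDeserializer(conf);
StructObjectInspector rowOI = (StructObjectInspector) serde.getObjectInspector();
// rowOI now describes the partition's row schema, as used in initEmptyInputChildren above.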
@SuppressWarnings("rawtypes") private static Path createDummyFileForEmptyTable(JobConf job, MapWork work, Path hiveScratchDir, String alias) throws Exception { TableDesc tableDesc = work.getAliasToPartnInfo().get(alias).getTableDesc(); if (tableDesc.isNonNative()) { // if it does not need native storage, we can't create an empty file for it. return null; } Properties props = tableDesc.getProperties(); HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, tableDesc); Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, props, false); LOG.info("Changed input file for alias {} to newPath", alias, newPath); // update the work LinkedHashMap<Path, ArrayList<String>> pathToAliases = work.getPathToAliases(); ArrayList<String> newList = new ArrayList<String>(1); newList.add(alias); pathToAliases.put(newPath, newList); work.setPathToAliases(pathToAliases); PartitionDesc pDesc = work.getAliasToPartnInfo().get(alias).clone(); work.addPathToPartitionInfo(newPath, pDesc); return newPath; }
// Fragment 1: pick properties and output format from the partition (empty input path)
// or from the table (no input path registered for the alias at all).
if (isEmptyPath) {
  PartitionDesc partDesc = work.getPathToPartitionInfo().get(path);
  props = partDesc.getProperties();
  outFileFormat = partDesc.getOutputFileFormatClass();
  nonNative = partDesc.getTableDesc().isNonNative();
} else {
  TableDesc tableDesc = work.getAliasToPartnInfo().get(alias).getTableDesc();
  props = tableDesc.getProperties();
  outFileFormat = tableDesc.getOutputFileFormatClass();
}

// Fragment 2, later in the same method: after the empty file has been created at
// newPath, repoint the path-to-partition mapping accordingly.
if (isEmptyPath) {
  pathToPartitionInfo.remove(path);
} else {
  PartitionDesc pDesc = work.getAliasToPartnInfo().get(alias).clone();
  pathToPartitionInfo.put(newPath.toUri().toString(), pDesc);
}
public PartitionDesc(final Partition part) throws HiveException {
  PartitionDescConstructorHelper(part, getTableDesc(part.getTable()), true);
  if (Utilities.isInputFileFormatSelfDescribing(this)) {
    // If the input format is self-describing, there is no need to send column info
    // per partition, since it's not used anyway.
    Table tbl = part.getTable();
    setProperties(MetaStoreUtils.getSchemaWithoutCols(part.getTPartition().getSd(),
        part.getTPartition().getSd(), part.getParameters(), tbl.getDbName(),
        tbl.getTableName(), tbl.getPartitionKeys()));
  } else {
    setProperties(part.getMetadataFromPartitionSchema());
  }
}
private void processAlias(MapWork work, Path path, Collection<String> aliasesAffected,
    Set<String> aliases) {
  // The aliases that are allowed to map to a null scan.
  Collection<String> allowed = aliasesAffected.stream()
      .filter(a -> aliases.contains(a)).collect(Collectors.toList());
  if (!allowed.isEmpty()) {
    PartitionDesc partDesc = work.getPathToPartitionInfo().get(path).clone();
    PartitionDesc newPartition = changePartitionToMetadataOnly(partDesc, path);
    // Prefix partition with something to avoid it being a hidden file.
    Path fakePath = new Path(NullScanFileSystem.getBase() + newPartition.getTableName()
        + "/part" + encode(newPartition.getPartSpec()));
    StringInternUtils.internUriStringsInPath(fakePath);
    work.addPathToPartitionInfo(fakePath, newPartition);
    work.addPathToAlias(fakePath, new ArrayList<>(allowed));
    aliasesAffected.removeAll(allowed);
    if (aliasesAffected.isEmpty()) {
      work.removePathToAlias(path);
      work.removePathToPartitionInfo(path);
    }
  }
}
protected FetchInputFormatSplit[] getNextSplits() throws Exception {
  while (getNextPath()) {
    // Not using FileInputFormat.setInputPaths() here because it forces a connection to the
    // default file system - which may or may not be online during pure metadata operations.
    job.set("mapred.input.dir", StringUtils.escapeString(currPath.toString()));
    // The fetch operator is not vectorized, so turn the vectorization flag off here
    // so that a non-vectorized record reader is created below.
    HiveConf.setBoolVar(job, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false);
    Class<? extends InputFormat> formatter = currDesc.getInputFileFormatClass();
    Utilities.copyTableJobPropertiesToConf(currDesc.getTableDesc(), job);
    InputFormat inputFormat = getInputFormatFromCache(formatter, job);
    InputSplit[] splits = inputFormat.getSplits(job, 1);
    FetchInputFormatSplit[] inputSplits = new FetchInputFormatSplit[splits.length];
    for (int i = 0; i < splits.length; i++) {
      inputSplits[i] = new FetchInputFormatSplit(splits[i], inputFormat);
    }
    if (work.getSplitSample() != null) {
      inputSplits = splitSampling(work.getSplitSample(), inputSplits);
    }
    if (inputSplits.length > 0) {
      return inputSplits;
    }
  }
  return null;
}
private PartitionDesc changePartitionToMetadataOnly(PartitionDesc desc, Path path) {
  if (desc == null) {
    return null;
  }
  boolean isEmpty = false;
  try {
    isEmpty = Utilities.isEmptyPath(physicalContext.getConf(), path);
  } catch (IOException e) {
    LOG.error("Cannot determine if the table is empty", e);
  }
  desc.setInputFileFormatClass(
      isEmpty ? ZeroRowsInputFormat.class : OneNullRowInputFormat.class);
  desc.setOutputFileFormatClass(HiveIgnoreKeyTextOutputFormat.class);
  desc.getProperties().setProperty(serdeConstants.SERIALIZATION_LIB,
      NullStructSerDe.class.getName());
  return desc;
}
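// Added illustration of the rewrite above; the assertions are for exposition only and
// are not from the source. A metadata-only partition reads zero rows when the path is
// empty and exactly one synthetic null row otherwise, and NullStructSerDe ensures no
// real data is deserialized during the scan.
PartitionDesc metadataOnly = changePartitionToMetadataOnly(desc, path);
assert metadataOnly.getInputFileFormatClass() == OneNullRowInputFormat.class
    || metadataOnly.getInputFileFormatClass() == ZeroRowsInputFormat.class;
assert NullStructSerDe.class.getName().equals(
    metadataOnly.getProperties().getProperty(serdeConstants.SERIALIZATION_LIB));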
private void processCurrPathForMmWriteIds(InputFormat inputFormat, List<Path> dirs,
    List<Path> dirsWithOriginals) throws IOException {
  if (inputFormat instanceof HiveInputFormat) {
    dirs.add(currPath); // No need to process here.
  }
  ValidWriteIdList validWriteIdList;
  if (AcidUtils.isInsertOnlyTable(currDesc.getTableDesc().getProperties())) {
    validWriteIdList = extractValidWriteIdList();
  } else {
    validWriteIdList = null; // non-MM case
  }
  if (validWriteIdList != null) {
    Utilities.FILE_OP_LOGGER.info("Processing " + currDesc.getTableName() + " for MM paths");
  }
  HiveInputFormat.processPathsForMmRead(
      Lists.newArrayList(currPath), job, validWriteIdList, dirs, dirsWithOriginals);
}
public String getDeserializerClassName() {
  Properties schema = getProperties();
  String clazzName = schema.getProperty(serdeConstants.SERIALIZATION_LIB);
  if (clazzName == null) {
    throw new IllegalStateException("Property " + serdeConstants.SERIALIZATION_LIB
        + " cannot be null");
  }
  return clazzName;
}
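// Hedged example, not from the original source: the class name comes straight from the
// partition's serialization.lib property. The SerDe class below is an illustrative
// value only; `partDesc` is assumed in scope.
Properties props = partDesc.getProperties();
props.setProperty(serdeConstants.SERIALIZATION_LIB,
    "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
String clazz = partDesc.getDeserializerClassName();
// clazz == "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"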
private PartitionDesc extractSinglePartSpec(CombineHiveInputSplit hsplit) throws IOException {
  PartitionDesc part = null;
  Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache = new HashMap<>();
  for (Path path : hsplit.getPaths()) {
    PartitionDesc otherPart = HiveFileFormatUtils.getFromPathRecursively(
        pathToPartInfo, path, cache);
    LOG.debug("Found spec for " + path + " " + otherPart + " from " + pathToPartInfo);
    if (part == null) {
      part = otherPart;
    } else if (otherPart != part) { // Assume we should have the exact same object.
      // TODO: we could also compare the schema and SerDe, and pass only those to the call
      // instead; most of the time these would be the same and LLAP IO can handle that.
      LOG.warn("Multiple partitions found; not going to pass a part spec to LLAP IO: {"
          + part.getPartSpec() + "} and {" + otherPart.getPartSpec() + "}");
      return null;
    }
  }
  return part;
}