@Override
public int hashCode() {
  final int prime = 31;
  int result = 1;
  result = result * prime +
      (getInputFileFormatClass() == null ? 0 : getInputFileFormatClass().hashCode());
  result = result * prime +
      (getOutputFileFormatClass() == null ? 0 : getOutputFileFormatClass().hashCode());
  result = result * prime + (getProperties() == null ? 0 : getProperties().hashCode());
  result = result * prime + (getTableDesc() == null ? 0 : getTableDesc().hashCode());
  result = result * prime + (getPartSpec() == null ? 0 : getPartSpec().hashCode());
  result = result * prime +
      (getVectorPartitionDesc() == null ? 0 : getVectorPartitionDesc().hashCode());
  return result;
}
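// Added illustration, not from the original source: hashCode() above folds six fields,
// so any equals() must compare the same six fields to preserve the equals/hashCode
// contract. A minimal sketch of a consistent equals(), assuming java.util.Objects is
// imported; the real class may implement it differently.
@Override
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (!(o instanceof PartitionDesc)) {
    return false;
  }
  PartitionDesc other = (PartitionDesc) o;
  return Objects.equals(getInputFileFormatClass(), other.getInputFileFormatClass())
      && Objects.equals(getOutputFileFormatClass(), other.getOutputFileFormatClass())
      && Objects.equals(getProperties(), other.getProperties())
      && Objects.equals(getTableDesc(), other.getTableDesc())
      && Objects.equals(getPartSpec(), other.getPartSpec())
      && Objects.equals(getVectorPartitionDesc(), other.getVectorPartitionDesc());
}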
public static PartitionDesc getPartitionDesc(Partition part, TableDesc tableDesc)
    throws HiveException {
  return new PartitionDesc(part, tableDesc);
}
/**
 * Derive additional attributes to be rendered by EXPLAIN.
 * TODO: this method is relied upon by custom input formats to set jobconf properties.
 * This is madness? - This is Hive Storage Handlers!
 */
public void deriveExplainAttributes() {
  if (pathToPartitionInfo != null) {
    for (Map.Entry<Path, PartitionDesc> entry : pathToPartitionInfo.entrySet()) {
      entry.getValue().deriveBaseFileName(entry.getKey());
    }
  }
  MapredLocalWork mapLocalWork = getMapRedLocalWork();
  if (mapLocalWork != null) {
    mapLocalWork.deriveExplainAttributes();
  }
}
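// Hedged illustration, not from the original source. Assumption: deriveBaseFileName
// records the last component of the partition path as the base file name rendered by
// EXPLAIN; exact behavior may vary by Hive version. `partDesc` is assumed in scope.
Path partPath = new Path("/warehouse/sales/ds=2021-01-01");
partDesc.deriveBaseFileName(partPath);
// Expected: partDesc.getBaseFileName() equals partPath.getName(), i.e. "ds=2021-01-01".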
@Override
public PartitionDesc read(Kryo kryo, Input input, Class<PartitionDesc> type) {
  PartitionDesc partitionDesc = super.read(kryo, input, type);
  // The set methods in PartitionDesc intern any duplicate strings, which is why
  // we call them during de-serialization.
  partitionDesc.setBaseFileName(partitionDesc.getBaseFileName());
  partitionDesc.setPartSpec(partitionDesc.getPartSpec());
  partitionDesc.setInputFileFormatClass(partitionDesc.getInputFileFormatClass());
  partitionDesc.setOutputFileFormatClass(partitionDesc.getOutputFileFormatClass());
  return partitionDesc;
}
public ColumnTruncateWork(List<Integer> droppedColumns, Path inputDir, Path outputDir,
    boolean hasDynamicPartitions, DynamicPartitionCtx dynPartCtx) {
  super();
  this.droppedColumns = droppedColumns;
  this.inputDir = inputDir;
  this.outputDir = outputDir;
  this.hasDynamicPartitions = hasDynamicPartitions;
  this.dynPartCtx = dynPartCtx;
  PartitionDesc partDesc = new PartitionDesc();
  partDesc.setInputFileFormatClass(RCFileBlockMergeInputFormat.class);
  this.addPathToPartitionInfo(inputDir, partDesc);
}
public PartitionDesc(final Partition part) throws HiveException {
  this(part, getTableDesc(part.getTable()));
}
private VectorPartitionContext(PartitionDesc partDesc) {
  this.partDesc = partDesc;
  TableDesc td = partDesc.getTableDesc();
  // Use table properties in case of unpartitioned tables,
  // and the union of table properties and partition properties, with partition
  // taking precedence, in the case of partitioned tables.
  Properties overlayedProps =
      SerDeUtils.createOverlayedProperties(td.getProperties(), partDesc.getProperties());
  Map<String, String> partSpec = partDesc.getPartSpec();
  tableName = String.valueOf(overlayedProps.getProperty("name"));
  partName = String.valueOf(partSpec);
}
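// Added sketch of the overlay semantics the comment above relies on: partition
// properties are layered over table properties, so partition values win and
// table-only keys survive. The property values below are made up for illustration.
Properties tableProps = new Properties();
tableProps.setProperty("name", "default.sales");
tableProps.setProperty("serialization.format", "1");
Properties partProps = new Properties();
partProps.setProperty("serialization.format", "2");
Properties overlayed = SerDeUtils.createOverlayedProperties(tableProps, partProps);
// overlayed.getProperty("serialization.format") -> "2" (partition wins)
// overlayed.getProperty("name") -> "default.sales" (table-only key is kept)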
@SuppressWarnings("rawtypes") private static Path createDummyFileForEmptyPartition(Path path, JobConf job, PartitionDesc partDesc, Path hiveScratchDir) throws Exception { String strPath = path.toString(); // The input file does not exist, replace it by a empty file if (partDesc.getTableDesc().isNonNative()) { // if this isn't a hive table we can't create an empty file for it. return path; } Properties props = SerDeUtils.createOverlayedProperties( partDesc.getTableDesc().getProperties(), partDesc.getProperties()); HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, partDesc); boolean oneRow = partDesc.getInputFileFormatClass() == OneNullRowInputFormat.class; Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, props, oneRow); LOG.info("Changed input file {} to empty file {} ({})", strPath, newPath, oneRow); return newPath; }
when(mockTableDesc.getProperties()).thenReturn(new Properties());
when(mockPartitionDesc.getProperties()).thenReturn(new Properties());
when(mockPartitionDesc.getTableDesc()).thenReturn(mockTableDesc);
doReturn(HiveSequenceFileOutputFormat.class)
    .when(mockPartitionDesc).getOutputFileFormatClass();
public void initEmptyInputChildren(List<Operator<?>> children, Configuration hconf)
    throws SerDeException, Exception {
  setChildOperators(children);
  Map<String, Configuration> tableNameToConf = cloneConfsForNestedColPruning(hconf);
  for (Operator<?> child : children) {
    TableScanOperator tsOp = (TableScanOperator) child;
    StructObjectInspector soi = null;
    PartitionDesc partDesc = conf.getAliasToPartnInfo().get(tsOp.getConf().getAlias());
    Configuration newConf = tableNameToConf.get(partDesc.getTableDesc().getTableName());
    Deserializer serde = partDesc.getTableDesc().getDeserializer();
    partDesc.setProperties(partDesc.getProperties());
    MapOpCtx opCtx = new MapOpCtx(tsOp.getConf().getAlias(), child, partDesc);
    StructObjectInspector tableRowOI = (StructObjectInspector) serde.getObjectInspector();
    initObjectInspector(newConf, opCtx, tableRowOI);
    soi = opCtx.rowObjectInspector;
    child.getParentOperators().add(this);
    childrenOpToOI.put(child, soi);
  }
}
/**
 * Return a deserializer object corresponding to the partitionDesc.
 */
public Deserializer getDeserializer(Configuration conf) throws Exception {
  Properties schema = getProperties();
  String clazzName = getDeserializerClassName();
  Deserializer deserializer = ReflectionUtil.newInstance(
      conf.getClassByName(clazzName).asSubclass(Deserializer.class), conf);
  SerDeUtils.initializeSerDe(deserializer, conf, getTableDesc().getProperties(), schema);
  return deserializer;
}
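// Hedged usage sketch, not from the original source: resolving a partition's SerDe
// from its descriptor. `conf` and `partDesc` are assumed in scope, and the caller
// must handle or declare the Exception thrown by getDeserializer.
Deserializer serde = partDesc.getDeserializer(conf);
StructObjectInspector rowOI = (StructObjectInspector) serde.getObjectInspector();
// rowOI now describes the partition's row schema, as used in initEmptyInputChildren above.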
@SuppressWarnings("rawtypes") private static Path createDummyFileForEmptyTable(JobConf job, MapWork work, Path hiveScratchDir, String alias) throws Exception { TableDesc tableDesc = work.getAliasToPartnInfo().get(alias).getTableDesc(); if (tableDesc.isNonNative()) { // if it does not need native storage, we can't create an empty file for it. return null; } Properties props = tableDesc.getProperties(); HiveOutputFormat outFileFormat = HiveFileFormatUtils.getHiveOutputFormat(job, tableDesc); Path newPath = createEmptyFile(hiveScratchDir, outFileFormat, job, props, false); LOG.info("Changed input file for alias {} to newPath", alias, newPath); // update the work LinkedHashMap<Path, ArrayList<String>> pathToAliases = work.getPathToAliases(); ArrayList<String> newList = new ArrayList<String>(1); newList.add(alias); pathToAliases.put(newPath, newList); work.setPathToAliases(pathToAliases); PartitionDesc pDesc = work.getAliasToPartnInfo().get(alias).clone(); work.addPathToPartitionInfo(newPath, pDesc); return newPath; }
// Fragment 1: pick properties and output format from the partition (empty input path)
// or from the table (no input path registered for the alias at all).
if (isEmptyPath) {
  PartitionDesc partDesc = work.getPathToPartitionInfo().get(path);
  props = partDesc.getProperties();
  outFileFormat = partDesc.getOutputFileFormatClass();
  nonNative = partDesc.getTableDesc().isNonNative();
} else {
  TableDesc tableDesc = work.getAliasToPartnInfo().get(alias).getTableDesc();
  props = tableDesc.getProperties();
  outFileFormat = tableDesc.getOutputFileFormatClass();
}

// Fragment 2, later in the same method: after the empty file has been created at
// newPath, repoint the path-to-partition mapping accordingly.
if (isEmptyPath) {
  pathToPartitionInfo.remove(path);
} else {
  PartitionDesc pDesc = work.getAliasToPartnInfo().get(alias).clone();
  pathToPartitionInfo.put(newPath.toUri().toString(), pDesc);
}
public PartitionDesc(final Partition part) throws HiveException {
  PartitionDescConstructorHelper(part, getTableDesc(part.getTable()), true);
  if (Utilities.isInputFileFormatSelfDescribing(this)) {
    // If the input format is self-describing, there is no need to send column info
    // per partition, since it's not used anyway.
    Table tbl = part.getTable();
    setProperties(MetaStoreUtils.getSchemaWithoutCols(part.getTPartition().getSd(),
        part.getTPartition().getSd(), part.getParameters(), tbl.getDbName(),
        tbl.getTableName(), tbl.getPartitionKeys()));
  } else {
    setProperties(part.getMetadataFromPartitionSchema());
  }
}
private void processAlias(MapWork work, Path path, Collection<String> aliasesAffected,
    Set<String> aliases) {
  // The aliases that are allowed to map to a null scan.
  Collection<String> allowed = aliasesAffected.stream()
      .filter(a -> aliases.contains(a)).collect(Collectors.toList());
  if (!allowed.isEmpty()) {
    PartitionDesc partDesc = work.getPathToPartitionInfo().get(path).clone();
    PartitionDesc newPartition = changePartitionToMetadataOnly(partDesc, path);
    // Prefix partition with something to avoid it being a hidden file.
    Path fakePath = new Path(NullScanFileSystem.getBase() + newPartition.getTableName()
        + "/part" + encode(newPartition.getPartSpec()));
    StringInternUtils.internUriStringsInPath(fakePath);
    work.addPathToPartitionInfo(fakePath, newPartition);
    work.addPathToAlias(fakePath, new ArrayList<>(allowed));
    aliasesAffected.removeAll(allowed);
    if (aliasesAffected.isEmpty()) {
      work.removePathToAlias(path);
      work.removePathToPartitionInfo(path);
    }
  }
}
protected FetchInputFormatSplit[] getNextSplits() throws Exception {
  while (getNextPath()) {
    // Not using FileInputFormat.setInputPaths() here because it forces a connection to the
    // default file system - which may or may not be online during pure metadata operations.
    job.set("mapred.input.dir", StringUtils.escapeString(currPath.toString()));
    // The fetch operator is not vectorized, so turn the vectorization flag off here
    // so that a non-vectorized record reader is created below.
    HiveConf.setBoolVar(job, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false);
    Class<? extends InputFormat> formatter = currDesc.getInputFileFormatClass();
    Utilities.copyTableJobPropertiesToConf(currDesc.getTableDesc(), job);
    InputFormat inputFormat = getInputFormatFromCache(formatter, job);
    InputSplit[] splits = inputFormat.getSplits(job, 1);
    FetchInputFormatSplit[] inputSplits = new FetchInputFormatSplit[splits.length];
    for (int i = 0; i < splits.length; i++) {
      inputSplits[i] = new FetchInputFormatSplit(splits[i], inputFormat);
    }
    if (work.getSplitSample() != null) {
      inputSplits = splitSampling(work.getSplitSample(), inputSplits);
    }
    if (inputSplits.length > 0) {
      return inputSplits;
    }
  }
  return null;
}
private PartitionDesc changePartitionToMetadataOnly(PartitionDesc desc, Path path) {
  if (desc == null) {
    return null;
  }
  boolean isEmpty = false;
  try {
    isEmpty = Utilities.isEmptyPath(physicalContext.getConf(), path);
  } catch (IOException e) {
    LOG.error("Cannot determine if the table is empty", e);
  }
  desc.setInputFileFormatClass(
      isEmpty ? ZeroRowsInputFormat.class : OneNullRowInputFormat.class);
  desc.setOutputFileFormatClass(HiveIgnoreKeyTextOutputFormat.class);
  desc.getProperties().setProperty(serdeConstants.SERIALIZATION_LIB,
      NullStructSerDe.class.getName());
  return desc;
}
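// Added illustration of the rewrite above; the assertions are for exposition only and
// are not from the source. A metadata-only partition reads zero rows when the path is
// empty and exactly one synthetic null row otherwise, and NullStructSerDe ensures no
// real data is deserialized during the scan.
PartitionDesc metadataOnly = changePartitionToMetadataOnly(desc, path);
assert metadataOnly.getInputFileFormatClass() == OneNullRowInputFormat.class
    || metadataOnly.getInputFileFormatClass() == ZeroRowsInputFormat.class;
assert NullStructSerDe.class.getName().equals(
    metadataOnly.getProperties().getProperty(serdeConstants.SERIALIZATION_LIB));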
private void processCurrPathForMmWriteIds(InputFormat inputFormat, List<Path> dirs,
    List<Path> dirsWithOriginals) throws IOException {
  if (inputFormat instanceof HiveInputFormat) {
    dirs.add(currPath); // No need to process here.
  }
  ValidWriteIdList validWriteIdList;
  if (AcidUtils.isInsertOnlyTable(currDesc.getTableDesc().getProperties())) {
    validWriteIdList = extractValidWriteIdList();
  } else {
    validWriteIdList = null; // non-MM case
  }
  if (validWriteIdList != null) {
    Utilities.FILE_OP_LOGGER.info("Processing " + currDesc.getTableName() + " for MM paths");
  }
  HiveInputFormat.processPathsForMmRead(
      Lists.newArrayList(currPath), job, validWriteIdList, dirs, dirsWithOriginals);
}
public String getDeserializerClassName() {
  Properties schema = getProperties();
  String clazzName = schema.getProperty(serdeConstants.SERIALIZATION_LIB);
  if (clazzName == null) {
    throw new IllegalStateException("Property " + serdeConstants.SERIALIZATION_LIB
        + " cannot be null");
  }
  return clazzName;
}
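// Hedged example, not from the original source: the class name comes straight from the
// partition's serialization.lib property. The SerDe class below is an illustrative
// value only; `partDesc` is assumed in scope.
Properties props = partDesc.getProperties();
props.setProperty(serdeConstants.SERIALIZATION_LIB,
    "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe");
String clazz = partDesc.getDeserializerClassName();
// clazz == "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"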
private PartitionDesc extractSinglePartSpec(CombineHiveInputSplit hsplit) throws IOException {
  PartitionDesc part = null;
  Map<Map<Path, PartitionDesc>, Map<Path, PartitionDesc>> cache = new HashMap<>();
  for (Path path : hsplit.getPaths()) {
    PartitionDesc otherPart = HiveFileFormatUtils.getFromPathRecursively(
        pathToPartInfo, path, cache);
    LOG.debug("Found spec for " + path + " " + otherPart + " from " + pathToPartInfo);
    if (part == null) {
      part = otherPart;
    } else if (otherPart != part) { // Assume we should have the exact same object.
      // TODO: we could also compare the schema and SerDe, and pass only those to the call
      // instead; most of the time these would be the same and LLAP IO can handle that.
      LOG.warn("Multiple partitions found; not going to pass a part spec to LLAP IO: {"
          + part.getPartSpec() + "} and {" + otherPart.getPartSpec() + "}");
      return null;
    }
  }
  return part;
}