private byte findSmallTable() {
  byte smallTablePos = -1;
  for (byte pos = 0; pos < mapJoinTables.length; pos++) {
    if (pos != conf.getPosBigTable()) {
      smallTablePos = pos;
      break;
    }
  }
  Preconditions.checkState(smallTablePos != -1);
  return smallTablePos;
}
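// A minimal standalone sketch (an illustration, not Hive code) of the same scan
// pattern findSmallTable() uses: return the first table position that is not the
// big table's position. The class and method names here are hypothetical.
public final class SmallTablePosDemo {
  static byte firstSmallPos(int numTables, int posBigTable) {
    for (byte pos = 0; pos < numTables; pos++) {
      if (pos != posBigTable) {
        return pos; // the first non-big position wins
      }
    }
    throw new IllegalStateException("no small table found");
  }

  public static void main(String[] args) {
    System.out.println(firstSmallPos(2, 0)); // big table at 0 -> small table at 1
    System.out.println(firstSmallPos(2, 1)); // big table at 1 -> small table at 0
  }
}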
/**
 * Clean up data participating in the join, i.e. the in-memory and on-disk files
 * for the small table(s) and the big table.
 */
private void cleanupGraceHashJoin() {
  for (byte pos = 0; pos < mapJoinTables.length; pos++) {
    if (pos != conf.getPosBigTable()) {
      LOG.info("Cleaning up small table data at pos: " + pos);
      HybridHashTableContainer container = (HybridHashTableContainer) mapJoinTables[pos];
      container.clear();
    }
  }
}
@Override
public void initializeOp(Configuration hconf) throws HiveException {
  final int posBigTable = conf.getPosBigTable();
  savePosBigTable = posBigTable;

  // We need an input object inspector for the row we will extract out of the
  // vectorized row batch, not, for example, an original inspector for an ORC table, etc.
  inputObjInspectors[posBigTable] =
      VectorizedBatchUtil.convertToStandardStructObjectInspector(
          (StructObjectInspector) inputObjInspectors[posBigTable]);

  // Call super VectorMapJoinOuterFilteredOperator, which calls super MapJoinOperator with
  // the new input inspector.
  super.initializeOp(hconf);

  firstBatch = true;
}
/**
 * Continue processing the join between the spilled hashtable(s) and the spilled big table.
 * @param partitionId the partition number across all small tables to process
 * @throws HiveException
 * @throws IOException
 * @throws SerDeException
 * @throws ClassNotFoundException
 */
private void continueProcess(int partitionId)
    throws HiveException, IOException, SerDeException, ClassNotFoundException {
  for (byte pos = 0; pos < mapJoinTables.length; pos++) {
    if (pos != conf.getPosBigTable()) {
      LOG.info("Going to reload hash partition " + partitionId);
      reloadHashTable(pos, partitionId);
    }
  }
  reProcessBigTable(partitionId);
}
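// Note (summarizing continueProcess() above and reProcessBigTable() below): for each
// spilled partition, the small-table side is reloaded into memory first via
// reloadHashTable(), and only then are the spilled big-table rows replayed through
// process(). Reloading first is what makes the replayed probe rows able to match.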
@Override
public boolean isSame(OperatorDesc other) {
  if (super.isSame(other)) {
    MapJoinDesc otherDesc = (MapJoinDesc) other;
    return Objects.equals(getParentToInput(), otherDesc.getParentToInput())
        && Objects.equals(getKeyCountsExplainDesc(), otherDesc.getKeyCountsExplainDesc())
        && getPosBigTable() == otherDesc.getPosBigTable()
        && isBucketMapJoin() == otherDesc.isBucketMapJoin();
  }
  return false;
}
@Override
protected List<ObjectInspector> getValueObjectInspectors(
    byte alias, List<ObjectInspector>[] aliasToObjectInspectors) {
  int[] valueIndex = conf.getValueIndex(alias);
  if (valueIndex == null) {
    return super.getValueObjectInspectors(alias, aliasToObjectInspectors);
  }
  List<ObjectInspector> inspectors = aliasToObjectInspectors[alias];
  int bigPos = conf.getPosBigTable();
  Converter[] converters = new Converter[valueIndex.length];
  List<ObjectInspector> valueOI = new ArrayList<ObjectInspector>();
  for (int i = 0; i < valueIndex.length; i++) {
    if (valueIndex[i] >= 0 && !joinKeysObjectInspectors[bigPos].isEmpty()) {
      if (conf.getNoOuterJoin()) {
        valueOI.add(joinKeysObjectInspectors[bigPos].get(valueIndex[i]));
      } else {
        // It is an outer join. We are going to add the inspector from the
        // inner side, but the key value will come from the outer side, so
        // we need to create a converter from inputOI to outputOI.
        valueOI.add(inspectors.get(i));
        converters[i] = ObjectInspectorConverters.getConverter(
            joinKeysObjectInspectors[bigPos].get(valueIndex[i]), inspectors.get(i));
      }
    } else {
      valueOI.add(inspectors.get(i));
    }
  }
  unwrapContainer[alias] = new UnwrapRowContainer(alias, valueIndex, converters, hasFilter(alias));
  return valueOI;
}
private Boolean findGrandChildSubqueryMapjoin(MapJoinWalkerCtx ctx, MapJoinOperator mapJoin) {
  Operator<? extends OperatorDesc> parent = mapJoin;
  while (true) {
    if (parent.getChildOperators() == null || parent.getChildOperators().size() != 1) {
      return null;
    }
    Operator<? extends OperatorDesc> ch = parent.getChildOperators().get(0);
    if (ch instanceof MapJoinOperator) {
      if (!nonSubqueryMapJoin((MapJoinOperator) ch, mapJoin)) {
        if (ch.getParentOperators().indexOf(parent)
            == ((MapJoinOperator) ch).getConf().getPosBigTable()) {
          // This branch feeds the child map join as its big table, i.e. the rows
          // do not come from the local (small table) branch.
          return true;
        }
      }
      return false; // not from a sub-query
    }
    if ((ch instanceof JoinOperator) || (ch instanceof UnionOperator)
        || (ch instanceof ReduceSinkOperator) || (ch instanceof LateralViewJoinOperator)
        || (ch instanceof GroupByOperator) || (ch instanceof ScriptOperator)) {
      return null;
    }
    parent = ch;
  }
}
public static <T> Set<T> findOperatorsUpstreamJoinAccounted(Operator<?> start, Class<T> clazz,
    Set<T> found) {
  if (clazz.isInstance(start)) {
    found.add((T) start);
  }
  int onlyIncludeIndex = -1;
  if (start instanceof AbstractMapJoinOperator) {
    AbstractMapJoinOperator mapJoinOp = (AbstractMapJoinOperator) start;
    MapJoinDesc desc = (MapJoinDesc) mapJoinOp.getConf();
    onlyIncludeIndex = desc.getPosBigTable();
  }
  if (start.getParentOperators() != null) {
    int i = 0;
    for (Operator<?> parent : start.getParentOperators()) {
      if (onlyIncludeIndex >= 0) {
        if (onlyIncludeIndex == i) {
          findOperatorsUpstreamJoinAccounted(parent, clazz, found);
        }
      } else {
        findOperatorsUpstreamJoinAccounted(parent, clazz, found);
      }
      i++;
    }
  }
  return found;
}
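// A hypothetical usage sketch (the mapJoinOp variable is an assumption, not taken
// from the snippets above): walk upstream from a map join, following only the
// big-table parent at each join encountered, and collect the table scans that
// actually feed the streamed side.
Set<TableScanOperator> bigTableScans = findOperatorsUpstreamJoinAccounted(
    mapJoinOp, TableScanOperator.class, new HashSet<TableScanOperator>());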
public VectorMapJoinBaseOperator(CompilationOpContext ctx, VectorizationContext vContext,
    OperatorDesc conf) throws HiveException {
  super(ctx);

  MapJoinDesc desc = (MapJoinDesc) conf;
  this.conf = desc;

  order = desc.getTagOrder();
  numAliases = desc.getExprs().size();
  posBigTable = (byte) desc.getPosBigTable();
  filterMaps = desc.getFilterMap();
  noOuterJoin = desc.isNoOuterJoin();

  // We are making a new output vectorized row batch.
  vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames(),
      /* vContextEnvironment */ vContext);
}
public VectorMapJoinBaseOperator(CompilationOpContext ctx, OperatorDesc conf,
    VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException {
  super(ctx);

  MapJoinDesc desc = (MapJoinDesc) conf;
  this.conf = desc;
  this.vContext = vContext;
  this.vectorDesc = (VectorMapJoinDesc) vectorDesc;

  order = desc.getTagOrder();
  numAliases = desc.getExprs().size();
  posBigTable = (byte) desc.getPosBigTable();
  filterMaps = desc.getFilterMap();
  noOuterJoin = desc.isNoOuterJoin();

  // We are making a new output vectorized row batch.
  vOutContext = new VectorizationContext(getName(), desc.getOutputColumnNames(),
      /* vContextEnvironment */ vContext);
  vOutContext.setInitialTypeInfos(Arrays.asList(getOutputTypeInfos(desc)));
}
private boolean validateMapJoinDesc(MapJoinDesc desc) {
  byte posBigTable = (byte) desc.getPosBigTable();
  List<ExprNodeDesc> filterExprs = desc.getFilters().get(posBigTable);
  if (!validateExprNodeDesc(
      filterExprs, "Filter", VectorExpressionDescriptor.Mode.FILTER, /* allowComplex */ true)) {
    return false;
  }
  List<ExprNodeDesc> keyExprs = desc.getKeys().get(posBigTable);
  if (!validateExprNodeDesc(keyExprs, "Key")) {
    return false;
  }
  List<ExprNodeDesc> valueExprs = desc.getExprs().get(posBigTable);
  if (!validateExprNodeDesc(valueExprs, "Value")) {
    return false;
  }
  Byte[] order = desc.getTagOrder();
  Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]);
  List<ExprNodeDesc> smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable);
  if (!validateExprNodeDesc(smallTableExprs, "Small Table")) {
    return false;
  }
  if (desc.getResidualFilterExprs() != null && !desc.getResidualFilterExprs().isEmpty()) {
    setOperatorIssue("Non-equi joins not supported");
    return false;
  }
  return true;
}
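// Note on the ternary over desc.getTagOrder() above: it assumes a binary join.
// With exactly two tags, whichever of order[0]/order[1] is not the big table's
// position must be the single small table, which is why the variable is named
// posSingleVectorMapJoinSmallTable.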
if (idx == getConf().getPosBigTable()) {
  continue;
}
/**
 * Iterate over the big table row container and feed process() with the leftover rows.
 * @param partitionId the partition from which to take out spilled big table rows
 * @throws HiveException
 */
protected void reProcessBigTable(int partitionId) throws HiveException {
  // For a binary join, firstSmallTable is the only small table and holds the reference
  // to the spilled big table rows. For an n-way join, we only spill once (while
  // processing the first small table), so again only firstSmallTable holds that reference.
  HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
  ObjectContainer bigTable = partition.getMatchfileObjContainer();
  LOG.info("Hybrid Grace Hash Join: Going to process spilled big table rows in partition "
      + partitionId + ". Number of rows: " + bigTable.size());
  while (bigTable.hasNext()) {
    Object row = bigTable.next();
    process(row, conf.getPosBigTable());
  }
  bigTable.clear();
}
@Override
@SuppressWarnings("unchecked")
protected void initializeOp(Configuration hconf) throws HiveException {
  if (conf.getGenJoinKeys()) {
    int tagLen = conf.getTagLength();
    joinKeys = new List[tagLen];
    JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(), NOTSKIPBIGTABLE, hconf);
    joinKeysObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinKeys,
        inputObjInspectors, NOTSKIPBIGTABLE, tagLen);
  }
  super.initializeOp(hconf);

  numMapRowsRead = 0;

  // All other tables are small, and are cached in the hash table.
  posBigTable = (byte) conf.getPosBigTable();

  emptyList = new RowContainer<List<Object>>(1, hconf, reporter);
  RowContainer<List<Object>> bigPosRC = JoinUtil.getRowContainer(hconf,
      rowContainerStandardObjectInspectors[posBigTable], posBigTable, joinCacheSize,
      spillTableDesc, conf, !hasFilter(posBigTable), reporter);
  storage[posBigTable] = bigPosRC;
}
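// Note: only the big table gets a RowContainer here; the small-table sides are
// already materialized in the hash tables, so storage[] needs a row container
// only at posBigTable (plus the shared emptyList for positions with no rows).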
public HashTableSinkDesc(MapJoinDesc clone) {
  this.bigKeysDirMap = clone.getBigKeysDirMap();
  this.conds = clone.getConds();
  this.exprs = new HashMap<Byte, List<ExprNodeDesc>>(clone.getExprs());
  this.handleSkewJoin = clone.getHandleSkewJoin();
  this.keyTableDesc = clone.getKeyTableDesc();
  this.noOuterJoin = clone.getNoOuterJoin();
  this.outputColumnNames = clone.getOutputColumnNames();
  this.reversedExprs = clone.getReversedExprs();
  this.skewKeyDefinition = clone.getSkewKeyDefinition();
  this.skewKeysValuesTables = clone.getSkewKeysValuesTables();
  this.smallKeysDirMap = clone.getSmallKeysDirMap();
  this.tagOrder = clone.getTagOrder();
  this.filters = new HashMap<Byte, List<ExprNodeDesc>>(clone.getFilters());
  this.filterMap = clone.getFilterMap();
  this.keys = new HashMap<Byte, List<ExprNodeDesc>>(clone.getKeys());
  this.keyTblDesc = clone.getKeyTblDesc();
  this.valueTblDescs = clone.getValueTblDescs();
  this.valueTblFilteredDescs = clone.getValueFilteredTblDescs();
  this.posBigTable = clone.getPosBigTable();
  this.retainList = clone.getRetainList();
  this.dumpFilePrefix = clone.getDumpFilePrefix();
  this.bucketMapjoinContext = new BucketMapJoinContext(clone);
  this.hashtableMemoryUsage = clone.getHashTableMemoryUsage();
}