/**
 * Forward a record of join results.
 *
 * <p>Called at the end of a group; the whole body is a single delegation to
 * {@code checkAndGenObject()}.
 *
 * @throws HiveException propagated unchanged from {@code checkAndGenObject()}
 */
@Override
public void endGroup() throws HiveException {
  checkAndGenObject();
}
/**
 * Closes this operator.
 *
 * <p>Delegates to {@code super.closeOp(abort)} first and then clears the
 * {@code emptyList} and {@code joinKeys} fields. If the superclass call throws,
 * the two fields are left untouched.
 *
 * @param abort passed through unchanged to {@code super.closeOp}
 * @throws HiveException propagated from {@code super.closeOp}
 */
@Override
public void closeOp(boolean abort) throws HiveException {
  super.closeOp(abort);
  // Drop the references held by this operator (presumably so they can be
  // reclaimed once the operator is closed -- confirm against the field owners).
  emptyList = null;
  joinKeys = null;
}
// Closes the enclosing type, whose declaration lies outside this excerpt.
}
if (hasFilter(order[aliasNum])) { filterTags[aliasNum] = getFilterTag(rightObj); innerJoin(skip, left, right); } else if (type == JoinDesc.LEFT_SEMI_JOIN) { if (innerJoin(skip, left, right)) { int result = leftOuterJoin(skip, left, right); if (result < 0) { continue; } else if (type == JoinDesc.RIGHT_OUTER_JOIN || (type == JoinDesc.FULL_OUTER_JOIN && allLeftNull)) { if (allLeftFirst && !rightOuterJoin(skip, left, right) || !allLeftFirst && !innerJoin(skip, left, right)) { continue; if (tryLOForFO && leftOuterJoin(skip, left, right) > 0) { loopAgain = allLeftFirst; done = !loopAgain; tryLOForFO = false; } else if (allLeftFirst && !rightOuterJoin(skip, left, right) || !allLeftFirst && !innerJoin(skip, left, right)) { continue; boolean forward = createForwardJoinObject(skipVectors[numAliases - 1]); producedRow |= forward; done = (type == JoinDesc.LEFT_SEMI_JOIN) && forward;
// Copies state from `clone` into this instance. Fields are either assigned
// directly (no defensive copy is made in these statements, so both objects
// end up sharing the same storage, condn, alias, child-operator arrays and
// dummy objects) or routed through this object's setters (schema, column
// expression map).
this.storage = clone.storage;
this.condn = clone.condn;
this.conf = clone.getConf();
this.setSchema(clone.getSchema());
this.alias = clone.alias;
this.childOperatorsArray = clone.childOperatorsArray;
this.childOperatorsTag = clone.childOperatorsTag;
this.setColumnExprMap(clone.getColumnExprMap());
this.dummyObj = clone.dummyObj;
this.dummyObjVectors = clone.dummyObjVectors;
private ExprNodeDesc getRSColExprFromResidualFilter(ExprNodeDesc childExpr, CommonJoinOperator<JoinDesc> join) { ExprNodeColumnDesc colExpr = ExprNodeDescUtils.getColumnExpr(childExpr); final String joinColName = colExpr.getColumn(); // use name to get the alias pos of parent and name in parent final int aliasPos = join.getConf().getReversedExprs().get(joinColName); final ExprNodeDesc rsColExpr = join.getColumnExprMap().get(joinColName); // Get the correct parent final ReduceSinkOperator parentRS = (ReduceSinkOperator) (join.getParentOperators().get(aliasPos)); // Fetch the colExpr from parent return parentRS.getColumnExprMap().get( ExprNodeDescUtils.extractColName(rsColExpr)); }
ColumnPrunerProcCtx cppCtx = (ColumnPrunerProcCtx) ctx; List<Operator<? extends OperatorDesc>> childOperators = op .getChildOperators(); LOG.info("JOIN " + op.getIdentifier() + " oldExprs: " + conf.getExprs()); RowSchema joinRS = op.getSchema(); ArrayList<String> outputCols = new ArrayList<String>(); ArrayList<ColumnInfo> rs = new ArrayList<ColumnInfo>(); for (int pos = 0; pos < op.getParentOperators().size(); pos++) { List<ExprNodeDesc> valueCols = conf.getExprs() .get(Byte.valueOf((byte) pos)); LOG.info("JOIN " + op.getIdentifier() + " newExprs: " + conf.getExprs()); op.setColumnExprMap(newColExprMap); conf.setOutputColumnNames(outputCols); op.getSchema().setSignature(rs); cppCtx.getJoinPrunedColLists().put(op, prunedColLists);
long newNumRows = 0; CommonJoinOperator<? extends JoinDesc> jop = (CommonJoinOperator<? extends JoinDesc>) nd; List<Operator<? extends OperatorDesc>> parents = jop.getParentOperators(); int numAttr = 1; AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx; if (!(jop.getParentOperators().get(pos) instanceof ReduceSinkOperator)) { allSatisfyPreCondition = false; break; ReduceSinkOperator rsOp = (ReduceSinkOperator) jop.getParentOperators().get(0); List<String> keyExprs = StatsUtils.getQualifedReducerKeyNames(rsOp.getConf() .getOutputKeyColumnNames()); ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos); Statistics parentStats; parentStats = parent.getStatistics().clone(); Map<String, ExprNodeDesc> colExprMap = jop.getColumnExprMap(); RowSchema rs = jop.getSchema(); List<ColStatistics> outColStats = Lists.newArrayList(); for (ColumnInfo ci : rs.getSignature()) { aspCtx.addAffectedColumn((ExprNodeColumnDesc) end); String colName = ((ExprNodeColumnDesc) end).getColumn(); int pos = jop.getConf().getReversedExprs().get(key); ColStatistics cs = joinStats.get(pos).getColumnStatisticsFromColName(colName); String outColName = key;
protected void checkAndGenObject() throws HiveException { if (closeOpCalled) { LOG.warn("checkAndGenObject is called after operator " + id + " " + getName() + " called closeOp"); return; genAllOneUniqueJoinObject(); } else { genUniqueJoinObject(0, 0); alw.addRow(dummyObj[i]); } else if (!hasEmpty && alw.isSingleRow()) { if (hasAnyFiltered(alias, alw.rowIter().first())) { hasEmpty = true; AbstractRowContainer.RowIterator<List<Object>> iter = alw.rowIter(); for (List<Object> row = iter.first(); row != null; row = iter.next()) { reportProgress(); if (hasAnyFiltered(alias, row)) { hasEmpty = true; break; genAllOneUniqueJoinObject(); } else if (!needsPostEvaluation && !hasEmpty && !hasLeftSemiJoin) { genUniqueJoinObject(0, 0); } else { genJoinObject();
boolean hasFilter = hasFilter(order[0]); AbstractRowContainer.RowIterator<List<Object>> iter = storage[order[0]].rowIter(); for (List<Object> rightObj = iter.first(); rightObj != null; rightObj = iter.next()) { boolean rightNull = rightObj == dummyObj[0]; if (hasFilter) { filterTags[0] = getFilterTag(rightObj); genObject(1, rightFirst, rightNull); rightFirst = false; internalForward(forwardCache, outputObjInspector); countAfterReport = 0;
/**
 * Creates a handler bound to the given join operator.
 *
 * <p>Besides keeping the operator reference, the constructor snapshots three
 * values from it at construction time: {@code numAliases}, the operator's
 * configuration (via {@code getConf()}) and {@code noOuterJoin}. Later changes
 * to those members on {@code joinOp} are not picked up by the copies stored here.
 *
 * @param joinOp join operator this handler works for; dereferenced immediately,
 *               so it must not be {@code null}
 */
public SkewJoinHandler(CommonJoinOperator<? extends OperatorDesc> joinOp) {
  this.joinOp = joinOp;
  numAliases = joinOp.numAliases;
  conf = joinOp.getConf();
  noOuterJoin = joinOp.noOuterJoin;
}
LOG.debug("STATS-" + jop.toString() + ": Overflow in number of rows. " + newNumRows + " rows will be set to Long.MAX_VALUE"); LOG.debug("STATS-" + jop.toString() + ": Equals 0 in number of rows. " + newNumRows + " rows will be set to 1"); newNumRows = 1; for (ColStatistics cs : colStats) { colNameStatsAvailable.add(cs.getColumnName()); int pos = jop.getConf().getReversedExprs().get(cs.getColumnName()); long oldRowCount = rowCountParents.get(pos); double ratio = (double) newNumRows / (double) oldRowCount; for (String colName : jop.getSchema().getColumnNames()) { if (!colNameStatsAvailable.contains(colName)) { neededColumns.add(colName); StatsUtils.estimateRowSizeFromSchema(conf, jop.getSchema().getSignature(), neededColumns); newDataSize = StatsUtils.safeAdd(newDataSize, StatsUtils.safeMult(restColumnsDefaultSize, newNumRows));
/**
 * Recursively emits every combination of stored rows for the aliases at
 * positions {@code aliasNum .. numAliases - 1} of {@code order}.
 *
 * <p>For each row stored for the current alias, its first
 * {@code joinValues[order[aliasNum]].size()} values are written into
 * {@code forwardCache} starting at {@code forwardCachePos}. When the current
 * alias is the last one the filled cache is forwarded and
 * {@code countAfterReport} is reset; otherwise the method recurses into the
 * next alias, continuing right after the values just written.
 *
 * @param aliasNum position in {@code order} of the alias being expanded
 * @param forwardCachePos first slot of {@code forwardCache} to fill for this alias
 * @throws HiveException propagated from progress reporting, forwarding or row iteration
 */
private void genUniqueJoinObject(int aliasNum, int forwardCachePos) throws HiveException {
  final AbstractRowContainer.RowIterator<List<Object>> rows =
      storage[order[aliasNum]].rowIter();

  List<Object> current = rows.first();
  while (current != null) {
    reportProgress();

    // Copy this alias' join values into its slice of the shared output buffer.
    final int width = joinValues[order[aliasNum]].size();
    for (int col = 0; col < width; col++) {
      forwardCache[forwardCachePos + col] = current.get(col);
    }
    final int nextPos = forwardCachePos + width;

    if (aliasNum != numAliases - 1) {
      // More aliases to combine: descend, continuing after the slots just written.
      genUniqueJoinObject(aliasNum + 1, nextPos);
    } else {
      // Every alias has contributed: the buffer holds one complete output row.
      internalForward(forwardCache, outputObjInspector);
      countAfterReport = 0;
    }

    current = rows.next();
  }
}
int srcPos = join.getParentOperators().indexOf(source); List<Operator<? extends OperatorDesc>> parents = join.getParentOperators(); if (join.getConf().getNullSafes() != null) { for (boolean b : join.getConf().getNullSafes()) { if (b) { return null;
genAllOneUniqueJoinObject(); LOG.info("called genAllOneUniqueJoinObject"); } else { LOG.trace("calling genUniqueJoinObject"); genUniqueJoinObject(0, 0); LOG.trace("called genUniqueJoinObject"); genAllOneUniqueJoinObject(); LOG.trace("called genAllOneUniqueJoinObject"); } else if (!hasEmpty) { LOG.trace("calling genUniqueJoinObject"); genUniqueJoinObject(0, 0); LOG.trace("called genUniqueJoinObject"); } else { LOG.trace("calling genObject"); genObject(null, 0, new IntermediateObject(new ArrayList[numAliases], 0), true); LOG.trace("called genObject");
/**
 * Starts join-row generation from the first alias in {@code order}.
 *
 * <p>Every row stored for that alias is placed in {@code intermediate[0]};
 * {@code skipVectors[0][0]} records whether the row is the alias' dummy object
 * (identity comparison with {@code dummyObj[0]}). When the alias has a filter,
 * the row's filter tag is stored in {@code filterTags[0]} first. Generation
 * then continues with {@code genObject(1, ...)}, which is told whether this is
 * the first row of the iteration and whether the row is the dummy.
 *
 * @throws HiveException propagated from row iteration, filter-tag lookup or {@code genObject}
 */
private void genJoinObject() throws HiveException {
  final boolean firstAliasFiltered = hasFilter(order[0]);
  final AbstractRowContainer.RowIterator<List<Object>> rows = storage[order[0]].rowIter();

  boolean isFirstRow = true;
  List<Object> current = rows.first();
  while (current != null) {
    // Reference equality on purpose: the dummy row is recognised by identity.
    final boolean isDummy = (current == dummyObj[0]);

    if (firstAliasFiltered) {
      filterTags[0] = getFilterTag(current);
    }
    skipVectors[0][0] = isDummy;
    intermediate[0] = current;

    genObject(1, isFirstRow, isDummy);

    isFirstRow = false;
    current = rows.next();
  }
}
ArrayList<boolean[]> newNulls = joinObjects(inputNulls, newObj, intObj, aliasNum, childFirstRow); genObject(newNulls, aliasNum + 1, intObj, firstRow); while (nullsIter.hasNext()) { boolean[] nullsVec = nullsIter.next(); createForwardJoinObject(intObj, nullsVec);
/**
 * Forwards {@code row} by delegating directly to {@code forward(row, outputOI)}.
 *
 * <p>The method is {@code protected} and not final, so a subclass can override
 * it (presumably to intercept rows before they are forwarded -- confirm against
 * the subclasses).
 *
 * @param row row object handed to {@code forward}
 * @param outputOI object inspector handed to {@code forward} together with the row
 * @throws HiveException propagated from {@code forward}
 */
protected void internalForward(Object row, ObjectInspector outputOI) throws HiveException {
  forward(row, outputOI);
}
// Reset the handler's tracking state: no current big-key tag (-1) and no skew
// key seen in the current group; also clear the join operator's
// newGroupStarted flag.
currBigKeyTag = -1;
joinOp.newGroupStarted = false;
// Capture the join operator's group key object. NOTE(review): the cast to
// List<Object> cannot be fully checked at run time because of erasure.
dummyKey = (List<Object>) joinOp.getGroupKeyObject();
skewKeyInCurrentGroup = false;
/**
 * Recursively emits every combination of stored rows for the aliases at
 * positions {@code aliasNum .. numAliases - 1} of {@code order}.
 *
 * <p>For each row held in the container for the current alias, its first
 * {@code joinValues.get(order[aliasNum]).size()} values are written into
 * {@code forwardCache} starting at {@code forwardCachePos}. When the current
 * alias is the last one the filled cache is forwarded and
 * {@code countAfterReport} is reset; otherwise the method recurses into the
 * next alias, continuing right after the values just written.
 *
 * @param aliasNum position in {@code order} of the alias being expanded
 * @param forwardCachePos first slot of {@code forwardCache} to fill for this alias
 * @throws HiveException propagated from forwarding or row iteration
 */
private void genUniqueJoinObject(int aliasNum, int forwardCachePos) throws HiveException {
  final AbstractRowContainer<ArrayList<Object>> rows = storage.get(order[aliasNum]);

  ArrayList<Object> current = rows.first();
  while (current != null) {
    // Copy this alias' join values into its slice of the shared output buffer.
    final int width = joinValues.get(order[aliasNum]).size();
    for (int col = 0; col < width; col++) {
      forwardCache[forwardCachePos + col] = current.get(col);
    }
    final int nextPos = forwardCachePos + width;

    if (aliasNum != numAliases - 1) {
      // More aliases to combine: descend, continuing after the slots just written.
      genUniqueJoinObject(aliasNum + 1, nextPos);
    } else {
      // Every alias has contributed: the buffer holds one complete output row.
      forward(forwardCache, outputObjInspector);
      countAfterReport = 0;
    }

    current = rows.next();
  }
}