public Column[] getFirstStageShuffleKeyColumns() { List<Column> shuffleKeyColumns = new ArrayList<Column>(); shuffleKeyColumns.add(getOutSchema().getColumn(0)); //distinctseq column if (groupingColumns != null) { for (Column eachColumn: groupingColumns) { if (!shuffleKeyColumns.contains(eachColumn)) { shuffleKeyColumns.add(eachColumn); } } } for (GroupbyNode eachGroupbyNode: subGroupbyPlan) { if (eachGroupbyNode.getGroupingColumns() != null && eachGroupbyNode.getGroupingColumns().length > 0) { for (Column eachColumn: eachGroupbyNode.getGroupingColumns()) { if (!shuffleKeyColumns.contains(eachColumn)) { shuffleKeyColumns.add(eachColumn); } } } } return shuffleKeyColumns.toArray(new Column[]{}); } }
/**
 * Emits the GROUP BY clause for a group-by node.
 *
 * <p>The child of {@code groupby} is visited first; afterwards {@code "GROUP BY "}, the
 * grouping columns joined with {@code ","} and a trailing space are appended to
 * {@code ctx.sb}.
 *
 * @param ctx     builder context whose {@code sb} receives the generated text
 * @param groupby the group-by node being rendered
 * @param stack   node stack passed through to the child visit
 */
public void visitGroupBy(SQLBuilderContext ctx, GroupbyNode groupby, Stack<LogicalNode> stack) {
  visit(ctx, groupby.getChild(), stack);

  ctx.sb.append("GROUP BY ");
  // NOTE(review): the third argument (0) is presumably the start index of the join - confirm
  // against the StringUtils in use.
  ctx.sb.append(StringUtils.join(groupby.getGroupingColumns(), ",", 0));
  ctx.sb.append(" ");
}
/**
 * Renders a group-by node as a GROUP BY clause.
 *
 * <p>First the child of {@code groupby} is visited, then the text {@code "GROUP BY "},
 * the comma-joined grouping columns and a single trailing space are written to
 * {@code ctx.sb}.
 *
 * @param ctx     builder context holding the output buffer {@code sb}
 * @param groupby the group-by node to render
 * @param stack   node stack handed on to the child visit
 */
public void visitGroupBy(SQLBuilderContext ctx, GroupbyNode groupby, Stack<LogicalNode> stack) {
  // The child's output has to come before this clause.
  visit(ctx, groupby.getChild(), stack);

  ctx.sb.append("GROUP BY ");
  ctx.sb.append(StringUtils.join(groupby.getGroupingColumns(), ",", 0));
  ctx.sb.append(" ");
}
/**
 * Sets up the aggregator for one group-by sub plan.
 *
 * <p>The grouping columns of {@code groupbyNode} that are not among the grouping columns of
 * the {@code plan} in scope become the distinct key columns. A key projector over those
 * columns is registered under {@code nodeSequence}.
 *
 * @param groupbyNode  sub plan whose grouping columns are inspected
 * @param nodeSequence sequence number used as the projector key and passed to
 *                     {@code setNodeSequence}
 * @throws IOException declared by the constructor; not thrown directly by the statements here
 */
public DistinctHashAggregator(GroupbyNode groupbyNode, int nodeSequence) throws IOException {
  Set<Column> commonKeys = new HashSet<>(Arrays.asList(plan.getGroupingColumns()));

  // Keep only the columns that are not already grouping keys of the plan.
  List<Column> distinctOnly = new ArrayList<>();
  for (Column candidate : groupbyNode.getGroupingColumns()) {
    if (commonKeys.contains(candidate)) {
      continue;
    }
    distinctOnly.add(candidate);
  }
  Column[] distinctKeyColumns = distinctOnly.toArray(new Column[distinctOnly.size()]);

  this.dummyTuple = NullTuple.create(distinctOnly.size());
  this.distinctAggrDatas = new TupleMap<>();
  distinctGroupbyKeyProjectors.put(nodeSequence, new KeyProjector(inSchema, distinctKeyColumns));
  this.tupleLength = distinctKeyColumns.length;

  setNodeSequence(nodeSequence);
}
/**
 * Initializes the state shared by aggregation executors.
 *
 * <p>Passes the plan's input and output schemas to the parent constructor, records the
 * number of grouping columns, and takes the aggregation functions from the plan. When
 * the plan has none, an empty list is used instead.
 *
 * @param context task attempt context handed to the parent constructor
 * @param plan    group-by plan supplying schemas, grouping columns and functions
 * @param child   child executor handed to the parent constructor
 * @throws IOException declared by the constructor signature
 */
public AggregationExec(final TaskAttemptContext context, GroupbyNode plan,
                       PhysicalExec child) throws IOException {
  super(context, plan.getInSchema(), plan.getOutSchema(), child);

  groupingKeyNum = plan.getGroupingColumns().length;

  if (!plan.hasAggFunctions()) {
    aggFunctions = new ArrayList<>();
    aggFunctionsNum = 0;
  } else {
    aggFunctions = plan.getAggFunctions();
    aggFunctionsNum = aggFunctions.size();
  }
}
/**
 * Sets up the aggregator for one group-by sub plan.
 *
 * <p>Grouping columns of {@code groupbyNode} that do not appear among the grouping columns
 * of the {@code plan} in scope are used as the distinct key columns, and a key projector
 * over them is stored under {@code nodeSequence}.
 *
 * @param groupbyNode  sub plan whose grouping columns are inspected
 * @param nodeSequence sequence number used as the projector key and passed to
 *                     {@code setNodeSequence}
 * @throws IOException declared by the constructor; not thrown directly by the statements here
 */
public DistinctHashAggregator(GroupbyNode groupbyNode, int nodeSequence) throws IOException {
  Set<Column> commonKeys = TUtil.newHashSet(plan.getGroupingColumns());

  // Keep only the columns that are not already grouping keys of the plan.
  List<Column> distinctOnly = new ArrayList<Column>();
  for (Column candidate : groupbyNode.getGroupingColumns()) {
    if (commonKeys.contains(candidate)) {
      continue;
    }
    distinctOnly.add(candidate);
  }
  Column[] distinctKeyColumns = distinctOnly.toArray(new Column[distinctOnly.size()]);

  this.dummyTuple = NullTuple.create(distinctOnly.size());
  this.distinctAggrDatas = new TupleMap<TupleSet>();
  distinctGroupbyKeyProjectors.put(nodeSequence, new KeyProjector(inSchema, distinctKeyColumns));
  this.tupleLength = distinctKeyColumns.length;

  setNodeSequence(nodeSequence);
}
/**
 * Builds the target list of a group-by node: grouping-key targets first, then one target
 * per aggregation name.
 *
 * @param groupbyNode        node supplying grouping columns and aggregation functions
 * @param groupingKeyTargets targets to use for the grouping keys; when {@code null}, a
 *                           field target is created for each grouping column of the node
 * @param aggEvalNames       names for the aggregation targets, matched by position with the
 *                           node's aggregation functions; may be {@code null} for none
 * @return the grouping-key targets followed by the aggregation targets
 */
public static List<Target> buildGroupByTarget(GroupbyNode groupbyNode,
                                              @Nullable List<Target> groupingKeyTargets,
                                              String [] aggEvalNames) {
  final int keyCount = groupingKeyTargets == null
      ? groupbyNode.getGroupingColumns().length
      : groupingKeyTargets.size();
  List<EvalNode> aggEvals = (List<EvalNode>)(List<?>) groupbyNode.getAggFunctions();

  List<Target> result = new ArrayList<>();
  if (groupingKeyTargets == null) {
    for (int keyIdx = 0; keyIdx < keyCount; keyIdx++) {
      result.add(new Target(new FieldEval(groupbyNode.getGroupingColumns()[keyIdx])));
    }
  } else {
    result.addAll(groupingKeyTargets);
  }

  if (aggEvalNames != null) {
    for (int aggIdx = 0; aggIdx < aggEvalNames.length; aggIdx++) {
      // The target takes the given name and the value type of the function at that position.
      result.add(new Target(new FieldEval(aggEvalNames[aggIdx], aggEvals.get(aggIdx).getValueType())));
    }
  }
  return result;
}
/**
 * Prepares a hash aggregator for one group-by sub plan.
 *
 * <p>The grouping key is made of the columns in {@code distinctGroupingKeyColumnSet}
 * followed by those grouping columns of {@code groupbyNode} that the set does not contain.
 * Every aggregation function is bound against {@code schema}.
 *
 * @param groupbyNode sub plan supplying grouping columns and aggregation functions
 * @param schema      schema the aggregation functions are bound to
 * @throws IOException declared by the constructor signature
 */
public HashAggregator(GroupbyNode groupbyNode, Schema schema) throws IOException {
  hashTable = new TupleMap<>(context.getQueryContext().getInt(SessionVars.AGG_HASH_TABLE_SIZE));

  // Start from the distinct grouping keys, then append this node's remaining keys.
  List<Column> keyList = new ArrayList<>(distinctGroupingKeyColumnSet);
  for (Column candidate : groupbyNode.getGroupingColumns()) {
    if (distinctGroupingKeyColumnSet.contains(candidate)) {
      continue;
    }
    keyList.add(candidate);
  }
  Column[] groupingKeyColumns = keyList.toArray(new Column[keyList.size()]);

  if (!groupbyNode.hasAggFunctions()) {
    aggFunctions = new ArrayList<>();
    aggFunctionsNum = 0;
  } else {
    aggFunctions = groupbyNode.getAggFunctions();
    aggFunctionsNum = aggFunctions.size();
  }

  for (EvalNode function : aggFunctions) {
    function.bind(context.getEvalContext(), schema);
  }

  // One slot per grouping key column plus one per aggregation function.
  final int width = groupingKeyColumns.length + aggFunctionsNum;
  tupleSize = width;
  aggregatedTuple = new VTuple(width);
  innerKeyProjector = new KeyProjector(inSchema, groupingKeyColumns);
}
/**
 * Initializes the state shared by aggregation executors.
 *
 * <p>Passes the plan's input and output schemas to the parent constructor, records the
 * number of grouping columns, and takes the aggregation functions from the plan. When
 * the plan has none, an empty array is used instead.
 *
 * @param context task attempt context handed to the parent constructor
 * @param plan    group-by plan supplying schemas, grouping columns and functions
 * @param child   child executor handed to the parent constructor
 * @throws IOException declared by the constructor signature
 */
public AggregationExec(final TaskAttemptContext context, GroupbyNode plan,
                       PhysicalExec child) throws IOException {
  super(context, plan.getInSchema(), plan.getOutSchema(), child);

  groupingKeyNum = plan.getGroupingColumns().length;

  if (!plan.hasAggFunctions()) {
    aggFunctions = new AggregationFunctionCallEval[0];
    aggFunctionsNum = 0;
  } else {
    aggFunctions = plan.getAggFunctions();
    aggFunctionsNum = aggFunctions.length;
  }
}
/**
 * Builds the target array of a group-by node: grouping-key targets first, then one target
 * per aggregation name.
 *
 * @param groupbyNode        node supplying grouping columns and aggregation functions
 * @param groupingKeyTargets targets to use for the grouping keys; when {@code null}, a
 *                           field target is created for each grouping column of the node
 * @param aggEvalNames       names for the aggregation targets, matched by position with the
 *                           node's aggregation functions; may be {@code null} for none
 * @return an array holding the grouping-key targets followed by the aggregation targets
 */
public static Target [] buildGroupByTarget(GroupbyNode groupbyNode,
                                           @Nullable List<Target> groupingKeyTargets,
                                           String [] aggEvalNames) {
  final int keyCount = groupingKeyTargets == null
      ? groupbyNode.getGroupingColumns().length
      : groupingKeyTargets.size();
  final int aggCount = aggEvalNames != null ? aggEvalNames.length : 0;
  EvalNode [] aggEvals = groupbyNode.getAggFunctions();

  Target [] result = new Target[keyCount + aggCount];

  for (int keyIdx = 0; keyIdx < keyCount; keyIdx++) {
    if (groupingKeyTargets != null) {
      result[keyIdx] = groupingKeyTargets.get(keyIdx);
    } else {
      result[keyIdx] = new Target(new FieldEval(groupbyNode.getGroupingColumns()[keyIdx]));
    }
  }

  // aggCount is 0 when no names were given, so this loop is skipped in that case.
  for (int aggIdx = 0; aggIdx < aggCount; aggIdx++) {
    result[keyCount + aggIdx] =
        new Target(new FieldEval(aggEvalNames[aggIdx], aggEvals[aggIdx].getValueType()));
  }
  return result;
}
/**
 * Creates a sort-based aggregation executor.
 *
 * <p>Allocates one function-context slot per aggregation function of the plan (none when
 * the plan returns {@code null}), and resolves each grouping column to its column id in
 * the input schema: by qualified name when the column has a qualifier, by simple name
 * otherwise.
 *
 * @param context task attempt context handed to the parent constructor
 * @param plan    group-by plan supplying grouping columns and aggregation functions
 * @param child   child executor handed to the parent constructor
 * @throws IOException declared by the constructor signature
 */
public SortAggregateExec(TaskAttemptContext context, GroupbyNode plan,
                         PhysicalExec child) throws IOException {
  super(context, plan, child);

  int contextCount = 0;
  if (plan.getAggFunctions() != null) {
    contextCount = plan.getAggFunctions().size();
  }
  contexts = new FunctionContext[contextCount];

  final Column [] keys = plan.getGroupingColumns();
  groupingKeyIds = new int[groupingKeyNum];
  for (int i = 0; i < keys.length; i++) {
    Column key = keys[i];
    groupingKeyIds[i] = key.hasQualifier()
        ? inSchema.getColumnId(key.getQualifiedName())
        : inSchema.getColumnIdByName(key.getSimpleName());
  }

  currentKey = new VTuple(groupingKeyNum);
  outTuple = new VTuple(outSchema.size());
}
private PhysicalExec createBestAggregationPlan(TaskAttemptContext context, GroupbyNode groupbyNode, PhysicalExec subOp) throws IOException { Column[] grpColumns = groupbyNode.getGroupingColumns(); if (grpColumns.length == 0) { return createInMemoryHashAggregation(context, groupbyNode, subOp); } String [] outerLineage = PlannerUtil.getRelationLineage(groupbyNode.getChild()); long estimatedSize = estimateSizeRecursive(context, outerLineage); final long threshold = context.getQueryContext().getLong(SessionVars.HASH_GROUPBY_SIZE_LIMIT) * StorageUnit.MB; // if the relation size is less than the threshold, // the hash aggregation will be used. LOG.info("Aggregation:estimatedSize=" + estimatedSize + ", threshold=" + threshold); if (estimatedSize <= threshold) { LOG.info("The planner chooses [Hash Aggregation]"); return createInMemoryHashAggregation(context, groupbyNode, subOp); } else { return createSortAggregation(context, null, groupbyNode, subOp); } }
private PhysicalExec createBestAggregationPlan(TaskAttemptContext context, GroupbyNode groupbyNode, PhysicalExec subOp) throws IOException { Column[] grpColumns = groupbyNode.getGroupingColumns(); if (grpColumns.length == 0) { return createInMemoryHashAggregation(context, groupbyNode, subOp); } String [] outerLineage = PlannerUtil.getRelationLineage(groupbyNode.getChild()); long estimatedSize = estimateSizeRecursive(context, outerLineage); final long threshold = context.getQueryContext().getLong(SessionVars.HASH_GROUPBY_SIZE_LIMIT) * StorageUnit.MB; // if the relation size is less than the threshold, // the hash aggregation will be used. LOG.info("Aggregation:estimatedSize=" + estimatedSize + ", threshold=" + threshold); if (estimatedSize <= threshold) { LOG.info("The planner chooses [Hash Aggregation]"); return createInMemoryHashAggregation(context, groupbyNode, subOp); } else { return createSortAggregation(context, null, groupbyNode, subOp); } }
/**
 * Creates a sort-based aggregation executor.
 *
 * <p>Allocates one function-context slot per aggregation function of the plan (none when
 * the plan returns {@code null}), and resolves each grouping column to its column id in
 * the input schema: by qualified name when the column has a qualifier, by simple name
 * otherwise.
 *
 * @param context task attempt context handed to the parent constructor
 * @param plan    group-by plan supplying grouping columns and aggregation functions
 * @param child   child executor handed to the parent constructor
 * @throws IOException declared by the constructor signature
 */
public SortAggregateExec(TaskAttemptContext context, GroupbyNode plan,
                         PhysicalExec child) throws IOException {
  super(context, plan, child);

  int contextCount = 0;
  if (plan.getAggFunctions() != null) {
    contextCount = plan.getAggFunctions().length;
  }
  contexts = new FunctionContext[contextCount];

  final Column [] keys = plan.getGroupingColumns();
  groupingKeyIds = new int[groupingKeyNum];
  for (int i = 0; i < keys.length; i++) {
    Column key = keys[i];
    groupingKeyIds[i] = key.hasQualifier()
        ? inSchema.getColumnId(key.getQualifiedName())
        : inSchema.getColumnIdByName(key.getSimpleName());
  }

  currentKey = new VTuple(groupingKeyNum);
  outTuple = new VTuple(outSchema.size());
}
public HashAggregateExec(TaskAttemptContext ctx, GroupbyNode plan, PhysicalExec subOp) throws IOException { super(ctx, plan, subOp); hashKeyProjector = new KeyProjector(inSchema, plan.getGroupingColumns()); hashTable = new TupleMap<>(ctx.getQueryContext().getInt(SessionVars.AGG_HASH_TABLE_SIZE)); this.tuple = new VTuple(plan.getOutSchema().size()); }
public HashAggregateExec(TaskAttemptContext ctx, GroupbyNode plan, PhysicalExec subOp) throws IOException { super(ctx, plan, subOp); hashKeyProjector = new KeyProjector(inSchema, plan.getGroupingColumns()); hashTable = new TupleMap<>(ctx.getQueryContext().getInt(SessionVars.AGG_HASH_TABLE_SIZE)); this.tuple = new VTuple(plan.getOutSchema().size()); }
/**
 * Registers the distinct-aggregation algorithms on both execution blocks.
 *
 * <p>The first stage is enforced to use hash aggregation with no sort specs. The second
 * stage is enforced to use sort aggregation; for every first-stage sub plan a
 * {@code SortSpecArray} is built from that sub plan's grouping columns and tagged with the
 * PID of the second-stage sub plan at the same position.
 *
 * <p>NOTE(review): this pairs first- and second-stage sub plans by position, so it assumes
 * the second stage has at least as many sub plans as the first - confirm against the caller.
 *
 * @param firstStageBlock         execution block of the first stage
 * @param firstStageDistinctNode  distinct group-by node of the first stage
 * @param secondStageBlock        execution block of the second stage
 * @param secondStageDistinctNode distinct group-by node of the second stage
 */
private void setDistinctAggregationEnforcer(
    ExecutionBlock firstStageBlock, DistinctGroupbyNode firstStageDistinctNode,
    ExecutionBlock secondStageBlock, DistinctGroupbyNode secondStageDistinctNode) {
  firstStageBlock.getEnforcer().enforceDistinctAggregation(firstStageDistinctNode.getPID(),
      DistinctAggregationAlgorithm.HASH_AGGREGATION, null);

  List<SortSpecArray> sortSpecArrays = new ArrayList<SortSpecArray>();
  int index = 0;
  for (GroupbyNode groupbyNode: firstStageDistinctNode.getSubPlans()) {
    List<SortSpecProto> sortSpecs = new ArrayList<SortSpecProto>();
    for (Column column: groupbyNode.getGroupingColumns()) {
      sortSpecs.add(SortSpecProto.newBuilder().setColumn(column.getProto()).build());
    }
    sortSpecArrays.add(SortSpecArray.newBuilder()
        .setNodeId(secondStageDistinctNode.getSubPlans().get(index).getPID())
        .addAllSortSpecs(sortSpecs).build());
    // Advance to the matching second-stage sub plan; without this every array was tagged
    // with the PID of the first second-stage sub plan.
    index++;
  }

  secondStageBlock.getEnforcer().enforceDistinctAggregation(secondStageDistinctNode.getPID(),
      DistinctAggregationAlgorithm.SORT_AGGREGATION, sortSpecArrays);
}
/**
 * Registers the distinct-aggregation algorithms on both execution blocks.
 *
 * <p>The first stage is enforced to use hash aggregation with no sort specs. The second
 * stage is enforced to use sort aggregation; for every first-stage sub plan a
 * {@code SortSpecArray} is built from that sub plan's grouping columns and tagged with the
 * PID of the second-stage sub plan at the same position.
 *
 * <p>NOTE(review): this pairs first- and second-stage sub plans by position, so it assumes
 * the second stage has at least as many sub plans as the first - confirm against the caller.
 *
 * @param firstStageBlock         execution block of the first stage
 * @param firstStageDistinctNode  distinct group-by node of the first stage
 * @param secondStageBlock        execution block of the second stage
 * @param secondStageDistinctNode distinct group-by node of the second stage
 */
private void setDistinctAggregationEnforcer(
    ExecutionBlock firstStageBlock, DistinctGroupbyNode firstStageDistinctNode,
    ExecutionBlock secondStageBlock, DistinctGroupbyNode secondStageDistinctNode) {
  firstStageBlock.getEnforcer().enforceDistinctAggregation(firstStageDistinctNode.getPID(),
      DistinctAggregationAlgorithm.HASH_AGGREGATION, null);

  List<SortSpecArray> sortSpecArrays = new ArrayList<>();
  int index = 0;
  for (GroupbyNode groupbyNode: firstStageDistinctNode.getSubPlans()) {
    List<SortSpecProto> sortSpecs = new ArrayList<>();
    for (Column column: groupbyNode.getGroupingColumns()) {
      sortSpecs.add(SortSpecProto.newBuilder().setColumn(column.getProto()).build());
    }
    sortSpecArrays.add(SortSpecArray.newBuilder()
        .setNodeId(secondStageDistinctNode.getSubPlans().get(index).getPID())
        .addAllSortSpecs(sortSpecs).build());
    // Advance to the matching second-stage sub plan; without this every array was tagged
    // with the PID of the first second-stage sub plan.
    index++;
  }

  secondStageBlock.getEnforcer().enforceDistinctAggregation(secondStageDistinctNode.getPID(),
      DistinctAggregationAlgorithm.SORT_AGGREGATION, sortSpecArrays);
}
/**
 * Serializes a group-by node into a logical-node builder.
 *
 * <p>The group-by message always carries the first child id and the distinct flag.
 * Grouping keys, aggregation functions and targets are added only when the node has them.
 *
 * @param context serialization context used to register children and create the node builder
 * @param node    the group-by node to serialize
 * @return the node builder with its group-by part set
 * @throws TajoException declared by the method signature
 */
private PlanProto.LogicalNode.Builder buildGroupby(SerializeContext context, GroupbyNode node)
    throws TajoException {
  int [] childIds = registerGetChildIds(context, node);

  PlanProto.GroupbyNode.Builder groupbyBuilder = PlanProto.GroupbyNode.newBuilder();
  groupbyBuilder.setChildSeq(childIds[0]);
  groupbyBuilder.setDistinct(node.isDistinct());

  if (node.groupingKeyNum() > 0) {
    groupbyBuilder.addAllGroupingKeys(
        ProtoUtil.<CatalogProtos.ColumnProto>toProtoObjects(node.getGroupingColumns()));
  }

  if (node.hasAggFunctions()) {
    // Copy the list into an array so it can be passed to toProtoObjects.
    ProtoObject[] functions =
        node.getAggFunctions().toArray(new ProtoObject[node.getAggFunctions().size()]);
    groupbyBuilder.addAllAggFunctions(
        ProtoUtil.<PlanProto.EvalNodeTree>toProtoObjects(functions));
  }

  if (node.hasTargets()) {
    ProtoObject[] targets = node.getTargets().toArray(new ProtoObject[node.getTargets().size()]);
    groupbyBuilder.addAllTargets(ProtoUtil.<PlanProto.Target>toProtoObjects(targets));
  }

  PlanProto.LogicalNode.Builder nodeBuilder = createNodeBuilder(context, node);
  nodeBuilder.setGroupby(groupbyBuilder);
  return nodeBuilder;
}
/**
 * Serializes a group-by node into a logical-node builder.
 *
 * <p>The group-by message always carries the first child id and the distinct flag.
 * Grouping keys, aggregation functions and targets are added only when the node has them.
 *
 * @param context serialization context used to register children and create the node builder
 * @param node    the group-by node to serialize
 * @return the node builder with its group-by part set
 * @throws TajoException declared by the method signature
 */
private PlanProto.LogicalNode.Builder buildGroupby(SerializeContext context, GroupbyNode node)
    throws TajoException {
  int [] childIds = registerGetChildIds(context, node);

  PlanProto.GroupbyNode.Builder groupbyBuilder = PlanProto.GroupbyNode.newBuilder();
  // Fields that are always present.
  groupbyBuilder.setChildSeq(childIds[0]);
  groupbyBuilder.setDistinct(node.isDistinct());

  // Optional parts, each added only when the node has it.
  final boolean hasGroupingKeys = node.groupingKeyNum() > 0;
  if (hasGroupingKeys) {
    groupbyBuilder.addAllGroupingKeys(
        ProtoUtil.<CatalogProtos.ColumnProto>toProtoObjects(node.getGroupingColumns()));
  }

  if (node.hasAggFunctions()) {
    groupbyBuilder.addAllAggFunctions(
        ProtoUtil.<PlanProto.EvalNodeTree>toProtoObjects(node.getAggFunctions()));
  }

  if (node.hasTargets()) {
    groupbyBuilder.addAllTargets(
        ProtoUtil.<PlanProto.Target>toProtoObjects(node.getTargets()));
  }

  PlanProto.LogicalNode.Builder nodeBuilder = createNodeBuilder(context, node);
  nodeBuilder.setGroupby(groupbyBuilder);
  return nodeBuilder;
}