/**
 * Builds a selectivity estimator over the given child relational expression.
 *
 * @param childRel the input whose filter selectivity will be estimated
 */
protected FilterSelectivityEstimator(RelNode childRel) {
  super(true);
  this.childRel = childRel;
  // Cache the child's estimated row count up front; selectivity math
  // below is computed against this cardinality.
  this.childCardinality = RelMetadataQuery.instance().getRowCount(childRel);
}
/**
 * Returns whether the memory used by {@code input}, divided across
 * {@code buckets}, fits within {@code maxSize} per bucket.
 *
 * @param maxSize maximum memory allowed per bucket
 * @param input   relational expression whose memory footprint is checked
 * @param buckets number of buckets the memory is split across
 * @return true if the per-bucket memory is within the limit; false if it
 *         exceeds the limit or the memory estimate is unavailable
 */
public static boolean isFittingIntoMemory(Double maxSize, RelNode input, int buckets) {
  final Double currentMemory =
      RelMetadataQuery.instance().cumulativeMemoryWithinPhase(input);
  // Unknown memory estimate: conservatively report that it does not fit.
  return currentMemory != null && currentMemory / buckets <= maxSize;
}
/**
 * Chooses which join input should be streamed: the side with the larger
 * memory estimate streams while the smaller side is held in memory.
 * Ties go to streaming the left relation.
 *
 * @return the relation to stream, or {@code NONE} if neither side has a
 *         memory estimate
 */
public MapJoinStreamingRelation getStreamingSide() {
  final RelMetadataQuery mq = RelMetadataQuery.instance();
  final Double leftSize = mq.memory(left);
  final Double rightSize = mq.memory(right);
  if (leftSize == null && rightSize == null) {
    return MapJoinStreamingRelation.NONE;
  }
  if (rightSize == null) {
    // Only the left estimate is known: buffer left, stream right.
    return MapJoinStreamingRelation.RIGHT_RELATION;
  }
  if (leftSize == null) {
    return MapJoinStreamingRelation.LEFT_RELATION;
  }
  // Both known: stream the larger side (left wins ties).
  return leftSize < rightSize
      ? MapJoinStreamingRelation.RIGHT_RELATION
      : MapJoinStreamingRelation.LEFT_RELATION;
}
/**
 * Estimates the memory consumed per split when this join executes as a
 * common (shuffle) join. The join's algorithm is temporarily swapped so
 * the metadata providers cost it as a common join, then restored.
 *
 * @param join the join to estimate
 * @return memory per split, or null if either estimate is unavailable
 */
@Override
public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) {
  final RelMetadataQuery mq = RelMetadataQuery.instance();
  final JoinAlgorithm oldAlgo = join.getJoinAlgorithm();
  join.setJoinAlgorithm(TezCommonJoinAlgorithm.INSTANCE);
  final Double memoryWithinPhase;
  final Integer splitCount;
  try {
    memoryWithinPhase = mq.cumulativeMemoryWithinPhase(join);
    splitCount = mq.splitCount(join);
  } finally {
    // Restore even if metadata computation throws, so the join is never
    // left carrying the temporary algorithm.
    join.setJoinAlgorithm(oldAlgo);
  }
  if (memoryWithinPhase == null || splitCount == null) {
    return null;
  }
  return memoryWithinPhase / splitCount;
}
/**
 * Returns whether the input's hash-distribution keys cover every grouping
 * column, i.e. the input is already bucketed on the group-by keys.
 */
public boolean isBucketedInput() {
  final List<Integer> distributionKeys =
      RelMetadataQuery.instance().distribution(this.getInput()).getKeys();
  return distributionKeys.containsAll(groupSet.asList());
}
/**
 * Costs a table scan from its row count and average row size.
 *
 * @param ts the table scan to cost
 * @return the scan cost as computed by the algorithm utilities
 */
@Override
public RelOptCost getScanCost(HiveTableScan ts) {
  final double rowCount = ts.getRows();
  final Double averageRowSize = RelMetadataQuery.instance().getAverageRowSize(ts);
  return algoUtils.computeScanCost(rowCount, averageRowSize);
}
@Override public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { // Check streaming side RelNode inMemoryInput; if (join.getStreamingSide() == MapJoinStreamingRelation.LEFT_RELATION) { inMemoryInput = join.getRight(); } else if (join.getStreamingSide() == MapJoinStreamingRelation.RIGHT_RELATION) { inMemoryInput = join.getLeft(); } else { return null; } // If bucket map join, only a split goes in memory final Double memoryInput = RelMetadataQuery.instance().cumulativeMemoryWithinPhase(inMemoryInput); final Integer splitCount = RelMetadataQuery.instance().splitCount(inMemoryInput); if (memoryInput == null || splitCount == null) { return null; } return memoryInput / splitCount; }
/**
 * Split count when the join does not repartition: it simply inherits the
 * split count of the streamed (large) input.
 *
 * @param join the join to inspect
 * @return the streamed side's split count, or null when no streaming side
 *         has been chosen
 */
public static Integer getSplitCountWithoutRepartition(HiveJoin join) {
  final RelNode largeInput;
  switch (join.getStreamingSide()) {
  case LEFT_RELATION:
    largeInput = join.getLeft();
    break;
  case RIGHT_RELATION:
    largeInput = join.getRight();
    break;
  default:
    return null;
  }
  return RelMetadataQuery.instance().splitCount(largeInput);
}
/**
 * Costs the join purely by cardinality: the sum of both inputs' row
 * counts, with zero CPU and IO components.
 *
 * @param join the join to cost
 */
@Override
public RelOptCost getCost(HiveJoin join) {
  final RelMetadataQuery mq = RelMetadataQuery.instance();
  final double leftRows = mq.getRowCount(join.getLeft());
  final double rightRows = mq.getRowCount(join.getRight());
  return HiveCost.FACTORY.makeCost(leftRows + rightRows, 0.0, 0.0);
}
/**
 * Computes which join inputs are already sorted on their join keys.
 *
 * @return a bit set with bit {@code i} set when input {@code i}'s
 *         collation contains its join keys in order
 * @throws CalciteSemanticException if the join predicate cannot be decomposed
 */
public ImmutableBitSet getSortedInputs() throws CalciteSemanticException {
  final ImmutableBitSet.Builder sortedInputsBuilder = ImmutableBitSet.builder();
  final JoinPredicateInfo joinPredInfo =
      HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(this);
  // Index 0 holds the left child's join keys, index 1 the right child's,
  // matching the order of getInputs().
  final List<ImmutableIntList> joinKeysInChildren = new ArrayList<ImmutableIntList>();
  joinKeysInChildren.add(ImmutableIntList.copyOf(
      joinPredInfo.getProjsFromLeftPartOfJoinKeysInChildSchema()));
  joinKeysInChildren.add(ImmutableIntList.copyOf(
      joinPredInfo.getProjsFromRightPartOfJoinKeysInChildSchema()));
  // Hoisted out of the loop: the metadata query is loop-invariant.
  final RelMetadataQuery mq = RelMetadataQuery.instance();
  for (int i = 0; i < this.getInputs().size(); i++) {
    final boolean correctOrderFound = RelCollations.contains(
        mq.collations(this.getInputs().get(i)),
        joinKeysInChildren.get(i));
    if (correctOrderFound) {
      sortedInputsBuilder.set(i);
    }
  }
  return sortedInputsBuilder.build();
}
public static Integer getSplitCountWithRepartition(HiveJoin join) { final Double maxSplitSize = join.getCluster().getPlanner().getContext(). unwrap(HiveAlgorithmsConf.class).getMaxSplitSize(); // We repartition: new number of splits RelMetadataQuery mq = RelMetadataQuery.instance(); final Double averageRowSize = mq.getAverageRowSize(join); final Double rowCount = mq.getRowCount(join); if (averageRowSize == null || rowCount == null) { return null; } final Double totalSize = averageRowSize * rowCount; final Double splitCount = totalSize / maxSplitSize; return splitCount.intValue(); }
RelDistribution distribution = RelMetadataQuery.instance().distribution(input); if (distribution.getType() != Type.HASH_DISTRIBUTED) { return false;
@Override public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { // Check streaming side RelNode inMemoryInput; if (join.getStreamingSide() == MapJoinStreamingRelation.LEFT_RELATION) { inMemoryInput = join.getRight(); } else if (join.getStreamingSide() == MapJoinStreamingRelation.RIGHT_RELATION) { inMemoryInput = join.getLeft(); } else { return null; } // If simple map join, the whole relation goes in memory return RelMetadataQuery.instance().cumulativeMemoryWithinPhase(inMemoryInput); }
@Override public Double getCumulativeMemoryWithinPhaseSplit(HiveJoin join) { RelMetadataQuery mq = RelMetadataQuery.instance(); // TODO: Split count is not same as no of buckets JoinAlgorithm oldAlgo = join.getJoinAlgorithm(); join.setJoinAlgorithm(TezSMBJoinAlgorithm.INSTANCE); final Double memoryWithinPhase = mq.cumulativeMemoryWithinPhase(join); final Integer splitCount = mq.splitCount(join); join.setJoinAlgorithm(oldAlgo); if (memoryWithinPhase == null || splitCount == null) { return null; } return memoryWithinPhase / splitCount; }
@Override public boolean matches(RelOptRuleCall call) { final HiveSortLimit sortLimit = call.rel(0); // If it is not created by HiveSortJoinReduceRule, we cannot remove it if (!sortLimit.isRuleCreated()) { return false; } // Finally, if we do not reduce the size input enough, we bail out int limit = RexLiteral.intValue(sortLimit.fetch); Double rowCount = RelMetadataQuery.instance().getRowCount(sortLimit.getInput()); if (rowCount != null && limit <= reductionProportion * rowCount && rowCount - limit >= reductionTuples) { return false; } return true; }
/**
 * Returns the maximum number of distinct values (NDV) over every input
 * column referenced by the call's operands, with a floor of 1.0.
 *
 * @param call the expression whose operands are inspected
 */
private Double getMaxNDV(RexCall call) {
  double best = 1.0;
  final RelMetadataQuery mq = RelMetadataQuery.instance();
  for (RexNode operand : call.getOperands()) {
    if (operand instanceof RexInputRef) {
      // Direct column reference: use its NDV as-is.
      final double ndv = HiveRelMdDistinctRowCount.getDistinctRowCount(
          this.childRel, mq, ((RexInputRef) operand).getIndex());
      if (ndv > best) {
        best = ndv;
      }
    } else {
      // Nested expression: collect every input position it references.
      final InputReferencedVisitor visitor = new InputReferencedVisitor();
      visitor.apply(operand);
      for (Integer position : visitor.inputPosReferenced) {
        final double ndv = HiveRelMdDistinctRowCount.getDistinctRowCount(
            this.childRel, mq, position);
        if (ndv > best) {
          best = ndv;
        }
      }
    }
  }
  return best;
}
public static Double getJoinMemory(HiveJoin join, MapJoinStreamingRelation streamingSide) { Double memory = 0.0; RelMetadataQuery mq = RelMetadataQuery.instance(); if (streamingSide == MapJoinStreamingRelation.NONE || streamingSide == MapJoinStreamingRelation.RIGHT_RELATION) { // Left side final Double leftAvgRowSize = mq.getAverageRowSize(join.getLeft()); final Double leftRowCount = mq.getRowCount(join.getLeft()); if (leftAvgRowSize == null || leftRowCount == null) { return null; } memory += leftAvgRowSize * leftRowCount; } if (streamingSide == MapJoinStreamingRelation.NONE || streamingSide == MapJoinStreamingRelation.LEFT_RELATION) { // Right side final Double rightAvgRowSize = mq.getAverageRowSize(join.getRight()); final Double rightRowCount = mq.getRowCount(join.getRight()); if (rightAvgRowSize == null || rightRowCount == null) { return null; } memory += rightAvgRowSize * rightRowCount; } return memory; }
@Override public void onMatch(RelOptRuleCall call) { final Filter filter = call.rel(0); final RexBuilder rexBuilder = filter.getCluster().getRexBuilder(); final RelMetadataQuery metadataProvider = RelMetadataQuery.instance(); // 1. Recompose filter possibly by pulling out common elements from DNF // expressions RexNode newFilterCondition = RexUtil.pullFactors(rexBuilder, filter.getCondition()); // 2. Reduce filter with stats information RexReplacer replacer = new RexReplacer(filter, rexBuilder, metadataProvider); newFilterCondition = replacer.apply(newFilterCondition); // 3. Transform if we have created a new filter operator if (!filter.getCondition().toString().equals(newFilterCondition.toString())) { Filter newFilter = filter.copy(filter.getTraitSet(), filter.getInput(), newFilterCondition); call.transformTo(newFilter); } }
for (RelNode input : union.getInputs()) { if (RexLiteral.intValue(sort.fetch) + offset < RelMetadataQuery.instance().getRowCount(input)) { finishPushSortPastUnion = false;
@Override public RelOptCost getAggregateCost(HiveAggregate aggregate) { if (aggregate.isBucketedInput()) { return HiveCost.FACTORY.makeZeroCost(); } else { RelMetadataQuery mq = RelMetadataQuery.instance(); // 1. Sum of input cardinalities final Double rCount = mq.getRowCount(aggregate.getInput()); if (rCount == null) { return null; } // 2. CPU cost = sorting cost final double cpuCost = algoUtils.computeSortCPUCost(rCount); // 3. IO cost = cost of writing intermediary results to local FS + // cost of reading from local FS for transferring to GBy + // cost of transferring map outputs to GBy operator final Double rAverageSize = mq.getAverageRowSize(aggregate.getInput()); if (rAverageSize == null) { return null; } final double ioCost = algoUtils.computeSortIOCost(new Pair<Double,Double>(rCount,rAverageSize)); // 4. Result return HiveCost.FACTORY.makeCost(rCount, cpuCost, ioCost); } }