/**
 * Costs an {@code AssignUniqueId} node as CPU work sized by the node's id column only:
 * the node's stats are asked for the output size in bytes of that single column.
 */
@Override
public PlanNodeCostEstimate visitAssignUniqueId(AssignUniqueId node, Void context)
{
    double idColumnSizeInBytes = getStats(node).getOutputSizeInBytes(ImmutableList.of(node.getIdColumn()), types);
    return cpuCost(idColumnSizeInBytes);
}
/**
 * Returns the CPU cost of a join's output, sized by the estimated bytes of the
 * join node's output symbols.
 *
 * @param join the join node whose own stats and output symbols are used
 */
private PlanNodeCostEstimate calculateJoinOutputCost(PlanNode join)
{
    return cpuCost(getStats(join).getOutputSizeInBytes(join.getOutputSymbols(), types));
}
@Override public PlanNodeCostEstimate visitRowNumber(RowNumberNode node, Void context) { List<Symbol> symbols = node.getOutputSymbols(); // when maxRowCountPerPartition is set, the RowNumberOperator // copies values for all the columns into a page builder if (!node.getMaxRowCountPerPartition().isPresent()) { symbols = ImmutableList.<Symbol>builder() .addAll(node.getPartitionBy()) .add(node.getRowNumberSymbol()) .build(); } PlanNodeStatsEstimate stats = getStats(node); double cpuCost = stats.getOutputSizeInBytes(symbols, types); double memoryCost = node.getPartitionBy().isEmpty() ? 0 : stats.getOutputSizeInBytes(node.getSource().getOutputSymbols(), types); return new PlanNodeCostEstimate(cpuCost, memoryCost, 0); }
@Override public PlanNodeCostEstimate visitTableScan(TableScanNode node, Void context) { // TODO: add network cost, based on input size in bytes? Or let connector provide this cost? return cpuCost(getStats(node).getOutputSizeInBytes(node.getOutputSymbols(), types)); }
/**
 * Costs a projection as CPU work sized by the estimated bytes of the projection's output symbols.
 */
@Override
public PlanNodeCostEstimate visitProject(ProjectNode node, Void context)
{
    double projectedSizeInBytes = getStats(node).getOutputSizeInBytes(node.getOutputSymbols(), types);
    return cpuCost(projectedSizeInBytes);
}
@Override public PlanNodeCostEstimate visitLimit(LimitNode node, Void context) { // This is just a wild guess. First of all, LimitNode is rather rare except as a top node of a query plan, // so proper cost estimation is not that important. Second, since LimitNode can lead to incomplete evaluation // of the source, true cost estimation should be implemented as a "constraint" enforced on a sub-tree and // evaluated in context of actual source node type (and their sources). return cpuCost(getStats(node).getOutputSizeInBytes(node.getOutputSymbols(), types)); }
@Override public PlanNodeCostEstimate visitUnion(UnionNode node, Void context) { // this assumes that all union inputs will be gathered over the network // that is not aways true // but this estimate is better that returning UNKNOWN, as it sets // cumulative cost to unknown double inputSizeInBytes = getStats(node).getOutputSizeInBytes(node.getOutputSymbols(), types); return calculateRemoteGatherCost(inputSizeInBytes); }
/**
 * Costs a filter as CPU work sized by its input: the stats are those of the filter's
 * source, evaluated over the filter node's output symbols.
 */
@Override
public PlanNodeCostEstimate visitFilter(FilterNode node, Void context)
{
    double filterInputSizeInBytes = getStats(node.getSource()).getOutputSizeInBytes(node.getOutputSymbols(), types);
    return cpuCost(filterInputSizeInBytes);
}
PlanNodeStatsEstimate buildStats = stats.getStats(build); double buildSideSize = buildStats.getOutputSizeInBytes(build.getOutputSymbols(), types); double probeSideSize = probeStats.getOutputSizeInBytes(probe.getOutputSymbols(), types);
/**
 * Costs an aggregation whose step is FINAL or SINGLE; any other step yields an unknown cost.
 * CPU is sized by the source's output, memory by the aggregation's own output, network is 0.
 */
@Override
public PlanNodeCostEstimate visitAggregation(AggregationNode node, Void context)
{
    boolean costableStep = node.getStep() == FINAL || node.getStep() == SINGLE;
    if (!costableStep) {
        return PlanNodeCostEstimate.unknown();
    }

    PlanNodeStatsEstimate outputStats = getStats(node);
    PlanNodeStatsEstimate inputStats = getStats(node.getSource());

    // CPU: bytes read from the source. Memory: bytes of the aggregation's output.
    double inputSizeInBytes = inputStats.getOutputSizeInBytes(node.getSource().getOutputSymbols(), types);
    double outputSizeInBytes = outputStats.getOutputSizeInBytes(node.getOutputSymbols(), types);

    return new PlanNodeCostEstimate(inputSizeInBytes, outputSizeInBytes, 0);
}
/**
 * Renders the estimated stats and cost recorded for {@code node} as a single
 * {@code {rows: ..., cpu: ..., memory: ..., network: ...}} string. Nodes with no
 * recorded stats or cost fall back to the respective {@code unknown()} estimate.
 */
private String formatPlanNodeStatsAndCost(PlanNode node)
{
    PlanNodeStatsEstimate nodeStats = estimatedStatsAndCosts.getStats()
            .getOrDefault(node.getId(), PlanNodeStatsEstimate.unknown());
    PlanNodeCostEstimate nodeCost = estimatedStatsAndCosts.getCosts()
            .getOrDefault(node.getId(), PlanNodeCostEstimate.unknown());

    return format(
            "{rows: %s (%s), cpu: %s, memory: %s, network: %s}",
            formatAsLong(nodeStats.getOutputRowCount()),
            formatEstimateAsDataSize(nodeStats.getOutputSizeInBytes(node.getOutputSymbols(), types)),
            formatDouble(nodeCost.getCpuCost()),
            formatDouble(nodeCost.getMemoryCost()),
            formatDouble(nodeCost.getNetworkCost()));
}
}
/**
 * Decides whether the filtering source of a semi join may be replicated.
 * With no session limit on broadcast table size the answer is always {@code true};
 * otherwise the filtering source's estimated output size must not exceed the limit.
 */
private boolean canReplicate(SemiJoinNode node, Context context)
{
    Optional<DataSize> broadcastLimit = getJoinMaxBroadcastTableSize(context.getSession());
    if (broadcastLimit.isPresent()) {
        PlanNode filteringSource = node.getFilteringSource();
        double filteringSourceSizeInBytes = context.getStatsProvider()
                .getStats(filteringSource)
                .getOutputSizeInBytes(filteringSource.getOutputSymbols(), context.getSymbolAllocator().getTypes());
        return filteringSourceSizeInBytes <= broadcastLimit.get().toBytes();
    }
    // No limit configured: replication is always permitted.
    return true;
}
@Override public PlanNodeCostEstimate visitAggregation(AggregationNode node, Void context) { PlanNode source = node.getSource(); double inputSizeInBytes = getStats(source).getOutputSizeInBytes(source.getOutputSymbols(), types); PlanNodeCostEstimate remoteRepartitionCost = calculateRemoteRepartitionCost(inputSizeInBytes); PlanNodeCostEstimate localRepartitionCost = calculateLocalRepartitionCost(inputSizeInBytes); // TODO consider cost of aggregation itself, not only exchanges, based on aggregation's properties return remoteRepartitionCost.add(localRepartitionCost); }
/**
 * Decides whether the right (build) side of a join may be replicated.
 * Returns {@code false} when the session's join distribution type does not allow
 * replication, {@code true} when it does and no broadcast size limit is set, and
 * otherwise whether the build side's estimated output size is within that limit.
 */
public static boolean canReplicate(JoinNode joinNode, Context context)
{
    if (!getJoinDistributionType(context.getSession()).canReplicate()) {
        return false;
    }

    Optional<DataSize> broadcastLimit = getJoinMaxBroadcastTableSize(context.getSession());
    if (broadcastLimit.isPresent()) {
        PlanNode build = joinNode.getRight();
        double buildSizeInBytes = context.getStatsProvider()
                .getStats(build)
                .getOutputSizeInBytes(build.getOutputSymbols(), context.getSymbolAllocator().getTypes());
        return buildSizeInBytes <= broadcastLimit.get().toBytes();
    }

    // Replication is allowed and no size limit is configured.
    return true;
}
private static PlanNodeCostEstimate calculateJoinExchangeCost( PlanNode probe, PlanNode build, StatsProvider stats, TypeProvider types, boolean replicated, int estimatedSourceDistributedTaskCount) { double probeSizeInBytes = stats.getStats(probe).getOutputSizeInBytes(probe.getOutputSymbols(), types); double buildSizeInBytes = stats.getStats(build).getOutputSizeInBytes(build.getOutputSymbols(), types); if (replicated) { // assuming the probe side of a replicated join is always source distributed PlanNodeCostEstimate replicateCost = calculateRemoteReplicateCost(buildSizeInBytes, estimatedSourceDistributedTaskCount); // cost of the copies repartitioning is added in CostCalculatorUsingExchanges#calculateJoinCost PlanNodeCostEstimate localRepartitionCost = calculateLocalRepartitionCost(buildSizeInBytes); return replicateCost.add(localRepartitionCost); } else { PlanNodeCostEstimate probeCost = calculateRemoteRepartitionCost(probeSizeInBytes); PlanNodeCostEstimate buildRemoteRepartitionCost = calculateRemoteRepartitionCost(buildSizeInBytes); PlanNodeCostEstimate buildLocalRepartitionCost = calculateLocalRepartitionCost(buildSizeInBytes); return probeCost .add(buildRemoteRepartitionCost) .add(buildLocalRepartitionCost); } }
@Override public PlanNodeCostEstimate visitExchange(ExchangeNode node, Void context) double inputSizeInBytes = getStats(node).getOutputSizeInBytes(node.getOutputSymbols(), types); switch (node.getScope()) { case LOCAL: