/**
 * Estimates the cost of a join without the cost of producing its output.
 *
 * <p>The estimate is the sum of the join exchange cost and the join input cost,
 * both computed from the same probe/build pair and settings.
 *
 * @param probe probe-side plan node
 * @param build build-side plan node
 * @param stats provider of plan node statistics
 * @param types type information forwarded to the size estimates
 * @param replicated {@code true} for a replicated join, {@code false} for a repartitioned one
 * @param estimatedSourceDistributedTaskCount estimated number of source-distributed tasks
 * @return exchange cost plus input cost
 */
public static PlanNodeCostEstimate calculateJoinCostWithoutOutput(
        PlanNode probe,
        PlanNode build,
        StatsProvider stats,
        TypeProvider types,
        boolean replicated,
        int estimatedSourceDistributedTaskCount) {
    // The exchange cost (receiver) is evaluated before the input cost (argument).
    return calculateJoinExchangeCost(probe, build, stats, types, replicated, estimatedSourceDistributedTaskCount)
            .add(calculateJoinInputCost(probe, build, stats, types, replicated, estimatedSourceDistributedTaskCount));
}
return PlanNodeCostEstimate.zero(); case REPARTITION: return calculateLocalRepartitionCost(inputSizeInBytes); case REPLICATE: return PlanNodeCostEstimate.zero(); switch (node.getType()) { case GATHER: return calculateRemoteGatherCost(inputSizeInBytes); case REPARTITION: return calculateRemoteRepartitionCost(inputSizeInBytes); case REPLICATE: return calculateRemoteReplicateCost(inputSizeInBytes, taskCountEstimator.estimateSourceDistributedTaskCount()); default: throw new IllegalArgumentException("Unexpected type: " + node.getType());
@Override public PlanNodeCostEstimate visitAggregation(AggregationNode node, Void context) { PlanNode source = node.getSource(); double inputSizeInBytes = getStats(source).getOutputSizeInBytes(source.getOutputSymbols(), types); PlanNodeCostEstimate remoteRepartitionCost = calculateRemoteRepartitionCost(inputSizeInBytes); PlanNodeCostEstimate localRepartitionCost = calculateLocalRepartitionCost(inputSizeInBytes); // TODO consider cost of aggregation itself, not only exchanges, based on aggregation's properties return remoteRepartitionCost.add(localRepartitionCost); }
private static PlanNodeCostEstimate calculateJoinExchangeCost( PlanNode probe, PlanNode build, StatsProvider stats, TypeProvider types, boolean replicated, int estimatedSourceDistributedTaskCount) { double probeSizeInBytes = stats.getStats(probe).getOutputSizeInBytes(probe.getOutputSymbols(), types); double buildSizeInBytes = stats.getStats(build).getOutputSizeInBytes(build.getOutputSymbols(), types); if (replicated) { // assuming the probe side of a replicated join is always source distributed PlanNodeCostEstimate replicateCost = calculateRemoteReplicateCost(buildSizeInBytes, estimatedSourceDistributedTaskCount); // cost of the copies repartitioning is added in CostCalculatorUsingExchanges#calculateJoinCost PlanNodeCostEstimate localRepartitionCost = calculateLocalRepartitionCost(buildSizeInBytes); return replicateCost.add(localRepartitionCost); } else { PlanNodeCostEstimate probeCost = calculateRemoteRepartitionCost(probeSizeInBytes); PlanNodeCostEstimate buildRemoteRepartitionCost = calculateRemoteRepartitionCost(buildSizeInBytes); PlanNodeCostEstimate buildLocalRepartitionCost = calculateLocalRepartitionCost(buildSizeInBytes); return probeCost .add(buildRemoteRepartitionCost) .add(buildLocalRepartitionCost); } }
/**
 * Builds a {@link QueryExplainer} wired to the current query runner.
 *
 * <p>Hash generation optimization is enabled, single-node planning is forced
 * when the runner reports exactly one node, and the task count estimator is
 * backed by the runner's node count.
 *
 * @return a newly constructed query explainer
 */
private QueryExplainer getQueryExplainer() {
    Metadata metadata = queryRunner.getMetadata();
    FeaturesConfig features = new FeaturesConfig().setOptimizeHashGeneration(true);
    boolean singleNode = queryRunner.getNodeCount() == 1;
    TaskCountEstimator taskCounts = new TaskCountEstimator(queryRunner::getNodeCount);
    CostCalculator exchangeCosts = new CostCalculatorUsingExchanges(taskCounts);

    PlanOptimizers planOptimizers = new PlanOptimizers(
            metadata,
            sqlParser,
            features,
            singleNode,
            new MBeanExporter(new TestingMBeanServer()),
            queryRunner.getSplitManager(),
            queryRunner.getPageSourceManager(),
            queryRunner.getStatsCalculator(),
            exchangeCosts,
            new CostCalculatorWithEstimatedExchanges(exchangeCosts, taskCounts),
            new CostComparator(features),
            taskCounts);
    List<PlanOptimizer> optimizers = planOptimizers.get();

    PlanFragmenter fragmenter = new PlanFragmenter(
            metadata,
            queryRunner.getNodePartitioningManager(),
            new QueryManagerConfig());

    return new QueryExplainer(
            optimizers,
            fragmenter,
            metadata,
            queryRunner.getAccessControl(),
            sqlParser,
            queryRunner.getStatsCalculator(),
            exchangeCosts,
            ImmutableMap.of());
}
PlanNodeCostEstimate cost = calculateJoinCostWithoutOutput( possibleJoinNode.getLeft(), possibleJoinNode.getRight(),
@Override public PlanNodeCostEstimate visitUnion(UnionNode node, Void context) { // this assumes that all union inputs will be gathered over the network // that is not aways true // but this estimate is better that returning UNKNOWN, as it sets // cumulative cost to unknown double inputSizeInBytes = getStats(node).getOutputSizeInBytes(node.getOutputSymbols(), types); return calculateRemoteGatherCost(inputSizeInBytes); }
/**
 * Estimates the cost of a join as its input cost plus its output cost.
 *
 * @param join the join node, used for the output cost
 * @param probe probe-side plan node
 * @param build build-side plan node
 * @param replicated {@code true} for a replicated join
 * @return join input cost plus join output cost
 */
private PlanNodeCostEstimate calculateJoinCost(PlanNode join, PlanNode probe, PlanNode build, boolean replicated) {
    // The input cost (receiver) is evaluated before the output cost (argument).
    return calculateJoinInputCost(
            probe,
            build,
            stats,
            types,
            replicated,
            taskCountEstimator.estimateSourceDistributedTaskCount())
            .add(calculateJoinOutputCost(join));
}
this.taskCountEstimator = new TaskCountEstimator(() -> nodeCountForStats); this.costCalculator = new CostCalculatorUsingExchanges(taskCountEstimator); this.estimatedExchangesCostCalculator = new CostCalculatorWithEstimatedExchanges(costCalculator, taskCountEstimator); this.accessControl = new TestingAccessControlManager(transactionManager); this.pageSourceManager = new PageSourceManager();
PlanNodeCostEstimate cost = calculateJoinCostWithoutOutput( possibleJoinNode.getSource(), possibleJoinNode.getFilteringSource(),
/**
 * Initializes the shared test fixtures: cost calculators, session, catalog and
 * transaction plumbing, metadata, node scheduling and the plan fragmenter.
 */
@BeforeClass
public void setUp() {
    // Cost calculators share one estimator that always reports NUMBER_OF_NODES.
    TaskCountEstimator estimator = new TaskCountEstimator(() -> NUMBER_OF_NODES);
    costCalculatorUsingExchanges = new CostCalculatorUsingExchanges(estimator);
    costCalculatorWithEstimatedExchanges =
            new CostCalculatorWithEstimatedExchanges(costCalculatorUsingExchanges, estimator);

    session = testSessionBuilder().setCatalog("tpch").build();

    // Catalog, transactions and metadata.
    CatalogManager catalogs = new CatalogManager();
    catalogs.registerCatalog(createBogusTestingCatalog("tpch"));
    transactionManager = createTestTransactionManager(catalogs);
    metadata = createTestMetadataManager(transactionManager, new FeaturesConfig());

    // The finalizer service is started before it is handed to the node task map.
    finalizerService = new FinalizerService();
    finalizerService.start();

    // Node scheduling and plan fragmentation.
    nodeScheduler = new NodeScheduler(
            new LegacyNetworkTopology(),
            new InMemoryNodeManager(),
            new NodeSchedulerConfig().setIncludeCoordinator(true),
            new NodeTaskMap(finalizerService));
    nodePartitioningManager = new NodePartitioningManager(nodeScheduler);
    planFragmenter = new PlanFragmenter(metadata, nodePartitioningManager, new QueryManagerConfig());
}
/**
 * Creates a {@link QueryExplainer} backed by the current query runner.
 *
 * <p>The optimizers are built with hash generation optimization enabled and
 * with single-node planning forced when the runner has exactly one node. The
 * same exchange-based cost calculator is given to both the optimizers and the
 * explainer.
 *
 * @return a newly constructed query explainer
 */
private QueryExplainer getQueryExplainer() {
    Metadata metadata = queryRunner.getMetadata();
    FeaturesConfig features = new FeaturesConfig().setOptimizeHashGeneration(true);
    boolean singleNode = queryRunner.getNodeCount() == 1;
    TaskCountEstimator taskCounts = new TaskCountEstimator(queryRunner::getNodeCount);
    CostCalculator exchangeCosts = new CostCalculatorUsingExchanges(taskCounts);

    PlanOptimizers planOptimizers = new PlanOptimizers(
            metadata,
            sqlParser,
            features,
            singleNode,
            new MBeanExporter(new TestingMBeanServer()),
            queryRunner.getSplitManager(),
            queryRunner.getPageSourceManager(),
            queryRunner.getStatsCalculator(),
            exchangeCosts,
            new CostCalculatorWithEstimatedExchanges(exchangeCosts, taskCounts),
            new CostComparator(features),
            taskCounts);
    List<PlanOptimizer> optimizers = planOptimizers.get();

    PlanFragmenter fragmenter = new PlanFragmenter(
            metadata,
            queryRunner.getNodePartitioningManager(),
            new QueryManagerConfig());

    return new QueryExplainer(
            optimizers,
            fragmenter,
            metadata,
            queryRunner.getAccessControl(),
            sqlParser,
            queryRunner.getStatsCalculator(),
            exchangeCosts,
            ImmutableMap.of());
}