@Test public void testCompileKMeansSingleStepWithStats() { Plan p = getKMeansPlan(); p.setExecutionConfig(new ExecutionConfig()); // set the statistics OperatorResolver cr = getContractResolver(p); GenericDataSourceBase<?, ?> pointsSource = cr.getNode(DATAPOINTS); GenericDataSourceBase<?, ?> centersSource = cr.getNode(CENTERS); setSourceStatistics(pointsSource, 100L * 1024 * 1024 * 1024, 32f); setSourceStatistics(centersSource, 1024 * 1024, 32f); OptimizedPlan plan = compileWithStats(p); checkPlan(plan); }
/** * Statistics that push towards a repartition merge join. If the join blows the data volume up significantly, * re-exploiting the sorted order is cheaper. */ @Test public void testQueryWithStatsForRepartitionMerge() { Plan p = getTPCH3Plan(); p.setExecutionConfig(defaultExecutionConfig); // set compiler hints OperatorResolver cr = getContractResolver(p); DualInputOperator<?, ?, ?, ?> match = cr.getNode(JOIN_NAME); match.getCompilerHints().setFilterFactor(100f); testQueryGeneric(100L * 1024 * 1024 * 1024 * 1024, 100L * 1024 * 1024 * 1024 * 1024, 0.01f, 100f, false, true, false, false, true); }
// Resolve the operators this section works with by their registered names:
// the two data sources, the mapper and the join.
GenericDataSourceBase<?, ?> ordersSource = cr.getNode(ORDERS);
GenericDataSourceBase<?, ?> lineItemSource = cr.getNode(LINEITEM);
SingleInputOperator<?, ?, ?> mapper = cr.getNode(MAPPER_NAME);
DualInputOperator<?, ?, ?, ?> joiner = cr.getNode(JOIN_NAME);

// Attach statistics to both sources, using the caller-supplied sizes.
// NOTE(review): the third argument (100f / 140f) is presumably an average
// record width -- confirm against setSourceStatistics.
setSourceStatistics(ordersSource, orderSize, 100f);
setSourceStatistics(lineItemSource, lineitemSize, 140f);
// Look up the source registered as "Input Lines" and attach statistics to it.
GenericDataSourceBase<?, ?> source = getContractResolver(p).getNode("Input Lines");

// The size argument evaluates to 2^40: the first three factors multiply as int
// (2^30, which still fits) and only the final "* 1024L" widens to long.
// Adding another int factor in front of the long literal would overflow.
setSourceStatistics(source, 1024 * 1024 * 1024 * 1024L, 24f);

// Compile the plan again now that the statistics are set.
plan = compileWithStats(p);