@Test public void testReplicateScalar() { assertDetermineJoinDistributionType() .on(p -> p.join( INNER, p.values(ImmutableList.of(p.symbol("A1")), ImmutableList.of(expressions("10"), expressions("11"))), p.enforceSingleRow( p.values(ImmutableList.of(p.symbol("B1")), ImmutableList.of(expressions("50"), expressions("11")))), ImmutableList.of(new JoinNode.EquiJoinClause(p.symbol("A1", BIGINT), p.symbol("B1", BIGINT))), ImmutableList.of(p.symbol("A1", BIGINT), p.symbol("B1", BIGINT)), Optional.empty())) .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.PARTITIONED.name()) .matches(join( INNER, ImmutableList.of(equiJoinClause("A1", "B1")), Optional.empty(), Optional.of(DistributionType.REPLICATED), values(ImmutableMap.of("A1", 0)), enforceSingleRow(values(ImmutableMap.of("B1", 0))))); }
public static boolean canReplicate(JoinNode joinNode, Context context) { JoinDistributionType joinDistributionType = getJoinDistributionType(context.getSession()); if (!joinDistributionType.canReplicate()) { return false; } Optional<DataSize> joinMaxBroadcastTableSize = getJoinMaxBroadcastTableSize(context.getSession()); if (!joinMaxBroadcastTableSize.isPresent()) { return true; } PlanNode buildSide = joinNode.getRight(); PlanNodeStatsEstimate buildSideStatsEstimate = context.getStatsProvider().getStats(buildSide); double buildSideSizeInBytes = buildSideStatsEstimate.getOutputSizeInBytes(buildSide.getOutputSymbols(), context.getSymbolAllocator().getTypes()); return buildSideSizeInBytes <= joinMaxBroadcastTableSize.get().toBytes(); }
@Test public void testJoinWithStatefulFilterFunction() { super.testJoinWithStatefulFilterFunction(); // Stateful function is placed in LEFT JOIN's ON clause and involves left & right symbols to prevent any kind of push down/pull down. Session session = Session.builder(getSession()) // With broadcast join, lineitem would be source-distributed and not executed concurrently. .setSystemProperty(SystemSessionProperties.JOIN_DISTRIBUTION_TYPE, JoinDistributionType.PARTITIONED.toString()) .build(); long joinOutputRowCount = 60175; assertQuery( session, format( "SELECT count(*) FROM lineitem l LEFT OUTER JOIN orders o ON l.orderkey = o.orderkey AND stateful_sleeping_sum(%s, 100, l.linenumber, o.shippriority) > 0", 10 * 1. / joinOutputRowCount), format("VALUES %s", joinOutputRowCount)); }
JOIN_DISTRIBUTION_TYPE, format("The join method to use. Options are %s", Stream.of(JoinDistributionType.values()) .map(JoinDistributionType::name) .collect(joining(","))), featuresConfig.getJoinDistributionType(), false, value -> JoinDistributionType.valueOf(((String) value).toUpperCase()), JoinDistributionType::name), new PropertyMetadata<>(
.setSystemProperty(JOIN_DISTRIBUTION_TYPE, BROADCAST.name()) .setSystemProperty(COLOCATED_JOIN, "true") .setSystemProperty(GROUPED_EXECUTION_FOR_AGGREGATION, "true")
.setSystemProperty(JOIN_DISTRIBUTION_TYPE, BROADCAST.name()) .setSystemProperty(COLOCATED_JOIN, "true") .setSystemProperty(GROUPED_EXECUTION_FOR_AGGREGATION, "true")
JOIN_DISTRIBUTION_TYPE, format("The join method to use. Options are %s", Stream.of(JoinDistributionType.values()) .map(JoinDistributionType::name) .collect(joining(","))), featuresConfig.getJoinDistributionType(), false, value -> JoinDistributionType.valueOf(((String) value).toUpperCase()), JoinDistributionType::name), new PropertyMetadata<>(
.setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.BROADCAST.name()) .setSystemProperty(FORCE_SINGLE_NODE_OUTPUT, Boolean.toString(false)) .setSystemProperty(OPTIMIZE_HASH_GENERATION, Boolean.toString(false))
.setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.BROADCAST.name()) .setSystemProperty(FORCE_SINGLE_NODE_OUTPUT, Boolean.toString(false)) .build();
.setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.BROADCAST.name()) .setSystemProperty(FORCE_SINGLE_NODE_OUTPUT, Boolean.toString(false)) .setSystemProperty(OPTIMIZE_HASH_GENERATION, Boolean.toString(false))
.setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.BROADCAST.name()) .setSystemProperty(FORCE_SINGLE_NODE_OUTPUT, Boolean.toString(false)) .build();
@Test public void testReplicatesUnrestrictedWhenRequiredBySession() { assertReorderJoins() .on(p -> p.join( INNER, p.values(new PlanNodeId("valuesA"), ImmutableList.of(p.symbol("A1")), TWO_ROWS), p.values(new PlanNodeId("valuesB"), ImmutableList.of(p.symbol("B1")), TWO_ROWS), ImmutableList.of(new EquiJoinClause(p.symbol("A1"), p.symbol("B1"))), ImmutableList.of(p.symbol("A1"), p.symbol("B1")), Optional.empty())) .setSystemProperty(JOIN_MAX_BROADCAST_TABLE_SIZE, "1kB") .setSystemProperty(JOIN_DISTRIBUTION_TYPE, BROADCAST.name()) .overrideStats("valuesA", PlanNodeStatsEstimate.builder() .setOutputRowCount(10000) .addSymbolStatistics(ImmutableMap.of(new Symbol("A1"), new SymbolStatsEstimate(0, 100, 0, 640000, 100))) .build()) .overrideStats("valuesB", PlanNodeStatsEstimate.builder() .setOutputRowCount(10000) .addSymbolStatistics(ImmutableMap.of(new Symbol("B1"), new SymbolStatsEstimate(0, 100, 0, 640000, 100))) .build()) .matches(join( INNER, ImmutableList.of(equiJoinClause("A1", "B1")), Optional.empty(), Optional.of(REPLICATED), values(ImmutableMap.of("A1", 0)), values(ImmutableMap.of("B1", 0)))); }
@Test public void testRepartitionsWhenRequiredBySession() { assertReorderJoins() .on(p -> p.join( INNER, p.values(new PlanNodeId("valuesA"), ImmutableList.of(p.symbol("A1")), TWO_ROWS), p.values(new PlanNodeId("valuesB"), ImmutableList.of(p.symbol("B1")), TWO_ROWS), ImmutableList.of(new EquiJoinClause(p.symbol("A1"), p.symbol("B1"))), ImmutableList.of(p.symbol("A1"), p.symbol("B1")), Optional.empty())) .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.PARTITIONED.name()) .overrideStats("valuesA", PlanNodeStatsEstimate.builder() .setOutputRowCount(100) .addSymbolStatistics(ImmutableMap.of(new Symbol("A1"), new SymbolStatsEstimate(0, 100, 0, 6400, 100))) .build()) .overrideStats("valuesB", PlanNodeStatsEstimate.builder() .setOutputRowCount(10000) .addSymbolStatistics(ImmutableMap.of(new Symbol("B1"), new SymbolStatsEstimate(0, 100, 0, 640000, 100))) .build()) .matches(join( INNER, ImmutableList.of(equiJoinClause("B1", "A1")), Optional.empty(), Optional.of(PARTITIONED), values(ImmutableMap.of("B1", 0)), values(ImmutableMap.of("A1", 0)))); }
@Test public void testReplicatesUnrestrictedWhenRequiredBySession() { assertReorderJoins() .on(p -> p.join( INNER, p.values(new PlanNodeId("valuesA"), ImmutableList.of(p.symbol("A1")), TWO_ROWS), p.values(new PlanNodeId("valuesB"), ImmutableList.of(p.symbol("B1")), TWO_ROWS), ImmutableList.of(new EquiJoinClause(p.symbol("A1"), p.symbol("B1"))), ImmutableList.of(p.symbol("A1"), p.symbol("B1")), Optional.empty())) .setSystemProperty(JOIN_MAX_BROADCAST_TABLE_SIZE, "1kB") .setSystemProperty(JOIN_DISTRIBUTION_TYPE, BROADCAST.name()) .overrideStats("valuesA", PlanNodeStatsEstimate.builder() .setOutputRowCount(10000) .addSymbolStatistics(ImmutableMap.of(new Symbol("A1"), new SymbolStatsEstimate(0, 100, 0, 640000, 100))) .build()) .overrideStats("valuesB", PlanNodeStatsEstimate.builder() .setOutputRowCount(10000) .addSymbolStatistics(ImmutableMap.of(new Symbol("B1"), new SymbolStatsEstimate(0, 100, 0, 640000, 100))) .build()) .matches(join( INNER, ImmutableList.of(equiJoinClause("A1", "B1")), Optional.empty(), Optional.of(REPLICATED), values(ImmutableMap.of("A1", 0)), values(ImmutableMap.of("B1", 0)))); }
@Test public void testRepartitionsWhenRequiredBySession() { assertReorderJoins() .on(p -> p.join( INNER, p.values(new PlanNodeId("valuesA"), ImmutableList.of(p.symbol("A1")), TWO_ROWS), p.values(new PlanNodeId("valuesB"), ImmutableList.of(p.symbol("B1")), TWO_ROWS), ImmutableList.of(new EquiJoinClause(p.symbol("A1"), p.symbol("B1"))), ImmutableList.of(p.symbol("A1"), p.symbol("B1")), Optional.empty())) .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.PARTITIONED.name()) .overrideStats("valuesA", PlanNodeStatsEstimate.builder() .setOutputRowCount(100) .addSymbolStatistics(ImmutableMap.of(new Symbol("A1"), new SymbolStatsEstimate(0, 100, 0, 6400, 100))) .build()) .overrideStats("valuesB", PlanNodeStatsEstimate.builder() .setOutputRowCount(10000) .addSymbolStatistics(ImmutableMap.of(new Symbol("B1"), new SymbolStatsEstimate(0, 100, 0, 640000, 100))) .build()) .matches(join( INNER, ImmutableList.of(equiJoinClause("B1", "A1")), Optional.empty(), Optional.of(PARTITIONED), values(ImmutableMap.of("B1", 0)), values(ImmutableMap.of("A1", 0)))); }
@Test public void testReplicateScalar() { assertDetermineJoinDistributionType() .on(p -> p.join( INNER, p.values(ImmutableList.of(p.symbol("A1")), ImmutableList.of(expressions("10"), expressions("11"))), p.enforceSingleRow( p.values(ImmutableList.of(p.symbol("B1")), ImmutableList.of(expressions("50"), expressions("11")))), ImmutableList.of(new JoinNode.EquiJoinClause(p.symbol("A1", BIGINT), p.symbol("B1", BIGINT))), ImmutableList.of(p.symbol("A1", BIGINT), p.symbol("B1", BIGINT)), Optional.empty())) .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.PARTITIONED.name()) .matches(join( INNER, ImmutableList.of(equiJoinClause("A1", "B1")), Optional.empty(), Optional.of(DistributionType.REPLICATED), values(ImmutableMap.of("A1", 0)), enforceSingleRow(values(ImmutableMap.of("B1", 0))))); }
private void testReplicateNoEquiCriteria(Type joinType) { assertDetermineJoinDistributionType() .on(p -> p.join( joinType, p.values(ImmutableList.of(p.symbol("A1")), ImmutableList.of(expressions("10"), expressions("11"))), p.values(ImmutableList.of(p.symbol("B1")), ImmutableList.of(expressions("50"), expressions("11"))), ImmutableList.of(), ImmutableList.of(p.symbol("A1", BIGINT), p.symbol("B1", BIGINT)), Optional.of(expression("A1 * B1 > 100")))) .setSystemProperty(JOIN_DISTRIBUTION_TYPE, JoinDistributionType.PARTITIONED.name()) .matches(join( joinType, ImmutableList.of(), Optional.of("A1 * B1 > 100"), Optional.of(DistributionType.REPLICATED), values(ImmutableMap.of("A1", 0)), values(ImmutableMap.of("B1", 0)))); }
private void testRepartitionRightOuter(JoinDistributionType sessionDistributedJoin, Type joinType) { assertDetermineJoinDistributionType() .on(p -> p.join( joinType, p.values(ImmutableList.of(p.symbol("A1")), ImmutableList.of(expressions("10"), expressions("11"))), p.values(ImmutableList.of(p.symbol("B1")), ImmutableList.of(expressions("50"), expressions("11"))), ImmutableList.of(new JoinNode.EquiJoinClause(p.symbol("A1", BIGINT), p.symbol("B1", BIGINT))), ImmutableList.of(p.symbol("A1", BIGINT), p.symbol("B1", BIGINT)), Optional.empty())) .setSystemProperty(JOIN_DISTRIBUTION_TYPE, sessionDistributedJoin.name()) .matches(join( joinType, ImmutableList.of(equiJoinClause("A1", "B1")), Optional.empty(), Optional.of(DistributionType.PARTITIONED), values(ImmutableMap.of("A1", 0)), values(ImmutableMap.of("B1", 0)))); }