/**
 * Decides whether the given join should be translated as a broadcast hash join.
 *
 * <p>The answer is {@code true} only when all of the following hold:
 * <ul>
 *   <li>the join has exactly two inputs,</li>
 *   <li>it is a {@code LEFT} join whose right input's producer passes
 *       {@code hasFitsInMemoryHint}, or a {@code RIGHT} join whose left input's
 *       producer passes it,</li>
 *   <li>its windowing is not a {@code MergingWindowing}.</li>
 * </ul>
 *
 * @param o the join operator to inspect
 * @return {@code true} if the conditions above are met, {@code false} otherwise
 */
@SuppressWarnings("unchecked")
public static boolean wantTranslateBroadcastHashJoin(Join o) {
  final ArrayList<Dataset> sides = new ArrayList<>(o.listInputs());
  if (sides.size() != 2) {
    return false;
  }
  final Dataset left = sides.get(0);
  final Dataset right = sides.get(1);

  // The side opposite to the join type is the one that has to carry the hint.
  final Join.Type joinType = o.getType();
  final boolean oppositeSideHinted;
  if (joinType == Join.Type.LEFT) {
    oppositeSideHinted = hasFitsInMemoryHint(right.getProducer());
  } else if (joinType == Join.Type.RIGHT) {
    oppositeSideHinted = hasFitsInMemoryHint(left.getProducer());
  } else {
    oppositeSideHinted = false;
  }
  if (!oppositeSideHinted) {
    return false;
  }

  // The windowing is inspected only after the hint check has passed.
  return !(o.getWindowing() instanceof MergingWindowing);
}
/**
 * Decides whether the given join should be translated as a broadcast hash join.
 *
 * <p>The answer is {@code true} only when all of the following hold:
 * <ul>
 *   <li>the join has exactly two inputs,</li>
 *   <li>it is a {@code LEFT} join whose right input's producer passes
 *       {@code hasFitsInMemoryHint}, or a {@code RIGHT} join whose left input's
 *       producer passes it,</li>
 *   <li>its windowing is not a {@code MergingWindowing}.</li>
 * </ul>
 *
 * @param o the join operator to inspect
 * @return {@code true} if the conditions above are met, {@code false} otherwise
 */
@SuppressWarnings("unchecked")
public static boolean wantTranslateBroadcastHashJoin(Join o) {
  final ArrayList<Dataset> sides = new ArrayList<>(o.listInputs());
  if (sides.size() != 2) {
    return false;
  }
  final Dataset left = sides.get(0);
  final Dataset right = sides.get(1);

  // The side opposite to the join type is the one that has to carry the hint.
  final Join.Type joinType = o.getType();
  final boolean oppositeSideHinted;
  if (joinType == Join.Type.LEFT) {
    oppositeSideHinted = hasFitsInMemoryHint(right.getProducer());
  } else if (joinType == Join.Type.RIGHT) {
    oppositeSideHinted = hasFitsInMemoryHint(left.getProducer());
  } else {
    oppositeSideHinted = false;
  }
  if (!oppositeSideHinted) {
    return false;
  }

  // The windowing is inspected only after the hint check has passed.
  return !(o.getWindowing() instanceof MergingWindowing);
}
"Missing broadcastHashJoin hint"); Preconditions.checkArgument( operator.getType() == Join.Type.LEFT || operator.getType() == Join.Type.RIGHT, "BroadcastJoin supports LEFT and RIGHT joins only"); switch (operator.getType()) { case LEFT: { throw new IllegalStateException("Invalid type: " + operator.getType() + ".");
/**
 * Builds a {@code FullJoin} over two mock datasets and checks that the first
 * operator registered in the flow is a {@code Join} of type {@code FULL}.
 */
@Test
public void testBuild_FullJoin() {
  final Flow testFlow = Flow.create("TEST");
  final Dataset<String> leftInput = Util.createMockDataset(testFlow, 1);
  final Dataset<String> rightInput = Util.createMockDataset(testFlow, 1);

  FullJoin.named("Join1")
      .of(leftInput, rightInput)
      .by(String::length, String::length)
      .using(
          (Optional<String> l, Optional<String> r, Collector<String> c) -> {
            // Either side may be absent in a full join; absent values become null.
            c.collect(l.orElse(null) + r.orElse(null));
          })
      .output();

  final Join operator = (Join) testFlow.operators().iterator().next();
  assertEquals(Join.Type.FULL, operator.getType());
}
@Test public void testBuild_LeftJoin() { Flow flow = Flow.create("TEST"); Dataset<String> left = Util.createMockDataset(flow, 1); Dataset<String> right = Util.createMockDataset(flow, 1); LeftJoin.named("Join1") .of(left, right) .by(String::length, String::length) .using((String l, Optional<String> r, Collector<String> c) -> { // no-op }) .output(); Join join = (Join) flow.operators().iterator().next(); assertEquals(Join.Type.LEFT, join.getType()); }
@Test public void testBuild_RightJoin() { Flow flow = Flow.create("TEST"); Dataset<String> left = Util.createMockDataset(flow, 1); Dataset<String> right = Util.createMockDataset(flow, 1); RightJoin.named("Join1") .of(left, right) .by(String::length, String::length) .using((Optional<String> l, String r, Collector<String> c) -> { // no-op }) .output(); Join join = (Join) flow.operators().iterator().next(); assertEquals(Join.Type.RIGHT, join.getType()); }
@Test public void testBuild_OutputValues() { Flow flow = Flow.create("TEST"); Dataset<String> left = Util.createMockDataset(flow, 2); Dataset<String> right = Util.createMockDataset(flow, 3); Dataset<String> joined = Join.named("JoinValues") .of(left, right) .by(String::length, String::length) .using((String l, String r, Collector<String> c) -> { // no-op }) .outputValues(); assertEquals(flow, joined.getFlow()); assertEquals(2, flow.size()); Join join = (Join) flow.operators().iterator().next(); assertEquals(flow, join.getFlow()); assertEquals("JoinValues", join.getName()); assertNotNull(join.getLeftKeyExtractor()); assertNotNull(join.getRightKeyExtractor()); assertNull(join.getWindowing()); assertEquals(Join.Type.INNER, join.getType()); }
@Test public void testBuild() { Flow flow = Flow.create("TEST"); Dataset<String> left = Util.createMockDataset(flow, 2); Dataset<String> right = Util.createMockDataset(flow, 3); Dataset<Pair<Integer, String>> joined = Join.named("Join1") .of(left, right) .by(String::length, String::length) .using((String l, String r, Collector<String> c) -> { // no-op }) .output(); assertEquals(flow, joined.getFlow()); assertEquals(1, flow.size()); Join join = (Join) flow.operators().iterator().next(); assertEquals(flow, join.getFlow()); assertEquals("Join1", join.getName()); assertNotNull(join.leftKeyExtractor); assertNotNull(join.rightKeyExtractor); assertEquals(joined, join.output()); assertNull(join.getWindowing()); assertEquals(Join.Type.INNER, join.getType()); }
.setName(operator.getName() + "::sort-by-key-and-side") .mapPartitions( iterator -> new JoinIterator<>(new BatchJoinIterator<>(iterator), operator.getType())) .setName(operator.getName() + "::create-iterator") .flatMap(
/**
 * Builds an inner {@code Join} whose join function increments the counter
 * "my-counter" and collects the concatenation of both sides, then checks the
 * resulting flow: a single operator named "Join1" whose output is the returned
 * dataset, with both key extractors set, no windowing and type {@code INNER}.
 */
@Test
public void testBuild_WithCounters() {
  final Flow testFlow = Flow.create("TEST");
  final Dataset<String> leftInput = Util.createMockDataset(testFlow, 2);
  final Dataset<String> rightInput = Util.createMockDataset(testFlow, 3);

  final Dataset<Pair<Integer, String>> result =
      Join.named("Join1")
          .of(leftInput, rightInput)
          .by(String::length, String::length)
          .using(
              (String l, String r, Collector<String> c) -> {
                c.getCounter("my-counter").increment();
                c.collect(l + r);
              })
          .output();

  // Flow-level expectations.
  assertEquals(testFlow, result.getFlow());
  assertEquals(1, testFlow.size());

  // Operator-level expectations on the only registered operator.
  final Join operator = (Join) testFlow.operators().iterator().next();
  assertEquals(testFlow, operator.getFlow());
  assertEquals("Join1", operator.getName());
  assertNotNull(operator.leftKeyExtractor);
  assertNotNull(operator.rightKeyExtractor);
  assertEquals(result, operator.output());
  assertNull(operator.getWindowing());
  assertEquals(Join.Type.INNER, operator.getType());
}