@SuppressWarnings("unchecked") static boolean wantTranslate(Join join, SparkFlowTranslator.AcceptorContext context) { return (join.getWindowing() == null || join.getWindowing() instanceof GlobalWindowing) && (ClassUtils.isComparable(join.getKeyClass()) || context.hasComparator(join.getKeyClass())); }
@SuppressWarnings("unchecked") public static boolean wantTranslateBroadcastHashJoin(Join o) { final ArrayList<Dataset> inputs = new ArrayList(o.listInputs()); if (inputs.size() != 2) { return false; } final Dataset leftDataset = inputs.get(0); final Dataset rightDataset = inputs.get(1); return (o.getType() == Join.Type.LEFT && hasFitsInMemoryHint(rightDataset.getProducer()) || o.getType() == Join.Type.RIGHT && hasFitsInMemoryHint(leftDataset.getProducer()) ) && !(o.getWindowing() instanceof MergingWindowing); }
@SuppressWarnings("unchecked") public static boolean wantTranslateBroadcastHashJoin(Join o) { final ArrayList<Dataset> inputs = new ArrayList(o.listInputs()); if (inputs.size() != 2) { return false; } final Dataset leftDataset = inputs.get(0); final Dataset rightDataset = inputs.get(1); return (o.getType() == Join.Type.LEFT && hasFitsInMemoryHint(rightDataset.getProducer()) || o.getType() == Join.Type.RIGHT && hasFitsInMemoryHint(leftDataset.getProducer()) ) && !(o.getWindowing() instanceof MergingWindowing); }
private static void checkJoinWindowing(Node<Operator<?, ?>> node) {
  Preconditions.checkState(node.get() instanceof Join);
  // ~ if a windowing strategy is explicitly provided by the user, all is fine
  if (((Join) node.get()).getWindowing() != null) {
    return;
  }
  for (Node<Operator<?, ?>> parent : node.getParents()) {
    if (!isBatched(parent)) {
      throw new WindowingRequiredException(
          "Join operator requires either an explicit windowing"
              + " strategy or needs to be supplied with batched inputs.");
    }
  }
}
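The intent of checkJoinWindowing can be restated in isolation: a Join without an explicit windowing strategy is accepted only when every input (parent) operator is batched. The sketch below is a hedged, standalone illustration of that rule; the method and parameter names are hypothetical, and a plain IllegalStateException stands in for WindowingRequiredException.

// Standalone illustration of the windowing check above (hypothetical names; a plain
// IllegalStateException stands in for WindowingRequiredException).
import java.util.List;

public class JoinWindowingCheck {

  static void requireWindowingOrBatchedInputs(boolean hasExplicitWindowing, List<Boolean> parentsBatched) {
    if (hasExplicitWindowing) {
      return; // the user supplied a windowing strategy, nothing more to verify
    }
    for (boolean batched : parentsBatched) {
      if (!batched) {
        throw new IllegalStateException(
            "Join operator requires either an explicit windowing"
                + " strategy or needs to be supplied with batched inputs.");
      }
    }
  }

  public static void main(String[] args) {
    requireWindowingOrBatchedInputs(false, List.of(true, true)); // accepted: all inputs batched
    try {
      requireWindowingOrBatchedInputs(false, List.of(true, false));
    } catch (IllegalStateException expected) {
      System.out.println(expected.getMessage()); // rejected: one input is unbatched
    }
  }
}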
final JavaRDD<SparkElement> right = (JavaRDD<SparkElement>) inputs.get(1);
final Windowing windowing =
    operator.getWindowing() == null
        ? AttachedWindowing.INSTANCE
        : operator.getWindowing();
final UnaryFunction rightKeyExtractor = originalOperator.getRightKeyExtractor();
final Windowing windowing =
    originalOperator.getWindowing() == null
        ? AttachedWindowing.INSTANCE
        : originalOperator.getWindowing();
DataSet<BatchElement<Window, Pair>> leftExtracted =
    left.flatMap(new KeyExtractor(leftKeyExtractor, windowing))
).getProducer().getHints().size());
assertTrue(join.getWindowing() instanceof Time);
@Test
public void testBuild_Windowing() {
  Flow flow = Flow.create("TEST");
  Dataset<String> left = Util.createMockDataset(flow, 1);
  Dataset<String> right = Util.createMockDataset(flow, 1);
  Join.named("Join1")
      .of(left, right)
      .by(String::length, String::length)
      .using((String l, String r, Collector<String> c) -> c.collect(l + r))
      .windowBy(Time.of(Duration.ofHours(1)))
      .output();
  Join join = (Join) flow.operators().iterator().next();
  assertTrue(join.getWindowing() instanceof Time);
}
keyExtractor,
e -> e,
getWindowing(),
(StateContext context, Collector ctx) -> {
  StorageProvider storages = context.getStorageProvider();
operator.getWindowing() == null || operator.getWindowing() instanceof GlobalWindowing,
"BatchJoinTranslator only supports GlobalWindowing.");
@Test
public void testBuild() {
  Flow flow = Flow.create("TEST");
  Dataset<String> left = Util.createMockDataset(flow, 2);
  Dataset<String> right = Util.createMockDataset(flow, 3);
  Dataset<Pair<Integer, String>> joined =
      Join.named("Join1")
          .of(left, right)
          .by(String::length, String::length)
          .using((String l, String r, Collector<String> c) -> {
            // no-op
          })
          .output();
  assertEquals(flow, joined.getFlow());
  assertEquals(1, flow.size());
  Join join = (Join) flow.operators().iterator().next();
  assertEquals(flow, join.getFlow());
  assertEquals("Join1", join.getName());
  assertNotNull(join.leftKeyExtractor);
  assertNotNull(join.rightKeyExtractor);
  assertEquals(joined, join.output());
  assertNull(join.getWindowing());
  assertEquals(Join.Type.INNER, join.getType());
}
@Test
public void testBuild_OutputValues() {
  Flow flow = Flow.create("TEST");
  Dataset<String> left = Util.createMockDataset(flow, 2);
  Dataset<String> right = Util.createMockDataset(flow, 3);
  Dataset<String> joined =
      Join.named("JoinValues")
          .of(left, right)
          .by(String::length, String::length)
          .using((String l, String r, Collector<String> c) -> {
            // no-op
          })
          .outputValues();
  assertEquals(flow, joined.getFlow());
  assertEquals(2, flow.size());
  Join join = (Join) flow.operators().iterator().next();
  assertEquals(flow, join.getFlow());
  assertEquals("JoinValues", join.getName());
  assertNotNull(join.getLeftKeyExtractor());
  assertNotNull(join.getRightKeyExtractor());
  assertNull(join.getWindowing());
  assertEquals(Join.Type.INNER, join.getType());
}
@Test
public void testBuild_WithCounters() {
  Flow flow = Flow.create("TEST");
  Dataset<String> left = Util.createMockDataset(flow, 2);
  Dataset<String> right = Util.createMockDataset(flow, 3);
  Dataset<Pair<Integer, String>> joined =
      Join.named("Join1")
          .of(left, right)
          .by(String::length, String::length)
          .using((String l, String r, Collector<String> c) -> {
            c.getCounter("my-counter").increment();
            c.collect(l + r);
          })
          .output();
  assertEquals(flow, joined.getFlow());
  assertEquals(1, flow.size());
  Join join = (Join) flow.operators().iterator().next();
  assertEquals(flow, join.getFlow());
  assertEquals("Join1", join.getName());
  assertNotNull(join.leftKeyExtractor);
  assertNotNull(join.rightKeyExtractor);
  assertEquals(joined, join.output());
  assertNull(join.getWindowing());
  assertEquals(Join.Type.INNER, join.getType());
}