right .flatMapToPair(rightKeyExtractor) .setName(operator.getName() + "::extract-right") .collect()))); left .flatMapToPair(leftKeyExtractor) .setName(operator.getName() + "::extract-left") .flatMapToPair( t -> { .setName(operator.getName() + "::map-side-left-join"); left .flatMapToPair(leftKeyExtractor) .setName(operator.getName() + "::extract-left") .collect()))); right .flatMapToPair(rightKeyExtractor) .setName(operator.getName() + "::extract-right") .flatMapToPair( t -> { .setName(operator.getName() + "::map-side-right-join"); new LazyAccumulatorProvider( context.getAccumulatorFactory(), context.getSettings()))) .setName(operator.getName() + "::apply-udf-and-wrap-in-spark-element");
@Test public void testBuild_ImplicitName() { Flow flow = Flow.create("TEST"); Dataset<String> left = Util.createMockDataset(flow, 1); Dataset<String> right = Util.createMockDataset(flow, 1); Join.of(left, right) .by(String::length, String::length) .using((String l, String r, Collector<String> c) -> { // no-op }) .output(); Join join = (Join) flow.operators().iterator().next(); assertEquals("Join", join.getName()); }
Either.<SparkElement, SparkElement>left(se)); }) .setName(operator.getName() + "::wrap-keys"); }); rightPair.setName(operator.getName() + "::wrap-values"); .setName(operator.getName() + "::union-inputs") .repartitionAndSortWithinPartitions(partitioner, comparator) .setName(operator.getName() + "::sort-by-key-and-side") .mapPartitions( iterator -> new JoinIterator<>(new BatchJoinIterator<>(iterator), operator.getType())) .setName(operator.getName() + "::create-iterator") .flatMap( new FlatMapFunctionWithCollector<>( new LazyAccumulatorProvider( context.getAccumulatorFactory(), context.getSettings()))) .setName(operator.getName() + "::apply-udf-and-wrap-in-spark-element");
@Test public void testBuild_OutputValues() { Flow flow = Flow.create("TEST"); Dataset<String> left = Util.createMockDataset(flow, 2); Dataset<String> right = Util.createMockDataset(flow, 3); Dataset<String> joined = Join.named("JoinValues") .of(left, right) .by(String::length, String::length) .using((String l, String r, Collector<String> c) -> { // no-op }) .outputValues(); assertEquals(flow, joined.getFlow()); assertEquals(2, flow.size()); Join join = (Join) flow.operators().iterator().next(); assertEquals(flow, join.getFlow()); assertEquals("JoinValues", join.getName()); assertNotNull(join.getLeftKeyExtractor()); assertNotNull(join.getRightKeyExtractor()); assertNull(join.getWindowing()); assertEquals(Join.Type.INNER, join.getType()); }
@Test public void testBuild() { Flow flow = Flow.create("TEST"); Dataset<String> left = Util.createMockDataset(flow, 2); Dataset<String> right = Util.createMockDataset(flow, 3); Dataset<Pair<Integer, String>> joined = Join.named("Join1") .of(left, right) .by(String::length, String::length) .using((String l, String r, Collector<String> c) -> { // no-op }) .output(); assertEquals(flow, joined.getFlow()); assertEquals(1, flow.size()); Join join = (Join) flow.operators().iterator().next(); assertEquals(flow, join.getFlow()); assertEquals("Join1", join.getName()); assertNotNull(join.leftKeyExtractor); assertNotNull(join.rightKeyExtractor); assertEquals(joined, join.output()); assertNull(join.getWindowing()); assertEquals(Join.Type.INNER, join.getType()); }
/**
 * Builds a named join whose joiner function touches a counter through the collector, then
 * checks the resulting flow: it holds a single operator, an inner, non-windowed join named
 * {@code "Join1"} with both key extractors set, whose output is the dataset returned by
 * the builder.
 */
@Test
public void testBuild_WithCounters() {
  Flow testFlow = Flow.create("TEST");
  Dataset<String> leftInput = Util.createMockDataset(testFlow, 2);
  Dataset<String> rightInput = Util.createMockDataset(testFlow, 3);

  Dataset<Pair<Integer, String>> joinedPairs =
      Join.named("Join1")
          .of(leftInput, rightInput)
          .by(String::length, String::length)
          .using(
              (String leftItem, String rightItem, Collector<String> out) -> {
                // The joiner uses the counter API in addition to emitting a value.
                out.getCounter("my-counter").increment();
                out.collect(leftItem + rightItem);
              })
          .output();

  // The output dataset must belong to the same flow as its inputs.
  assertEquals(testFlow, joinedPairs.getFlow());
  assertEquals(1, testFlow.size());

  // The only operator in the flow is the join under test.
  Object firstOperator = testFlow.operators().iterator().next();
  Join builtJoin = (Join) firstOperator;

  assertEquals(testFlow, builtJoin.getFlow());
  assertEquals("Join1", builtJoin.getName());
  assertNotNull(builtJoin.leftKeyExtractor);
  assertNotNull(builtJoin.rightKeyExtractor);
  assertEquals(joinedPairs, builtJoin.output());
  assertNull(builtJoin.getWindowing());
  assertEquals(Join.Type.INNER, builtJoin.getType());
}