/**
 * Tells whether the given {@code ReduceByKey} operator passes this translation check.
 *
 * <p>The result is {@code true} only when the operator is combinable and its windowing
 * is either absent ({@code null}) or is neither a {@link MergingWindowing} nor driven
 * by a stateful trigger.
 *
 * @param operator the operator to inspect
 * @return {@code true} if all of the conditions above hold, {@code false} otherwise
 */
static boolean wantTranslate(ReduceByKey operator) {
  if (!operator.isCombinable()) {
    return false;
  }
  // No windowing declared: nothing further to rule out.
  if (operator.getWindowing() == null) {
    return true;
  }
  if (operator.getWindowing() instanceof MergingWindowing) {
    return false;
  }
  return !operator.getWindowing().getTrigger().isStateful();
}
/**
 * Tells whether the given {@code ReduceByKey} operator passes this translation check.
 *
 * <p>The result is {@code true} only when both of the following hold:
 * <ul>
 *   <li>the operator has no value comparator, or its key class is reported comparable
 *       by {@code ClassUtils.isComparable};</li>
 *   <li>its windowing is absent ({@code null}), or is neither a {@link MergingWindowing}
 *       nor driven by a stateful trigger.</li>
 * </ul>
 *
 * @param operator the operator to inspect
 * @param context the acceptor context; not consulted by this method
 * @return {@code true} if both conditions hold, {@code false} otherwise
 */
static boolean wantTranslate(ReduceByKey operator, SparkFlowTranslator.AcceptorContext context) {
  // A value comparator is only acceptable together with a comparable key class.
  if (operator.getValueComparator() != null
      && !ClassUtils.isComparable(operator.getKeyClass())) {
    return false;
  }
  if (operator.getWindowing() == null) {
    return true;
  }
  if (operator.getWindowing() instanceof MergingWindowing) {
    return false;
  }
  return !operator.getWindowing().getTrigger().isStateful();
}
/**
 * Builds a {@code ReduceByKey} with {@code combineBy} and an explicit one-hour
 * {@code Time} windowing, then checks that the operator registered in the flow
 * reports a {@code Time} windowing and no value comparator.
 */
@Test
public void testBuild_Windowing() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  ReduceByKey.of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Long::longValue unboxes directly; Long::new would route through the
      // deprecated Long(long) constructor and allocate a throwaway box.
      .combineBy(n -> StreamSupport.stream(n.spliterator(), false)
          .mapToLong(Long::longValue)
          .sum())
      .windowBy(Time.of(Duration.ofHours(1)))
      .output();

  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertTrue(reduce.getWindowing() instanceof Time);
  // Use the accessor rather than the raw field, matching how other callers read it.
  assertNull(reduce.getValueComparator());
}
/**
 * Builds a {@code ReduceByKey} with sorted values and applies {@code windowBy}
 * through {@code applyIf(true, ...)}, then checks that the operator registered
 * in the flow reports a {@code Time} windowing.
 */
@Test
public void testWindow_applyIf() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  ReduceByKey.of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Long::longValue unboxes directly; Long::new would route through the
      // deprecated Long(long) constructor and allocate a throwaway box.
      .reduceBy(n -> StreamSupport.stream(n.spliterator(), false)
          .mapToLong(Long::longValue)
          .sum())
      .withSortedValues(Long::compare)
      .applyIf(true, b -> b.windowBy(Time.of(Duration.ofHours(1))))
      .output();

  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertTrue(reduce.getWindowing() instanceof Time);
}
/**
 * Builds a {@code ReduceByKey} with sorted values and calls the three-argument
 * {@code applyIf} with a {@code false} condition, where only the last function
 * configures {@code windowBy}. The test expects the resulting operator to report
 * a {@code Time} windowing, i.e. that the last function was the one applied.
 */
@Test
public void testWindow_applyIfNot() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  ReduceByKey.of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Long::longValue unboxes directly; Long::new would route through the
      // deprecated Long(long) constructor and allocate a throwaway box.
      .reduceBy(n -> StreamSupport.stream(n.spliterator(), false)
          .mapToLong(Long::longValue)
          .sum())
      .withSortedValues(Long::compare)
      .applyIf(false, b -> b, b -> b.windowBy(Time.of(Duration.ofHours(1))))
      .output();

  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertTrue(reduce.getWindowing() instanceof Time);
}
/**
 * Builds a named {@code ReduceByKey} terminated with {@code outputValues()} and
 * checks the resulting flow: the output dataset belongs to the flow, the flow
 * holds two operators, and the first operator is the {@code ReduceByKey} with
 * the expected name, non-null key extractor, value extractor and reducer, and
 * no windowing.
 */
@Test
public void testBuild_OutputValues() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  Dataset<Long> reduced = ReduceByKey.named("ReduceByKeyValues")
      .of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Long::longValue unboxes directly; Long::new would route through the
      // deprecated Long(long) constructor and allocate a throwaway box.
      .reduceBy(n -> StreamSupport.stream(n.spliterator(), false)
          .mapToLong(Long::longValue)
          .sum())
      .outputValues();

  assertEquals(flow, reduced.getFlow());
  // NOTE(review): presumably outputValues() appends one extra operator after
  // the ReduceByKey, hence two operators in total — confirm against the builder.
  assertEquals(2, flow.size());

  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertEquals(flow, reduce.getFlow());
  assertEquals("ReduceByKeyValues", reduce.getName());
  assertNotNull(reduce.getKeyExtractor());
  assertNotNull(reduce.getValueExtractor());
  assertNotNull(reduce.getReducer());
  assertNull(reduce.getWindowing());
}
/**
 * Builds a named {@code ReduceByKey} with {@code combineBy} and an explicit
 * {@code Time} windowing, then checks the resulting flow: it holds exactly one
 * operator, which carries the expected name, non-null key extractor, value
 * extractor and reducer, returns the built dataset from {@code output()}, and
 * exposes the very same windowing instance that was passed in.
 */
@Test
public void testBuild() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);
  Time<String> windowing = Time.of(Duration.ofHours(1));

  Dataset<Pair<String, Long>> reduced = ReduceByKey.named("ReduceByKey1")
      .of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Long::longValue unboxes directly; Long::new would route through the
      // deprecated Long(long) constructor and allocate a throwaway box.
      .combineBy(n -> StreamSupport.stream(n.spliterator(), false)
          .mapToLong(Long::longValue)
          .sum())
      .windowBy(windowing)
      .output();

  assertEquals(flow, reduced.getFlow());
  assertEquals(1, flow.size());

  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertEquals(flow, reduce.getFlow());
  assertEquals("ReduceByKey1", reduce.getName());
  assertNotNull(reduce.getKeyExtractor());
  // Read through the accessors rather than the raw fields, as the sibling tests do.
  assertNotNull(reduce.getValueExtractor());
  assertNotNull(reduce.getReducer());
  assertEquals(reduced, reduce.output());
  assertSame(windowing, reduce.getWindowing());
}
final UnaryFunctor<Stream, Object> reducer = origOperator.getReducer(); final Windowing windowing = origOperator.getWindowing() == null ? AttachedWindowing.INSTANCE : origOperator.getWindowing();
operator.getWindowing() == null ? AttachedWindowing.INSTANCE : operator.getWindowing();