/**
 * Finalizes the builder: creates the {@link ReduceByKey} operator from the
 * settings gathered so far, adds it to the flow of the input dataset and
 * hands back the operator's output.
 *
 * @param outputHints hints forwarded to the operator (collected into a set)
 * @return the output dataset of the newly added operator
 */
@Override
public Dataset<Pair<KEY, OUT>> output(OutputHint... outputHints) {
  // The operator lives in the same flow as the dataset it consumes.
  Flow targetFlow = input.getFlow();

  ReduceByKey<IN, KEY, VALUE, OUT, W> operator =
      new ReduceByKey<>(
          name,
          targetFlow,
          input,
          keyExtractor,
          keyClass,
          valueExtractor,
          windowing,
          reducer,
          valuesComparator,
          Sets.newHashSet(outputHints));

  targetFlow.add(operator);
  return operator.output();
}
}
/**
 * Completes the builder chain by instantiating the {@link ReduceByKey}
 * operator, adding it to the input dataset's flow, and returning the
 * dataset the operator produces.
 *
 * @param outputHints hints forwarded to the operator (collected into a set)
 * @return the output dataset of the newly added operator
 */
@Override
public Dataset<Pair<KEY, OUT>> output(OutputHint... outputHints) {
  // Resolve the flow once; it is needed both for construction and registration.
  Flow owningFlow = input.getFlow();

  ReduceByKey<IN, KEY, VALUE, OUT, W> reduceOp =
      new ReduceByKey<>(
          name,
          owningFlow,
          input,
          keyExtractor,
          keyClass,
          valueExtractor,
          windowing,
          reducer,
          valuesComparator,
          Sets.newHashSet(outputHints));

  owningFlow.add(reduceOp);
  return reduceOp.output();
}
}
/**
 * Expands this operator into a two-node DAG of basic operators:
 * <ol>
 *   <li>a {@link ReduceByKey} keyed by {@link #getKeyExtractor()} whose value
 *       extractor and reducer both yield {@code null}, created with an empty
 *       hint set;</li>
 *   <li>a {@link MapElements} applying {@code Pair::getFirst} to the reduce
 *       output, created with this operator's hints.</li>
 * </ol>
 *
 * @return DAG rooted at the reduce step with the map step attached to it
 */
@Override
public DAG<Operator<?, ?>> getBasicOps() {
  Flow flow = input.getFlow();

  // Step 1: reduce by key; values are irrelevant, so everything maps to null.
  String reduceName = getName() + "::" + "ReduceByKey";
  ReduceByKey<IN, ELEM, Void, Void, W> keyReduce =
      new ReduceByKey<>(
          reduceName,
          flow,
          input,
          getKeyExtractor(),
          null,
          ignoredElement -> null,
          windowing,
          (CombinableReduceFunction<Void>) ignoredValues -> null,
          Collections.emptySet());

  // Step 2: take the first component of each pair produced by the reduce step.
  MapElements keysOnly =
      new MapElements<>(
          getName() + "::" + "Map",
          flow,
          keyReduce.output(),
          Pair::getFirst,
          getHints());

  DAG<Operator<?, ?>> basicOps = DAG.of(keyReduce);
  basicOps.add(keysOnly, keyReduce);
  return basicOps;
}
}
/**
 * Translates this operator into basic operators: a {@link ReduceByKey}
 * (keyed by {@link #getKeyExtractor()}, with value extractor and reducer that
 * both return {@code null}, and no hints) followed by a {@link MapElements}
 * that applies {@code Pair::getFirst} and carries this operator's hints.
 *
 * @return DAG created from the reduce step, with the map step added after it
 */
@Override
public DAG<Operator<?, ?>> getBasicOps() {
  Flow inputFlow = input.getFlow();
  String reduceStepName = getName() + "::" + "ReduceByKey";

  // Values play no role here; both the extractor and the combiner yield null.
  ReduceByKey<IN, ELEM, Void, Void, W> reduceStep =
      new ReduceByKey<>(
          reduceStepName,
          inputFlow,
          input,
          getKeyExtractor(),
          null,
          element -> null,
          windowing,
          (CombinableReduceFunction<Void>) values -> null,
          Collections.emptySet());

  // Reduce the (key, null) pairs to the key alone.
  MapElements mapStep =
      new MapElements<>(
          getName() + "::" + "Map",
          inputFlow,
          reduceStep.output(),
          Pair::getFirst,
          getHints());

  DAG<Operator<?, ?>> result = DAG.of(reduceStep);
  result.add(mapStep, reduceStep);
  return result;
}
}
/**
 * Verifies that the fluent {@code ReduceByKey} builder registers exactly one
 * operator in the flow and that the operator carries the configured name,
 * key extractor, value extractor, reducer, windowing and output dataset.
 */
@Test
public void testBuild() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  Time<String> windowing = Time.of(Duration.ofHours(1));
  Dataset<Pair<String, Long>> reduced =
      ReduceByKey.named("ReduceByKey1")
          .of(dataset)
          .keyBy(s -> s)
          .valueBy(s -> 1L)
          // Unbox with longValue(); Long::new would allocate a boxed Long through
          // the deprecated Long(long) constructor only to unbox it again.
          .combineBy(
              n -> StreamSupport.stream(n.spliterator(), false)
                  .mapToLong(Long::longValue)
                  .sum())
          .windowBy(windowing)
          .output();

  // The builder must have added exactly one operator to the dataset's flow.
  assertEquals(flow, reduced.getFlow());
  assertEquals(1, flow.size());

  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertEquals(flow, reduce.getFlow());
  assertEquals("ReduceByKey1", reduce.getName());
  assertNotNull(reduce.getKeyExtractor());
  assertNotNull(reduce.valueExtractor);
  assertNotNull(reduce.reducer);
  assertEquals(reduced, reduce.output());
  // Same instance expected, not merely an equal one.
  assertSame(windowing, reduce.getWindowing());
}
(Dataset) rbk.output(), Pair::getSecond);
(Dataset) rbk.output(), Pair::getSecond);