final Dataset<Pair<String, Long>> aggregated = ReduceByKey.named("AGGREGATE") .of(parsedWithEventTime) .keyBy(LogLine::getIp)
final Dataset<Pair<String, Long>> counted = ReduceByKey.named("REDUCE") .of(words) .keyBy(String::toLowerCase)
ReduceByKey.named("second") .of(mapped) .keyBy(e -> e)
/**
 * Builds a {@code ReduceByKey} operator terminated with {@code outputValues()} and checks
 * that the operator registered in the flow carries the name, key extractor, value extractor
 * and reducer supplied to the builder, and that no windowing is set.
 */
@Test
public void testBuild_OutputValues() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  Dataset<Long> reduced = ReduceByKey.named("ReduceByKeyValues")
      .of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Long::longValue unboxes directly; the previously used Long(long) constructor is
      // deprecated and only created a temporary box that was unboxed again right away.
      .reduceBy(n -> StreamSupport.stream(n.spliterator(), false)
          .mapToLong(Long::longValue)
          .sum())
      .outputValues();

  assertEquals(flow, reduced.getFlow());
  // NOTE(review): two entries are expected here, presumably the reduce operator itself plus
  // an extra step added by outputValues() -- confirm against the builder implementation.
  assertEquals(2, flow.size());

  // The first operator of the flow is expected to be the ReduceByKey built above.
  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertEquals(flow, reduce.getFlow());
  assertEquals("ReduceByKeyValues", reduce.getName());
  assertNotNull(reduce.getKeyExtractor());
  assertNotNull(reduce.getValueExtractor());
  assertNotNull(reduce.getReducer());
  // windowBy(...) was never called on the builder, so no windowing should be reported.
  assertNull(reduce.getWindowing());
}
/**
 * Builds a windowed {@code ReduceByKey} operator via {@code combineBy(...)} and
 * {@code output()} and checks that the single operator registered in the flow carries the
 * name, extractors, reducer, output dataset and windowing supplied to the builder.
 */
@Test
public void testBuild() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);
  Time<String> windowing = Time.of(Duration.ofHours(1));

  Dataset<Pair<String, Long>> reduced = ReduceByKey.named("ReduceByKey1")
      .of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Long::longValue unboxes directly; the previously used Long(long) constructor is
      // deprecated and only created a temporary box that was unboxed again right away.
      .combineBy(n -> StreamSupport.stream(n.spliterator(), false)
          .mapToLong(Long::longValue)
          .sum())
      .windowBy(windowing)
      .output();

  assertEquals(flow, reduced.getFlow());
  assertEquals(1, flow.size());

  // The only operator in the flow is expected to be the ReduceByKey built above.
  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertEquals(flow, reduce.getFlow());
  assertEquals("ReduceByKey1", reduce.getName());
  assertNotNull(reduce.getKeyExtractor());
  // NOTE(review): these two are read as fields; if this version of ReduceByKey exposes
  // getValueExtractor()/getReducer(), prefer the accessors -- confirm before changing.
  assertNotNull(reduce.valueExtractor);
  assertNotNull(reduce.reducer);
  assertEquals(reduced, reduce.output());
  // The operator must hand back the very same windowing instance given to windowBy(...).
  assertSame(windowing, reduce.getWindowing());
}
.of(input).using(e -> e).output(SizeHint.FITS_IN_MEMORY); Dataset<Pair<Object, Long>> reduced = ReduceByKey .named("reduceByKeyTwoHints") .of(mapped) .keyBy(e -> e)