/**
 * Builds a {@code ReduceByKey} through {@code ReduceByKey.of(...)} without calling
 * {@code named(...)} and checks that the operator registered in the flow reports the
 * name {@code "ReduceByKey"}.
 */
@Test
public void testBuild_ImplicitName() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  ReduceByKey.of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Long::longValue unboxes directly; Long::new would route every element through the
      // deprecated Long(long) constructor only to unbox the result again.
      .combineBy(n ->
          StreamSupport.stream(n.spliterator(), false).mapToLong(Long::longValue).sum())
      .output();

  // The operator under test is the first one registered in the flow.
  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertEquals("ReduceByKey", reduce.getName());
}
/**
 * Expands this operator into a DAG holding a single {@code ReduceStateByKey} operator.
 *
 * <p>The state factory depends on the reducer: a combinable reducer gets a
 * {@code CombiningReduceState.Factory}; any other reducer gets a
 * {@code NonCombiningReduceState.Factory} built with the value comparator.
 *
 * @return a DAG containing only the created {@code ReduceStateByKey} operator
 */
@SuppressWarnings("unchecked")
@Override
public DAG<Operator<?, ?>> getBasicOps() {
  StateSupport.MergeFromStateMerger merger = new StateSupport.MergeFromStateMerger<>();

  StateFactory factory;
  if (reducer.isCombinable()) {
    factory = new CombiningReduceState.Factory<>((ReduceFunctor) reducer);
  } else {
    factory = new NonCombiningReduceState.Factory<>(reducer, valueComparator);
  }

  Flow owningFlow = getFlow();
  Operator reduceStateByKey =
      new ReduceStateByKey(
          getName(),
          owningFlow,
          input,
          keyExtractor,
          valueExtractor,
          windowing,
          factory,
          merger,
          getHints());
  return DAG.of(reduceStateByKey);
}
/**
 * Translates this operator into its basic form: a DAG with one {@code ReduceStateByKey}
 * operator that carries over this operator's name, flow, input, key/value extractors,
 * windowing and hints.
 *
 * <p>If {@code reducer.isCombinable()} holds, state is created by a
 * {@code CombiningReduceState.Factory}; otherwise by a
 * {@code NonCombiningReduceState.Factory} that also receives {@code valueComparator}.
 *
 * @return the single-node DAG wrapping the {@code ReduceStateByKey} operator
 */
@SuppressWarnings("unchecked")
@Override
public DAG<Operator<?, ?>> getBasicOps() {
  StateSupport.MergeFromStateMerger stateMerger = new StateSupport.MergeFromStateMerger<>();

  StateFactory reduceStateFactory;
  if (!reducer.isCombinable()) {
    reduceStateFactory = new NonCombiningReduceState.Factory<>(reducer, valueComparator);
  } else {
    reduceStateFactory = new CombiningReduceState.Factory<>((ReduceFunctor) reducer);
  }

  Flow currentFlow = getFlow();
  Operator basicOp =
      new ReduceStateByKey(
          getName(),
          currentFlow,
          input,
          keyExtractor,
          valueExtractor,
          windowing,
          reduceStateFactory,
          stateMerger,
          getHints());
  return DAG.of(basicOp);
}
input .flatMapToPair(new CompositeKeyExtractor<>(keyExtractor, valueExtractor, windowing)) .setName(operator.getName() + "::extract-key-values"); tuples .reduceByKey(new CombinableReducer<>(combiner)) .setName(operator.getName() + "::combine-by-key"); return new SparkElement<>(kw.window(), kw.timestamp(), Pair.of(kw.key(), el)); }) .setName(operator.getName() + "::wrap-in-spark-element"); tuples .mapToPair(t -> new Tuple2<>(new KeyedWindowValue<>(t._1, t._2), Empty.get())) .setName(operator.getName() + "::create-composite-key") .repartitionAndSortWithinPartitions( partitioner, new SecondarySortComparator<>(operator.getValueComparator())) .setName(operator.getName() + "::secondary-sort") .mapToPair(t -> new Tuple2<>(t._1.toKeyedWindow(), t._1.getValue())) .setName(operator.getName() + "::unwrap-composite-key") .mapPartitionsToPair(ReduceByKeyIterator::new) .setName(operator.getName() + "::create-iterator") .flatMapValues(new Reducer<>(reducer, accumulatorProvider)) .setName(operator.getName() + "::apply-udf"); } else { tuples .repartitionAndSortWithinPartitions(partitioner) .setName(operator.getName() + "::sort")
/**
 * Builds a named {@code ReduceByKey} terminated with {@code outputValues()} and checks
 * the resulting flow: it contains two operators, and the first one is the
 * {@code ReduceByKey} with the requested name, non-null key extractor, value extractor
 * and reducer, and no windowing.
 */
@Test
public void testBuild_OutputValues() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  Dataset<Long> reduced =
      ReduceByKey.named("ReduceByKeyValues")
          .of(dataset)
          .keyBy(s -> s)
          .valueBy(s -> 1L)
          // Long::longValue unboxes directly; Long::new would route every element through
          // the deprecated Long(long) constructor only to unbox the result again.
          .reduceBy(n ->
              StreamSupport.stream(n.spliterator(), false).mapToLong(Long::longValue).sum())
          .outputValues();

  assertEquals(flow, reduced.getFlow());
  assertEquals(2, flow.size());

  // The ReduceByKey is expected to be the first operator registered in the flow.
  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertEquals(flow, reduce.getFlow());
  assertEquals("ReduceByKeyValues", reduce.getName());
  assertNotNull(reduce.getKeyExtractor());
  assertNotNull(reduce.getValueExtractor());
  assertNotNull(reduce.getReducer());
  assertNull(reduce.getWindowing());
}
/**
 * Builds a named, windowed {@code ReduceByKey} and checks the resulting flow: it
 * contains exactly one operator, which carries the requested name, non-null key
 * extractor, value extractor and reducer, the very same windowing instance that was
 * passed in, and whose output is the dataset returned by the builder.
 */
@Test
public void testBuild() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  Time<String> windowing = Time.of(Duration.ofHours(1));
  Dataset<Pair<String, Long>> reduced =
      ReduceByKey.named("ReduceByKey1")
          .of(dataset)
          .keyBy(s -> s)
          .valueBy(s -> 1L)
          // Long::longValue unboxes directly; Long::new would route every element through
          // the deprecated Long(long) constructor only to unbox the result again.
          .combineBy(n ->
              StreamSupport.stream(n.spliterator(), false).mapToLong(Long::longValue).sum())
          .windowBy(windowing)
          .output();

  assertEquals(flow, reduced.getFlow());
  assertEquals(1, flow.size());

  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertEquals(flow, reduce.getFlow());
  assertEquals("ReduceByKey1", reduce.getName());
  assertNotNull(reduce.getKeyExtractor());
  // Read operator state through its accessors rather than reaching into fields.
  assertNotNull(reduce.getValueExtractor());
  assertNotNull(reduce.getReducer());
  assertEquals(reduced, reduce.output());
  assertSame(windowing, reduce.getWindowing());
}