// Spark translator fragment (collapsed onto one line): resolves the operator's
// state factory/merger and key/value extractors, substitutes AttachedWindowing
// when the operator declares no explicit windowing, then wires the pipeline:
// key/value extraction -> repartition + sort within partitions -> state UDF.
// NOTE(review): this excerpt appears truncated — `tuples` has no visible
// definition and the parentheses around LazyAccumulatorProvider are
// unbalanced. Verify against the full translator source.
StateFactory<?, ?, State<?, ?>> stateFactory = operator.getStateFactory(); StateMerger<?, ?, State<?, ?>> stateCombiner = operator.getStateMerger(); final UnaryFunction keyExtractor = operator.getKeyExtractor(); final UnaryFunction valueExtractor = operator.getValueExtractor(); final Windowing windowing = operator.getWindowing() == null ? AttachedWindowing.INSTANCE : operator.getWindowing(); input .flatMapToPair(new CompositeKeyExtractor(keyExtractor, valueExtractor, windowing)) .setName(operator.getName() + "::extract-key-value"); tuples .repartitionAndSortWithinPartitions(groupingPartitioner, comparator) .setName(operator.getName() + "::sort"); new LazyAccumulatorProvider( context.getAccumulatorFactory(), context.getSettings()))) .setName(operator.getName() + "::apply-udf");
/** Builds the operator under test: all elements share one (empty) key, so a single state sees the whole input. */
@Override
protected Dataset<Pair<String, Integer>> getOutput(Dataset<Integer> input) {
  return ReduceStateByKey.of(input)
      .keyBy(item -> "")
      .valueBy(item -> item)
      .stateFactory(CountingSortState::new)
      // CountingSortState is never merged in this scenario; merging is a no-op.
      .mergeStatesBy((target, sources) -> {})
      .output();
}
/**
 * Finalizes the builder: constructs the {@link ReduceStateByKey} operator from
 * the collected settings, registers it with the owning flow, and returns its
 * output dataset.
 */
@Override
public Dataset<Pair<KEY, OUT>> output(OutputHint... outputHints) {
  final Flow flow = input.getFlow();
  final ReduceStateByKey<IN, KEY, VALUE, OUT, STATE, W> operator =
      new ReduceStateByKey<>(
          name,
          flow,
          input,
          keyExtractor,
          valueExtractor,
          windowing,
          stateFactory,
          stateMerger,
          Sets.newHashSet(outputHints));
  // The operator must be part of the flow before its output can be consumed.
  flow.add(operator);
  return operator.output();
}
}
@SuppressWarnings("unchecked")
RSBKReducer(
    ReduceStateByKey operator,
    StateContext stateContext,
    Windowing windowing,
    FlinkAccumulatorFactory accumulatorFactory,
    Settings settings) {
  // State handling comes straight from the operator definition ...
  this.stateFactory = operator.getStateFactory();
  this.stateCombiner = operator.getStateMerger();
  // ... while the runtime collaborators are supplied by the translation layer.
  this.stateContext = stateContext;
  this.windowing = windowing;
  this.trigger = windowing.getTrigger();
  this.accumulatorFactory = accumulatorFactory;
  this.settings = settings;
}
// Constructor tail (the signature lies outside this excerpt): validates and
// stores the reducer's collaborators, falls back to AttachedWindowing when the
// operator declares no windowing, and builds the ProcessingState that owns
// per-window state handling. `allowEarlyEmitting` — presumably controls
// whether results may be emitted before a window closes; confirm at the caller.
this.input = requireNonNull(input); this.output = requireNonNull(output); this.isAttachedWindowing = operator.getWindowing() == null; this.windowing = isAttachedWindowing ? AttachedWindowing.INSTANCE : operator.getWindowing(); this.keyExtractor = requireNonNull(keyExtractor); this.valueExtractor = requireNonNull(valueExtractor); this.processing = new ProcessingState( output, scheduler, requireNonNull(operator.getStateFactory()), requireNonNull(operator.getStateMerger()), stateContext, allowEarlyEmitting);
/** Verifies that a fully-specified builder chain produces exactly one correctly configured operator. */
@Test
public void testBuild() {
  final Flow flow = Flow.create("TEST");
  final Dataset<String> words = Util.createMockDataset(flow, 2);
  final Time<String> windowing = Time.of(Duration.ofHours(1));

  final Dataset<Pair<String, Long>> counted =
      ReduceStateByKey.named("ReduceStateByKey1")
          .of(words)
          .keyBy(word -> word)
          .valueBy(word -> 1L)
          .stateFactory(WordCountState::new)
          .mergeStatesBy(WordCountState::combine)
          .windowBy(windowing)
          .output();

  // The flow must now hold exactly the operator built above.
  assertEquals(flow, counted.getFlow());
  assertEquals(1, flow.size());

  final ReduceStateByKey operator = (ReduceStateByKey) flow.operators().iterator().next();
  assertEquals(flow, operator.getFlow());
  assertEquals("ReduceStateByKey1", operator.getName());
  assertNotNull(operator.getKeyExtractor());
  assertNotNull(operator.getValueExtractor());
  assertNotNull(operator.getStateMerger());
  assertNotNull(operator.getStateFactory());
  assertEquals(counted, operator.output());
  assertSame(windowing, operator.getWindowing());
}
// Local-executor wiring fragment: one ReduceStateByKeyReducer is submitted per
// output partition. Bounded inputs get a no-op trigger scheduler; the
// unbounded branch of the ternary continues beyond this excerpt.
// NOTE(review): `reduceStateByKeyNode.get());` is a truncated call and the
// trailing ternary is unterminated — verify against the full source.
final UnaryFunction keyExtractor = reduceStateByKey.getKeyExtractor(); final UnaryFunction valueExtractor = reduceStateByKey.getValueExtractor(); reduceStateByKeyNode.get()); final Windowing windowing = reduceStateByKey.getWindowing(); executor.execute(new ReduceStateByKeyReducer( reduceStateByKey, reduceStateByKey.getName() + "#part-" + (i++), q, output, keyExtractor, valueExtractor, reduceStateByKey.input().isBounded() ? new NoopTriggerScheduler() : (windowing != null
// Fragment: substitutes AttachedWindowing when the original operator declares
// no explicit windowing, then captures the key/value extraction UDFs. The
// left-hand side of the first expression is not visible in this excerpt.
origOperator.getWindowing() == null ? AttachedWindowing.INSTANCE : origOperator.getWindowing(); final UnaryFunction udfKey = origOperator.getKeyExtractor(); final UnaryFunction udfValue = origOperator.getValueExtractor();
/** The windowing supplied via {@code windowBy} must survive into the built operator. */
@Test
public void testBuild_Windowing() {
  final Flow flow = Flow.create("TEST");
  final Dataset<String> words = Util.createMockDataset(flow, 2);

  final Dataset<Pair<String, Long>> counted =
      ReduceStateByKey.of(words)
          .keyBy(word -> word)
          .valueBy(word -> 1L)
          .stateFactory(WordCountState::new)
          .mergeStatesBy(WordCountState::combine)
          .windowBy(Time.of(Duration.ofHours(1)))
          .output();

  final ReduceStateByKey operator = (ReduceStateByKey) flow.operators().iterator().next();
  assertTrue(operator.getWindowing() instanceof Time);
}
/** When no explicit name is given, the operator defaults to its class name. */
@Test
public void testBuild_ImplicitName() {
  final Flow flow = Flow.create("TEST");
  final Dataset<String> words = Util.createMockDataset(flow, 2);

  ReduceStateByKey.of(words)
      .keyBy(word -> word)
      .valueBy(word -> 1L)
      .stateFactory(WordCountState::new)
      .mergeStatesBy(WordCountState::combine)
      .output();

  final ReduceStateByKey operator = (ReduceStateByKey) flow.operators().iterator().next();
  assertEquals("ReduceStateByKey", operator.getName());
}
/**
 * Expands this operator into its basic-operator form: a single
 * {@link ReduceStateByKey} whose state implementation depends on whether the
 * reducer is combinable.
 */
@SuppressWarnings("unchecked")
@Override
public DAG<Operator<?, ?>> getBasicOps() {
  // Merging of partial states is delegated to the states themselves.
  final StateSupport.MergeFromStateMerger stateCombine = new StateSupport.MergeFromStateMerger<>();

  // Combinable reducers may fold values eagerly; non-combinable ones must
  // retain all values (sorted by the supplied comparator) before reducing.
  final StateFactory stateFactory;
  if (reducer.isCombinable()) {
    stateFactory = new CombiningReduceState.Factory<>((ReduceFunctor) reducer);
  } else {
    stateFactory = new NonCombiningReduceState.Factory<>(reducer, valueComparator);
  }

  final Flow flow = getFlow();
  final Operator reduceState =
      new ReduceStateByKey(
          getName(), flow, input, keyExtractor, valueExtractor,
          windowing, stateFactory, stateCombine, getHints());
  return DAG.of(reduceState);
}
// Local-executor wiring fragment (duplicate of an earlier excerpt): submits
// one ReduceStateByKeyReducer per output partition; bounded inputs receive a
// no-op trigger scheduler, the unbounded branch continues past this excerpt.
// NOTE(review): `reduceStateByKeyNode.get());` is truncated and the trailing
// ternary is unterminated — verify against the full source.
final UnaryFunction keyExtractor = reduceStateByKey.getKeyExtractor(); final UnaryFunction valueExtractor = reduceStateByKey.getValueExtractor(); reduceStateByKeyNode.get()); final Windowing windowing = reduceStateByKey.getWindowing(); executor.execute(new ReduceStateByKeyReducer( reduceStateByKey, reduceStateByKey.getName() + "#part-" + (i++), q, output, keyExtractor, valueExtractor, reduceStateByKey.input().isBounded() ? new NoopTriggerScheduler() : (windowing != null
// Constructor tail (signature outside this excerpt; duplicate of an earlier
// fragment): validates and stores collaborators, falls back to
// AttachedWindowing when the operator declares none, and builds the
// ProcessingState owning per-window state. `allowEarlyEmitting` — presumably
// permits emission before window close; confirm at the caller.
this.input = requireNonNull(input); this.output = requireNonNull(output); this.isAttachedWindowing = operator.getWindowing() == null; this.windowing = isAttachedWindowing ? AttachedWindowing.INSTANCE : operator.getWindowing(); this.keyExtractor = requireNonNull(keyExtractor); this.valueExtractor = requireNonNull(valueExtractor); this.processing = new ProcessingState( output, scheduler, requireNonNull(operator.getStateFactory()), requireNonNull(operator.getStateMerger()), stateContext, allowEarlyEmitting);
/** {@code applyIf(true, ...)} must apply the windowing exactly as a direct {@code windowBy} would. */
@Test
public void testWindow_applyIf() {
  final Flow flow = Flow.create("TEST");
  final Dataset<String> words = Util.createMockDataset(flow, 2);

  ReduceStateByKey.of(words)
      .keyBy(word -> word)
      .valueBy(word -> 1L)
      .stateFactory(WordCountState::new)
      .mergeStatesBy(WordCountState::combine)
      .applyIf(true, builder -> builder.windowBy(Time.of(Duration.ofHours(1))))
      .output();

  final ReduceStateByKey operator = (ReduceStateByKey) flow.operators().iterator().next();
  assertTrue(operator.getWindowing() instanceof Time);
}
/**
 * Lowers this operator to basic operators: one {@link ReduceStateByKey}
 * backed by a combining or non-combining state, depending on the reducer.
 */
@SuppressWarnings("unchecked")
@Override
public DAG<Operator<?, ?>> getBasicOps() {
  // States merge themselves; the merger just drives that process.
  final StateSupport.MergeFromStateMerger stateCombine = new StateSupport.MergeFromStateMerger<>();

  // Choose the state implementation: eager combining when possible, otherwise
  // buffer values ordered by `valueComparator` and reduce at window close.
  final StateFactory stateFactory =
      reducer.isCombinable()
          ? new CombiningReduceState.Factory<>((ReduceFunctor) reducer)
          : new NonCombiningReduceState.Factory<>(reducer, valueComparator);

  final Flow flow = getFlow();
  final Operator reduceState =
      new ReduceStateByKey(
          getName(), flow, input, keyExtractor, valueExtractor,
          windowing, stateFactory, stateCombine, getHints());
  return DAG.of(reduceState);
}
// Translation fragment: pulls the state factory/merger off the original
// operator, then branches on the absence of explicit windowing (the attached
// windowing case). The body of the `if` continues beyond this excerpt — the
// opening brace is unbalanced here.
StateFactory<?, ?, State<?, ?>> stateFactory = origOperator.getStateFactory(); StateMerger<?, ?, State<?, ?>> stateCombiner = origOperator.getStateMerger(); Windowing windowing = origOperator.getWindowing(); if (windowing == null) { final UnaryFunction keyExtractor = origOperator.getKeyExtractor(); final UnaryFunction valueExtractor = origOperator.getValueExtractor();
/** Test pipeline: a single shared key routes every element into one CountingSortState. */
@Override
protected Dataset<Pair<String, Integer>> getOutput(Dataset<Integer> input) {
  return ReduceStateByKey.of(input)
      .keyBy(element -> "")
      .valueBy(element -> element)
      .stateFactory(CountingSortState::new)
      // No state merging is expected in this setup, hence the empty merger.
      .mergeStatesBy((target, others) -> {})
      .output();
}
/**
 * Builder terminal step: creates the {@link ReduceStateByKey} operator from
 * the accumulated configuration, attaches it to the flow, and hands back its
 * output dataset.
 */
@Override
public Dataset<Pair<KEY, OUT>> output(OutputHint... outputHints) {
  final Flow ownerFlow = input.getFlow();
  final ReduceStateByKey<IN, KEY, VALUE, OUT, STATE, W> built =
      new ReduceStateByKey<>(
          name,
          ownerFlow,
          input,
          keyExtractor,
          valueExtractor,
          windowing,
          stateFactory,
          stateMerger,
          Sets.newHashSet(outputHints));
  // Registration with the flow must happen before the output is exposed.
  ownerFlow.add(built);
  return built.output();
}
}
// Fragment: constructs the derived ReduceStateByKey operator, suffixing the
// current operator's name. The remaining constructor arguments lie outside
// this excerpt — the call is truncated here.
reduce = new ReduceStateByKey( getName() + "::ReduceStateByKey", flow,
/** Test pipeline: funnels all elements under one key so a single SortState sees the full input. */
@Override
protected Dataset<Pair<String, Integer>> getOutput(Dataset<Integer> input) {
  return ReduceStateByKey.of(input)
      .keyBy(element -> "")
      .valueBy(element -> element)
      .stateFactory(SortState::new)
      .mergeStatesBy(SortState::combine)
      .output();
}