/**
 * Sets up the batching parameters and the state specs used by this DoFn.
 *
 * @param batchSize value stored as the batch size; also drives the prefetch frequency
 * @param allowedLateness value stored as the allowed lateness
 * @param inputKeyCoder coder used for the value state that holds the key
 * @param inputValueCoder coder used for the bag state that holds the batched values
 */
GroupIntoBatchesDoFn(
    long batchSize,
    Duration allowedLateness,
    Coder<K> inputKeyCoder,
    Coder<InputT> inputValueCoder) {
  this.batchSize = batchSize;
  this.allowedLateness = allowedLateness;

  this.keySpec = StateSpecs.value(inputKeyCoder);
  this.batchSpec = StateSpecs.bag(inputValueCoder);

  // Element counter: a long sum whose identity is zero.
  this.numElementsInBatchSpec =
      StateSpecs.combining(
          new Combine.BinaryCombineLongFn() {
            @Override
            public long apply(long left, long right) {
              return left + right;
            }

            @Override
            public long identity() {
              return 0L;
            }
          });

  // Prefetch once per 20% of batchSize elements; when that interval would be a single element
  // or less, the batch is too small and prefetching is effectively disabled.
  long prefetchInterval = batchSize / 5;
  if (prefetchInterval <= 1) {
    this.prefetchFrequency = Long.MAX_VALUE;
  } else {
    this.prefetchFrequency = prefetchInterval;
  }
}
/**
 * Builds a state tag for a {@link CombiningState} in which a {@link CombineFnWithContext} merges
 * multiple {@code InputT}s into a single {@code OutputT}.
 *
 * @param id identifier that is wrapped in a {@code StructuredId} for the tag
 * @param accumCoder coder for the accumulator type, handed to {@code StateSpecs.combining}
 * @param combineFn context-aware combine function, handed to {@code StateSpecs.combining}
 * @return a {@code SimpleStateTag} pairing the structured id with the combining state spec
 */
public static <InputT, AccumT, OutputT>
    StateTag<CombiningState<InputT, AccumT, OutputT>> combiningValueWithContext(
        String id,
        Coder<AccumT> accumCoder,
        CombineFnWithContext<InputT, AccumT, OutputT> combineFn) {
  final StructuredId tagId = new StructuredId(id);
  return new SimpleStateTag<>(tagId, StateSpecs.combining(accumCoder, combineFn));
}
/**
 * Builds a state tag for a {@link CombiningState} in which a {@link CombineFn} merges multiple
 * {@code InputT}s into a single {@code OutputT}.
 *
 * @param id identifier that is wrapped in a {@code StructuredId} for the tag
 * @param accumCoder coder for the accumulator type, handed to {@code StateSpecs.combining}
 * @param combineFn combine function, handed to {@code StateSpecs.combining}
 * @return a {@code SimpleStateTag} pairing the structured id with the combining state spec
 */
public static <InputT, AccumT, OutputT>
    StateTag<CombiningState<InputT, AccumT, OutputT>> combiningValue(
        String id, Coder<AccumT> accumCoder, CombineFn<InputT, AccumT, OutputT> combineFn) {
  final StructuredId tagId = new StructuredId(id);
  return new SimpleStateTag<>(tagId, StateSpecs.combining(accumCoder, combineFn));
}
/**
 * Signature analysis must succeed when a state declared as {@code CombiningState} is requested
 * in {@code @ProcessElement} through the {@code GroupingState} type; the test passes if {@code
 * getSignature} does not throw.
 */
@Test
public void testGoodStateParameterSuperclassStateType() throws Exception {
  DoFn<KV<String, Integer>, Long> fnUnderTest =
      new DoFn<KV<String, Integer>, Long>() {
        // Declared with the concrete combining-state type.
        @StateId("my-id")
        private final StateSpec<CombiningState<Integer, int[], Integer>> state =
            StateSpecs.combining(Sum.ofIntegers());

        // Requested via the GroupingState type instead of CombiningState.
        @ProcessElement
        public void myProcessElement(
            ProcessContext context,
            @StateId("my-id") GroupingState<Integer, Integer> groupingState) {}
      };

  DoFnSignatures.getSignature(fnUnderTest.getClass());
}
// Build the combining StateSpec: the accumulator coder is looked up in `components` using the
// accumulator coder id recorded on stateSpec's combining spec, and paired with combineFn.
// NOTE(review): the raw (Coder) cast presumably exists because the coder's element type is not
// statically known at this point — confirm against the enclosing method.
return StateSpecs.combining( (Coder) components.getCoder(stateSpec.getCombiningSpec().getAccumulatorCoderId()), combineFn);
@Test @Category({ValidatesRunner.class, UsesStatefulParDo.class}) public void testCombiningStateParameterSuperclass() { final String stateId = "foo"; DoFn<KV<Integer, Integer>, String> fn = new DoFn<KV<Integer, Integer>, String>() { private static final int EXPECTED_SUM = 8; @StateId(stateId) private final StateSpec<CombiningState<Integer, int[], Integer>> state = StateSpecs.combining(Sum.ofIntegers()); @ProcessElement public void processElement( @Element KV<Integer, Integer> element, @StateId(stateId) GroupingState<Integer, Integer> state, OutputReceiver<String> r) { state.add(element.getValue()); Integer currentValue = state.read(); if (currentValue == EXPECTED_SUM) { r.output("right on"); } } }; PCollection<String> output = pipeline .apply(Create.of(KV.of(123, 4), KV.of(123, 7), KV.of(123, -3))) .apply(ParDo.of(fn)); // There should only be one moment at which the sum is exactly 8 PAssert.that(output).containsInAnyOrder("right on"); pipeline.run(); }
// Combining state spec that uses Mean.of() as the combine function, with an explicit
// Mean.CountSumCoder<Double> supplied as the accumulator coder.
combiningState = StateSpecs.combining(new Mean.CountSumCoder<Double>(), Mean.of());
StateSpecs.combining( new Combine.CombineFn<Integer, MyInteger, Integer>() { @Override
StateSpecs.combining( new Combine.CombineFn<Integer, MyInteger, Integer>() { @Override