/**
 * Stores the batching parameters and creates the state specs used by this DoFn.
 *
 * <p>The specs created here are: a bag spec built from the value coder, a combining spec that
 * adds {@code long} counts starting from zero, and a value spec built from the key coder.
 *
 * @param batchSize batch size; one fifth of it is used as the prefetch frequency
 * @param allowedLateness allowed lateness stored on this instance
 * @param inputKeyCoder coder passed to the key value spec
 * @param inputValueCoder coder passed to the bag spec
 */
GroupIntoBatchesDoFn(
    long batchSize,
    Duration allowedLateness,
    Coder<K> inputKeyCoder,
    Coder<InputT> inputValueCoder) {
  this.batchSize = batchSize;
  this.allowedLateness = allowedLateness;
  this.batchSpec = StateSpecs.bag(inputValueCoder);

  // Plain long addition with 0 as the neutral element.
  Combine.BinaryCombineLongFn sumOfCounts =
      new Combine.BinaryCombineLongFn() {
        @Override
        public long identity() {
          return 0L;
        }

        @Override
        public long apply(long accumulated, long increment) {
          return accumulated + increment;
        }
      };
  this.numElementsInBatchSpec = StateSpecs.combining(sumOfCounts);

  this.keySpec = StateSpecs.value(inputKeyCoder);

  // Prefetch every 20% of batchSize elements. When that fifth is 1 or less the batch is too
  // small to be worth prefetching, so the frequency is pushed out to Long.MAX_VALUE.
  long fifthOfBatch = batchSize / 5;
  this.prefetchFrequency = (fifthOfBatch > 1) ? fifthOfBatch : Long.MAX_VALUE;
}
/**
 * Creates a simple state tag for a single value of type {@code T}.
 *
 * @param id identifier, wrapped in a {@code StructuredId} for the tag
 * @param valueCoder coder handed to {@code StateSpecs.value} for the tag's spec
 * @return a {@code SimpleStateTag} pairing the structured id with the value spec
 */
public static <T> StateTag<ValueState<T>> value(String id, Coder<T> valueCoder) {
  StructuredId tagId = new StructuredId(id);
  return new SimpleStateTag<>(tagId, StateSpecs.value(valueCoder));
}
/**
 * Checks the signature of an anonymous DoFn with a single {@code @StateId("foo")} field:
 * exactly one state declaration is reported, keyed by "foo", pointing at the field
 * {@code bizzle}, with state type {@code ValueState<Integer>}.
 */
@Test
public void testSimpleStateIdAnonymousDoFn() throws Exception {
  Class<? extends DoFn<?, ?>> fnClass =
      new DoFn<KV<String, Integer>, Long>() {
        @StateId("foo")
        private final StateSpec<ValueState<Integer>> bizzle = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void foo(ProcessContext context) {}
      }.getClass();
  DoFnSignature signature = DoFnSignatures.getSignature(fnClass);

  assertThat(signature.stateDeclarations().size(), equalTo(1));

  DoFnSignature.StateDeclaration fooDeclaration = signature.stateDeclarations().get("foo");
  assertThat(fooDeclaration.id(), equalTo("foo"));
  assertThat(fooDeclaration.field().getName(), equalTo("bizzle"));

  TypeDescriptor<?> expectedStateType = new TypeDescriptor<ValueState<Integer>>() {};
  assertThat(
      fooDeclaration.stateType(), Matchers.<TypeDescriptor<?>>equalTo(expectedStateType));
}
switch (stateSpec.getSpecCase()) { case VALUE_SPEC: return StateSpecs.value(components.getCoder(stateSpec.getValueSpec().getCoderId())); case BAG_SPEC: return StateSpecs.bag(components.getCoder(stateSpec.getBagSpec().getElementCoderId()));
/**
 * Expects signature analysis to reject an anonymous subclass of {@code DoFnUsingState} that
 * declares a state field under {@code DoFnUsingState.STATE_ID}. The error must mention
 * "process", "declared in a different class" and the state id.
 */
@Test
public void testDeclOfStateUsedInSuperclass() throws Exception {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("process");
  thrown.expectMessage("declared in a different class");
  thrown.expectMessage(DoFnUsingState.STATE_ID);

  Class<? extends DoFnUsingState> subclassDeclaringState =
      new DoFnUsingState() {
        @StateId(DoFnUsingState.STATE_ID)
        private final StateSpec<ValueState<Integer>> spec = StateSpecs.value(VarIntCoder.of());
      }.getClass();

  // Analysis of the subclass is the call that is expected to throw.
  DoFnSignatures.getSignature(subclassDeclaringState);
}
/**
 * Expects signature analysis to reject a {@code @StateId} field that is not {@code final}. The
 * error must name the field ("myfield"), say that state declarations must be final, and must
 * not mention timers.
 */
@Test
public void testStateIdNonFinal() throws Exception {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("State declarations must be final");
  thrown.expectMessage("Non-final field");
  thrown.expectMessage("myfield");
  thrown.expectMessage(not(mentionsTimers()));

  Class<? extends DoFn<?, ?>> fnWithNonFinalState =
      new DoFn<KV<String, Integer>, Long>() {
        // Deliberately non-final: this is the condition under test.
        @StateId("my-id")
        private StateSpec<ValueState<Integer>> myfield = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void foo(ProcessContext context) {}
      }.getClass();

  DoFnSignatures.getSignature(fnWithNonFinalState);
}
// State declaration registered under id "foo": an Integer value spec created with a VarIntCoder.
@StateId("foo") private final StateSpec<ValueState<Integer>> bizzleDecl = StateSpecs.value(VarIntCoder.of());
/**
 * Expects signature analysis to reject two state fields that share the id "my-id". The error
 * must mention "Duplicate", "StateId", the id and both field names, and must not mention
 * timers.
 */
@Test
public void testStateIdDuplicate() throws Exception {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Duplicate");
  thrown.expectMessage("StateId");
  thrown.expectMessage("my-id");
  thrown.expectMessage("myfield1");
  thrown.expectMessage("myfield2");
  thrown.expectMessage(not(mentionsTimers()));

  // The call is expected to throw, so its return value is intentionally not captured.
  DoFnSignatures.getSignature(
      new DoFn<KV<String, Integer>, Long>() {
        @StateId("my-id")
        private final StateSpec<ValueState<Integer>> myfield1 =
            StateSpecs.value(VarIntCoder.of());

        @StateId("my-id")
        private final StateSpec<ValueState<Long>> myfield2 = StateSpecs.value(VarLongCoder.of());

        @ProcessElement
        public void foo(ProcessContext context) {}
      }.getClass());
}
StateSpecs.value(VarIntCoder.of());
/**
 * Expects signature analysis to reject a process method that takes the same state id ("my-id")
 * as two parameters. The error must mention "duplicate", the id, the method name and
 * "index 2", and must not mention timers.
 */
@Test
public void testStateParameterDuplicate() throws Exception {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("duplicate");
  thrown.expectMessage("my-id");
  thrown.expectMessage("myProcessElement");
  thrown.expectMessage("index 2");
  thrown.expectMessage(not(mentionsTimers()));

  Class<? extends DoFn<?, ?>> fnWithRepeatedStateParam =
      new DoFn<KV<String, Integer>, Long>() {
        @StateId("my-id")
        private final StateSpec<ValueState<Integer>> myfield = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void myProcessElement(
            ProcessContext context,
            @StateId("my-id") ValueState<Integer> one,
            @StateId("my-id") ValueState<Integer> two) {}
      }.getClass();

  DoFnSignatures.getSignature(fnWithRepeatedStateParam);
}
/**
 * Expects signature analysis to reject a state parameter typed {@code ValueState<String>} when
 * the field for the same id ("my-id") is declared as {@code ValueState<Integer>}. The error
 * must mention both types, "reference to", "supertype", the id, the method name and
 * "index 1", and must not mention timers.
 */
@Test
public void testStateParameterWrongGenericType() throws Exception {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("ValueState<String>");
  thrown.expectMessage("reference to");
  thrown.expectMessage("supertype");
  thrown.expectMessage("ValueState<Integer>");
  thrown.expectMessage("my-id");
  thrown.expectMessage("myProcessElement");
  thrown.expectMessage("index 1");
  thrown.expectMessage(not(mentionsTimers()));

  Class<? extends DoFn<?, ?>> fnWithMismatchedTypeArgument =
      new DoFn<KV<String, Integer>, Long>() {
        @StateId("my-id")
        private final StateSpec<ValueState<Integer>> myfield = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void myProcessElement(
            ProcessContext context, @StateId("my-id") ValueState<String> stringState) {}
      }.getClass();

  DoFnSignatures.getSignature(fnWithMismatchedTypeArgument);
}
/**
 * Expects signature analysis to reject a state parameter typed {@code WatermarkHoldState} when
 * the field for the same id ("my-id") is declared as a {@code ValueState}. The error must
 * mention both state types, "reference to", "supertype", the id, the method name and
 * "index 1", and must not mention timers.
 */
@Test
public void testStateParameterWrongStateType() throws Exception {
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("WatermarkHoldState");
  thrown.expectMessage("reference to");
  thrown.expectMessage("supertype");
  thrown.expectMessage("ValueState");
  thrown.expectMessage("my-id");
  thrown.expectMessage("myProcessElement");
  thrown.expectMessage("index 1");
  thrown.expectMessage(not(mentionsTimers()));

  Class<? extends DoFn<?, ?>> fnWithMismatchedStateKind =
      new DoFn<KV<String, Integer>, Long>() {
        @StateId("my-id")
        private final StateSpec<ValueState<Integer>> myfield = StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void myProcessElement(
            ProcessContext context, @StateId("my-id") WatermarkHoldState watermark) {}
      }.getClass();

  DoFnSignatures.getSignature(fnWithMismatchedStateKind);
}
/**
 * Applies a DoFn that declares state to a collection of plain strings and expects an
 * {@code IllegalArgumentException} whose message mentions "state" and "KvCoder".
 */
@Test
public void testStateNotKeyed() {
  // Must remain a compile-time constant: it is used as an annotation value below.
  final String intStateId = "foo";

  DoFn<String, Integer> statefulFnOverStrings =
      new DoFn<String, Integer>() {

        @StateId(intStateId)
        private final StateSpec<ValueState<Integer>> intState = StateSpecs.value();

        @ProcessElement
        public void processElement(
            ProcessContext c, @StateId(intStateId) ValueState<Integer> state) {}
      };

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("state");
  thrown.expectMessage("KvCoder");

  Create.Values<String> unkeyedInput = Create.of("hello", "goodbye", "hello again");
  pipeline.apply(unkeyedInput).apply(ParDo.of(statefulFnOverStrings));
}
/**
 * Supplies the parameterized-test inputs: one value spec, one bag spec, one set spec and one
 * map spec, in that order.
 */
@Parameters(name = "{index}: {0}")
public static Iterable<StateSpec<?>> stateSpecs() {
  StateSpec<?> valueSpec = StateSpecs.value(VarIntCoder.of());
  StateSpec<?> bagSpec = StateSpecs.bag(VarIntCoder.of());
  StateSpec<?> setSpec = StateSpecs.set(VarIntCoder.of());
  StateSpec<?> mapSpec = StateSpecs.map(StringUtf8Coder.of(), VarIntCoder.of());
  return ImmutableList.of(valueSpec, bagSpec, setSpec, mapSpec);
}
@Test public void testStateNotDeterministic() { final String stateId = "foo"; // DoubleCoder is not deterministic, so this should crash DoFn<KV<Double, String>, Integer> fn = new DoFn<KV<Double, String>, Integer>() { @StateId(stateId) private final StateSpec<ValueState<Integer>> intState = StateSpecs.value(); @ProcessElement public void processElement( ProcessContext c, @StateId(stateId) ValueState<Integer> state) {} }; thrown.expect(IllegalArgumentException.class); thrown.expectMessage("state"); thrown.expectMessage("deterministic"); pipeline .apply(Create.of(KV.of(1.0, "hello"), KV.of(5.4, "goodbye"), KV.of(7.2, "hello again"))) .apply(ParDo.of(fn)); }
/**
 * Builds a pipeline that windows its input into sessions and then applies a DoFn declaring
 * state, and expects {@code run()} to fail with an {@code UnsupportedOperationException} whose
 * message mentions "merging".
 *
 * @param options pipeline options used to create the pipeline under test
 */
private void verifyMergingStatefulParDoRejected(PipelineOptions options) throws Exception {
  Pipeline mergingPipeline = Pipeline.create(options);

  DoFn<KV<Integer, Integer>, Void> statefulFn =
      new DoFn<KV<Integer, Integer>, Void>() {
        @StateId("fizzle")
        private final StateSpec<ValueState<Void>> voidState = StateSpecs.value();

        @ProcessElement
        public void process() {}
      };

  Create.Values<KV<Integer, Integer>> singleElement = Create.of(KV.of(13, 42));
  mergingPipeline
      .apply(singleElement)
      .apply(Window.into(Sessions.withGapDuration(Duration.millis(1))))
      .apply(ParDo.of(statefulFn));

  // Expectations are registered only after construction, so the failure must come from run().
  thrown.expectMessage("merging");
  thrown.expect(UnsupportedOperationException.class);
  mergingPipeline.run();
}
/**
 * Runs a DoFn that reads an Integer value state (treating an absent value as 0), outputs it and
 * writes back the value plus one. Three inputs sharing the key "hello" must produce 0, 1 and 2
 * in any order.
 */
@Test
@Category({ValidatesRunner.class, UsesStatefulParDo.class})
public void testValueStateSimple() {
  // Must remain a compile-time constant: it is used as an annotation value below.
  final String counterStateId = "foo";

  DoFn<KV<String, Integer>, Integer> countingFn =
      new DoFn<KV<String, Integer>, Integer>() {

        @StateId(counterStateId)
        private final StateSpec<ValueState<Integer>> intState =
            StateSpecs.value(VarIntCoder.of());

        @ProcessElement
        public void processElement(
            @StateId(counterStateId) ValueState<Integer> state, OutputReceiver<Integer> r) {
          // An unset state reads as null; start counting from 0 in that case.
          Integer seenSoFar = MoreObjects.firstNonNull(state.read(), 0);
          r.output(seenSoFar);
          state.write(seenSoFar + 1);
        }
      };

  Create.Values<KV<String, Integer>> sameKeyInput =
      Create.of(KV.of("hello", 42), KV.of("hello", 97), KV.of("hello", 84));
  PCollection<Integer> output = pipeline.apply(sameKeyInput).apply(ParDo.of(countingFn));

  PAssert.that(output).containsInAnyOrder(0, 1, 2);
  pipeline.run();
}
// Integer value-state spec created without passing an explicit coder.
private final StateSpec<ValueState<Integer>> countSpec = StateSpecs.value();
/**
 * Declares a {@code ValueState<MyInteger>} without passing a coder to the state spec and
 * expects a {@code RuntimeException} stating that no coder could be inferred for the
 * ValueState. The output collection is given an explicit {@code MyIntegerCoder}.
 */
@Test
@Category({ValidatesRunner.class, UsesStatefulParDo.class})
public void testValueStateCoderInferenceFailure() throws Exception {
  // Must remain a compile-time constant: it is used as an annotation value below.
  final String counterStateId = "foo";
  MyIntegerCoder outputCoder = MyIntegerCoder.of();

  DoFn<KV<String, Integer>, MyInteger> countingFn =
      new DoFn<KV<String, Integer>, MyInteger>() {

        // No coder argument here: this is the condition under test.
        @StateId(counterStateId)
        private final StateSpec<ValueState<MyInteger>> intState = StateSpecs.value();

        @ProcessElement
        public void processElement(
            @StateId(counterStateId) ValueState<MyInteger> state, OutputReceiver<MyInteger> r) {
          MyInteger seenSoFar = MoreObjects.firstNonNull(state.read(), new MyInteger(0));
          r.output(seenSoFar);
          state.write(new MyInteger(seenSoFar.getValue() + 1));
        }
      };

  thrown.expect(RuntimeException.class);
  thrown.expectMessage("Unable to infer a coder for ValueState and no Coder was specified.");

  Create.Values<KV<String, Integer>> sameKeyInput =
      Create.of(KV.of("hello", 42), KV.of("hello", 97), KV.of("hello", 84));
  pipeline.apply(sameKeyInput).apply(ParDo.of(countingFn)).setCoder(outputCoder);

  pipeline.run();
}
// MyInteger value-state spec created without passing an explicit coder.
private final StateSpec<ValueState<MyInteger>> intState = StateSpecs.value();