@ProcessElement public void processElement( ProcessContext context, @StateId(SEEN_EVENTS) BagState<T> seenEvents) { seenEvents.add(context.element().getValue()); ImmutableSet<T> eventsSoFar = ImmutableSet.copyOf(seenEvents.read()); // check if all elements seen so far satisfy the success predicate try { if (successPredicate.apply(eventsSoFar)) { context.output("SUCCESS"); } } catch (Throwable e) { context.output("FAILURE: " + e.getMessage()); } } }
@Test public void testBag() throws Exception { BagState<String> value = underTest.state(NAMESPACE_1, STRING_BAG_ADDR); // State instances are cached, but depend on the namespace. assertThat(value, equalTo(underTest.state(NAMESPACE_1, STRING_BAG_ADDR))); assertThat(value, not(equalTo(underTest.state(NAMESPACE_2, STRING_BAG_ADDR)))); assertThat(value.read(), Matchers.emptyIterable()); value.add("hello"); assertThat(value.read(), containsInAnyOrder("hello")); value.add("world"); assertThat(value.read(), containsInAnyOrder("hello", "world")); value.clear(); assertThat(value.read(), Matchers.emptyIterable()); assertThat(underTest.state(NAMESPACE_1, STRING_BAG_ADDR), equalTo(value)); }
/**
 * Checks that {@code isEmpty()} reports an empty bag initially, and that a handle obtained before
 * any mutation reflects a later {@code add} and {@code clear} when it is read.
 */
@Test
public void testBagIsEmpty() throws Exception {
  BagState<String> bag = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
  assertThat(bag.isEmpty().read(), Matchers.is(true));

  // Obtain the handle first; it is only read after the bag has been changed.
  ReadableState<Boolean> emptiness = bag.isEmpty();

  bag.add("hello");
  assertThat(emptiness.read(), Matchers.is(false));

  bag.clear();
  assertThat(emptiness.read(), Matchers.is(true));
}
/**
 * Emits every element held in {@code bufferState} and then clears the buffer. Each buffered entry
 * carries an integer id that is mapped back to its tag through {@code idsToTags}.
 *
 * <p>This method can't be invoked in {@link #snapshotState(StateSnapshotContext)}.
 */
void flushBuffer() {
  for (KV<Integer, WindowedValue<?>> buffered : bufferState.read()) {
    // The raw cast is needed because the element type was erased to a wildcard when buffering.
    emit(idsToTags.get(buffered.getKey()), (WindowedValue) buffered.getValue());
  }
  bufferState.clear();
}
/**
 * Routes an output value: while {@code openBuffer} is set the value is appended to {@code
 * bufferState}, paired with the tag's integer id from {@code tagsToIds}; otherwise it is emitted
 * right away.
 */
@Override
public <T> void output(TupleTag<T> tag, WindowedValue<T> value) {
  if (openBuffer) {
    bufferState.add(KV.<Integer, WindowedValue<?>>of(tagsToIds.get(tag), value));
    return;
  }
  emit(tag, value);
}
@ProcessElement public void processElement( @Element KV<String, Integer> element, @StateId(stateId) BagState<Integer> state, OutputReceiver<List<Integer>> r) { ReadableState<Boolean> isEmpty = state.isEmpty(); state.add(element.getValue()); assertFalse(isEmpty.read()); Iterable<Integer> currentValue = state.read(); if (Iterables.size(currentValue) >= 4) { // Make sure that the cached Iterable doesn't change when new elements are added. state.add(-1); assertEquals(4, Iterables.size(currentValue)); assertEquals(5, Iterables.size(state.read())); List<Integer> sorted = Lists.newArrayList(currentValue); Collections.sort(sorted); r.output(sorted); } } };
/**
 * Initialises the state internals for {@code key} and returns what is read from the bag registered
 * under {@code stateTag} in the namespace of {@code window}.
 */
@Override
public Iterable<V> get(K key, W window) {
  initStateInternals(key);
  BagState<V> windowBag =
      stateInternals.state(StateNamespaces.window(windowCoder, window), stateTag);
  return windowBag.read();
}
@ProcessElement public void processElement( @TimerId(END_OF_WINDOW_ID) Timer timer, @StateId(BATCH_ID) BagState<InputT> batch, @StateId(NUM_ELEMENTS_IN_BATCH_ID) CombiningState<Long, long[], Long> numElementsInBatch, @StateId(KEY_ID) ValueState<K> key, @Element KV<K, InputT> element, BoundedWindow window, OutputReceiver<KV<K, Iterable<InputT>>> receiver) { Instant windowExpires = window.maxTimestamp().plus(allowedLateness); LOG.debug( "*** SET TIMER *** to point in time {} for window {}", windowExpires.toString(), window.toString()); timer.set(windowExpires); key.write(element.getKey()); batch.add(element.getValue()); LOG.debug("*** BATCH *** Add element for window {} ", window.toString()); // blind add is supported with combiningState numElementsInBatch.add(1L); Long num = numElementsInBatch.read(); if (num % prefetchFrequency == 0) { //prefetch data and modify batch state (readLater() modifies this) batch.readLater(); } if (num >= batchSize) { LOG.debug("*** END OF BATCH *** for window {}", window.toString()); flushBatch(receiver, key, batch, numElementsInBatch); } }
/**
 * Initialises the state internals for {@code key} and clears the bag registered under {@code
 * stateTag} in the namespace of {@code window}.
 */
@Override
public void clear(K key, W window) {
  initStateInternals(key);
  BagState<V> windowBag =
      stateInternals.state(StateNamespaces.window(windowCoder, window), stateTag);
  windowBag.clear();
}
/**
 * Drains {@code bufferState}: every buffered entry is emitted under the tag that {@code
 * idsToTags} holds for its integer id, and the buffer is cleared afterwards.
 *
 * <p>This method can't be invoked in {@link #snapshotState(StateSnapshotContext)}.
 */
void flushBuffer() {
  for (KV<Integer, WindowedValue<?>> entry : bufferState.read()) {
    // Raw cast: the buffered value only carries a wildcard element type.
    emit(idsToTags.get(entry.getKey()), (WindowedValue) entry.getValue());
  }
  bufferState.clear();
}
/**
 * Emits the value directly unless {@code openBuffer} is set, in which case the value is added to
 * {@code bufferState} together with the tag's integer id looked up in {@code tagsToIds}.
 */
@Override
public <T> void output(TupleTag<T> tag, WindowedValue<T> value) {
  if (openBuffer) {
    bufferState.add(KV.of(tagsToIds.get(tag), value));
    return;
  }
  emit(tag, value);
}
/**
 * Reads the bag stored under {@code stateTag} for the given window, after initialising the state
 * internals for {@code key}.
 */
@Override
public Iterable<V> get(K key, W window) {
  initStateInternals(key);
  BagState<V> contents =
      stateInternals.state(StateNamespaces.window(windowCoder, window), stateTag);
  return contents.read();
}
/**
 * Clears the bag stored under {@code stateTag} for the given window, after initialising the state
 * internals for {@code key}.
 */
@Override
public void clear(K key, W window) {
  initStateInternals(key);
  BagState<V> contents =
      stateInternals.state(StateNamespaces.window(windowCoder, window), stateTag);
  contents.clear();
}
@Test public void testMergeBagIntoSource() throws Exception { BagState<String> bag1 = underTest.state(NAMESPACE_1, STRING_BAG_ADDR); BagState<String> bag2 = underTest.state(NAMESPACE_2, STRING_BAG_ADDR); bag1.add("Hello"); bag2.add("World"); bag1.add("!"); StateMerging.mergeBags(Arrays.asList(bag1, bag2), bag1); // Reading the merged bag gets both the contents assertThat(bag1.read(), containsInAnyOrder("Hello", "World", "!")); assertThat(bag2.read(), Matchers.emptyIterable()); }
Iterable<WindowedValue<InputT>> pushedBackContents = pushedBack.read(); if (pushedBackContents != null) { for (WindowedValue<InputT> elem : pushedBackContents) { pushedBack.clear(); long min = Long.MAX_VALUE; for (WindowedValue<InputT> pushedBackValue : newPushedBack) { min = Math.min(min, pushedBackValue.getTimestamp().getMillis()); pushedBack.add(pushedBackValue);
/**
 * Emits each element read from {@code bufferState}, resolving its tag from the stored integer id
 * via {@code idsToTags}, and clears {@code bufferState} once all elements have been emitted.
 *
 * <p>This method can't be invoked in {@link #snapshotState(StateSnapshotContext)}.
 */
void flushBuffer() {
  for (KV<Integer, WindowedValue<?>> pending : bufferState.read()) {
    // Raw cast: the stored value type is a wildcard, so it cannot be matched to the tag's type.
    emit(idsToTags.get(pending.getKey()), (WindowedValue) pending.getValue());
  }
  bufferState.clear();
}
/**
 * Buffers or emits an output value depending on {@code openBuffer}: when it is set, the value is
 * stored in {@code bufferState} keyed by the id that {@code tagsToIds} holds for the tag; when it
 * is not set, the value is emitted immediately.
 */
@Override
public <T> void output(TupleTag<T> tag, WindowedValue<T> value) {
  if (openBuffer) {
    bufferState.add(KV.of(tagsToIds.get(tag), value));
    return;
  }
  emit(tag, value);
}
@Override public void close() throws Exception { super.close(); // sanity check: these should have been flushed out by +Inf watermarks if (!sideInputs.isEmpty() && nonKeyedStateInternals != null) { BagState<WindowedValue<InputT>> pushedBack = nonKeyedStateInternals.state(StateNamespaces.global(), pushedBackTag); Iterable<WindowedValue<InputT>> pushedBackContents = pushedBack.read(); if (pushedBackContents != null) { if (!Iterables.isEmpty(pushedBackContents)) { String pushedBackString = Joiner.on(",").join(pushedBackContents); throw new RuntimeException( "Leftover pushed-back data: " + pushedBackString + ". This indicates a bug."); } } } checkFinishBundleTimer.cancel(true); doFnInvoker.invokeTeardown(); }
@Test public void testMergeBagIntoNewNamespace() throws Exception { BagState<String> bag1 = underTest.state(NAMESPACE_1, STRING_BAG_ADDR); BagState<String> bag2 = underTest.state(NAMESPACE_2, STRING_BAG_ADDR); BagState<String> bag3 = underTest.state(NAMESPACE_3, STRING_BAG_ADDR); bag1.add("Hello"); bag2.add("World"); bag1.add("!"); StateMerging.mergeBags(Arrays.asList(bag1, bag2, bag3), bag3); // Reading the merged bag gets both the contents assertThat(bag3.read(), containsInAnyOrder("Hello", "World", "!")); assertThat(bag1.read(), Matchers.emptyIterable()); assertThat(bag2.read(), Matchers.emptyIterable()); }