/**
 * Prepares this instance for processing.
 *
 * <p>In order: creates the side input reader, obtains the invoker for the {@code doFn} and runs
 * its setup hook, builds the runner on top of the side input reader, resets the list of
 * pushed-back values, and finally sets up the output manager with the main and side outputs.
 */
@Override
public void setup() {
  // Side input contents live in in-memory state internals created for a null key.
  sideInputReader =
      new SideInputHandler(sideInputs, InMemoryStateInternals.<Void>forKey(null));

  // The DoFn's setup hook runs before the runner is created.
  doFnInvoker = DoFnInvokers.invokerFor(doFn);
  doFnInvoker.invokeSetup();

  doFnRunner = doFnRunnerFactory.createRunner(sideInputReader);

  pushedBackValues = new ArrayList<>();
  outputManager.setup(mainOutput, sideOutputs);
}
/**
 * Stores a side input value that arrived as a stream record.
 *
 * <p>Views are assumed to have been materialized already and to be sent over the wire as an
 * {@link Iterable}. Subclasses may elect to perform materialization in state and receive side
 * input incrementally instead.
 *
 * @param streamRecord record wrapping a {@link RawUnionValue}; the union tag selects the view
 *     through {@code sideInputTagMapping} and the payload is the windowed iterable to store
 */
protected void addSideInputValue(StreamRecord<RawUnionValue> streamRecord) {
  RawUnionValue tagged = streamRecord.getValue();

  @SuppressWarnings("unchecked")
  WindowedValue<Iterable<?>> contents = (WindowedValue<Iterable<?>>) tagged.getValue();

  PCollectionView<?> view = sideInputTagMapping.get(tagged.getUnionTag());
  sideInputHandler.addSideInputValue(view, contents);
}
/**
 * Returns the iterable that {@code runnerHandler} holds for {@code collection} in the given
 * window.
 *
 * @param key not consulted by this implementation
 * @param window window whose side input contents are requested
 * @return the iterable after it has passed {@code checkNotNull}
 */
@Override
public Iterable<T> get(byte[] key, W window) {
  Iterable<T> contents = (Iterable<T>) runnerHandler.getIterable(collection, window);
  // A missing iterable means the element was processed before the side input was available.
  return checkNotNull(contents, "Element processed by SDK before side input is ready");
}
/** A handler built for {@code view1} only must contain {@code view1} and not {@code view2}. */
@Test
public void testContains() {
  SideInputHandler handler =
      new SideInputHandler(ImmutableList.of(view1), InMemoryStateInternals.<Void>forKey(null));

  assertTrue(handler.contains(view1));
  assertFalse(handler.contains(view2));
}
@Test public void testIsEmpty() { SideInputHandler sideInputHandler = new SideInputHandler(ImmutableList.of(view1), InMemoryStateInternals.<Void>forKey(null)); assertFalse(sideInputHandler.isEmpty()); // create an empty handler SideInputHandler emptySideInputHandler = new SideInputHandler(ImmutableList.of(), InMemoryStateInternals.<Void>forKey(null)); assertTrue(emptySideInputHandler.isEmpty()); }
@Test public void testMultipleSideInputs() { SideInputHandler sideInputHandler = new SideInputHandler( ImmutableList.of(view1, view2), InMemoryStateInternals.<Void>forKey(null)); // two windows that we'll later use for adding elements/retrieving side input IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(WINDOW_MSECS_1)); // add value for view1 in the first window sideInputHandler.addSideInputValue( view1, valuesInWindow( materializeValuesFor(View.asIterable(), "Hello"), new Instant(0), firstWindow)); assertThat(sideInputHandler.get(view1, firstWindow), contains("Hello")); // view2 should not have any data assertFalse(sideInputHandler.isReady(view2, firstWindow)); // also add some data for view2 sideInputHandler.addSideInputValue( view2, valuesInWindow( materializeValuesFor(View.asIterable(), "Salut"), new Instant(0), firstWindow)); assertTrue(sideInputHandler.isReady(view2, firstWindow)); assertThat(sideInputHandler.get(view2, firstWindow), contains("Salut")); // view1 should not be affected by that assertThat(sideInputHandler.get(view1, firstWindow), contains("Hello")); }
@Test public void testNewInputReplacesPreviousInput() { // new input should completely replace old input // the creation of the Iterable that has the side input // contents happens upstream. this is also where // accumulation/discarding is decided. SideInputHandler sideInputHandler = new SideInputHandler(ImmutableList.of(view1), InMemoryStateInternals.<Void>forKey(null)); IntervalWindow window = new IntervalWindow(new Instant(0), new Instant(WINDOW_MSECS_1)); // add a first value for view1 sideInputHandler.addSideInputValue( view1, valuesInWindow(materializeValuesFor(View.asIterable(), "Hello"), new Instant(0), window)); assertThat(sideInputHandler.get(view1, window), contains("Hello")); // subsequent values should replace existing values sideInputHandler.addSideInputValue( view1, valuesInWindow( materializeValuesFor(View.asIterable(), "Ciao", "Buongiorno"), new Instant(0), window)); assertThat(sideInputHandler.get(view1, window), contains("Ciao", "Buongiorno")); }
@Test public void testIsReady() { SideInputHandler sideInputHandler = new SideInputHandler( ImmutableList.of(view1, view2), InMemoryStateInternals.<Void>forKey(null)); IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(WINDOW_MSECS_1)); IntervalWindow secondWindow = new IntervalWindow(new Instant(0), new Instant(WINDOW_MSECS_2)); // side input should not yet be ready assertFalse(sideInputHandler.isReady(view1, firstWindow)); // add a value for view1 sideInputHandler.addSideInputValue( view1, valuesInWindow( materializeValuesFor(View.asIterable(), "Hello"), new Instant(0), firstWindow)); // now side input should be ready assertTrue(sideInputHandler.isReady(view1, firstWindow)); // second window input should still not be ready assertFalse(sideInputHandler.isReady(view1, secondWindow)); }
// Fragment: the enclosing method and the closing braces of the loop are outside this view.
// Store the incoming side input payload, viewed as a windowed iterable.
WindowedValue<Iterable<?>> sideInputValue = (WindowedValue<Iterable<?>>) unionValue.getValue();
sideInputReader.addSideInputValue(sideInput, sideInputValue);
// Walk the windows of `value` (declared outside this fragment) and map each of them to the
// side input window given by the view's window mapping function.
for (BoundedWindow win : value.getWindows()) {
  BoundedWindow sideInputWindow = sideInput.getWindowMappingFn().getSideInputWindow(win);
  // Side input windows that are not ready yet receive an empty list, stamped with the timestamp
  // and pane of `value`.
  // NOTE(review): presumably this makes isReady() hold for the window afterwards -- confirm.
  if (!sideInputReader.isReady(sideInput, sideInputWindow)) {
    Object emptyValue =
        WindowedValue.of(
            new ArrayList<>(), value.getTimestamp(), sideInputWindow, value.getPane());
    // NOTE(review): the detour through Object looks like a way to get the generic cast past the
    // compiler -- confirm.
    sideInputReader.addSideInputValue(sideInput, (WindowedValue<Iterable<?>>) emptyValue);
@Test public void testMultipleWindows() { SideInputHandler sideInputHandler = new SideInputHandler(ImmutableList.of(view1), InMemoryStateInternals.<Void>forKey(null)); // two windows that we'll later use for adding elements/retrieving side input IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(WINDOW_MSECS_1)); IntervalWindow secondWindow = new IntervalWindow(new Instant(1000), new Instant(1000 + WINDOW_MSECS_2)); // add a first value for view1 in the first window sideInputHandler.addSideInputValue( view1, valuesInWindow( materializeValuesFor(View.asIterable(), "Hello"), new Instant(0), firstWindow)); assertThat(sideInputHandler.get(view1, firstWindow), contains("Hello")); // add something for second window of view1 sideInputHandler.addSideInputValue( view1, valuesInWindow( materializeValuesFor(View.asIterable(), "Arrivederci"), new Instant(0), secondWindow)); assertThat(sideInputHandler.get(view1, secondWindow), contains("Arrivederci")); // contents for first window should be unaffected assertThat(sideInputHandler.get(view1, firstWindow), contains("Hello")); }
/**
 * Stores a side input value that arrived as a stream record.
 *
 * <p>Views are assumed to have been materialized already and to be sent over the wire as an
 * {@link Iterable}. Subclasses may elect to perform materialization in state and receive side
 * input incrementally instead.
 *
 * @param streamRecord record wrapping a {@link RawUnionValue}; the union tag selects the view
 *     through {@code sideInputTagMapping} and the payload is the windowed iterable to store
 */
protected void addSideInputValue(StreamRecord<RawUnionValue> streamRecord) {
  RawUnionValue tagged = streamRecord.getValue();

  @SuppressWarnings("unchecked")
  WindowedValue<Iterable<?>> contents = (WindowedValue<Iterable<?>>) tagged.getValue();

  PCollectionView<?> view = sideInputTagMapping.get(tagged.getUnionTag());
  sideInputHandler.addSideInputValue(view, contents);
}
// Fragment: the closing brace of the `if` is outside this view.
// Start from the reader produced by NullSideInputReader.of for the declared side inputs.
SideInputReader sideInputReader = NullSideInputReader.of(sideInputs);
// When side inputs are declared, create a handler over the side input state internals and use
// that same object as the reader.
if (!sideInputs.isEmpty()) {
  sideInputHandler = new SideInputHandler(sideInputs, sideInputStateInternals);
  sideInputReader = sideInputHandler;
/**
 * Returns the iterable that {@code runnerHandler} holds for {@code collection} in the given
 * window.
 *
 * @param key not consulted by this implementation
 * @param window window whose side input contents are requested
 * @return the iterable after it has passed {@code checkNotNull}
 */
@Override
public Iterable<T> get(byte[] key, W window) {
  Iterable<T> contents = (Iterable<T>) runnerHandler.getIterable(collection, window);
  // A missing iterable means the element was processed before the side input was available.
  return checkNotNull(contents, "Element processed by SDK before side input is ready");
}
/**
 * Stores a side input value whose payload arrives wrapped in a {@code KV} with a {@code Void}
 * key; only the value part of that pair is handed to the side input handler.
 *
 * @param streamRecord record wrapping a {@link RawUnionValue}; the union tag selects the view
 *     through {@code sideInputTagMapping}
 */
@Override
protected void addSideInputValue(StreamRecord<RawUnionValue> streamRecord) {
  RawUnionValue tagged = streamRecord.getValue();

  @SuppressWarnings("unchecked")
  WindowedValue<KV<Void, Iterable<?>>> keyed =
      (WindowedValue<KV<Void, Iterable<?>>>) tagged.getValue();

  PCollectionView<?> view = sideInputTagMapping.get(tagged.getUnionTag());

  // Keep the windowing information of the record but replace the KV by its value part.
  sideInputHandler.addSideInputValue(view, keyed.withValue(keyed.getValue().getValue()));
}
// Fragment: the first line is the tail of a call whose beginning is outside this view.
getContainingTask().getIndexInSubtaskGroup(), getOperatorStateBackend());
// Create the handler over the side input state internals; the same object is used as the
// side input reader.
sideInputHandler = new SideInputHandler(sideInputs, sideInputStateInternals);
sideInputReader = sideInputHandler;
@Override public Iterable<V> get(byte[] key, W window) { Iterable<KV<K, V>> values = (Iterable<KV<K, V>>) runnerHandler.getIterable(collection, window); ArrayList<V> result = new ArrayList<>(); // find values for the given key for (KV<K, V> kv : values) { ByteArrayOutputStream bos = new ByteArrayOutputStream(); try { keyCoder.encode(kv.getKey(), bos); if (Arrays.equals(key, bos.toByteArray())) { result.add(kv.getValue()); } } catch (IOException ex) { throw new RuntimeException(ex); } } return result; }
/**
 * Stores a side input value whose payload arrives wrapped in a {@code KV} with a {@code Void}
 * key; only the value part of that pair is handed to the side input handler.
 *
 * @param streamRecord record wrapping a {@link RawUnionValue}; the union tag selects the view
 *     through {@code sideInputTagMapping}
 */
@Override
protected void addSideInputValue(StreamRecord<RawUnionValue> streamRecord) {
  RawUnionValue tagged = streamRecord.getValue();

  @SuppressWarnings("unchecked")
  WindowedValue<KV<Void, Iterable<?>>> keyed =
      (WindowedValue<KV<Void, Iterable<?>>>) tagged.getValue();

  PCollectionView<?> view = sideInputTagMapping.get(tagged.getUnionTag());

  // Keep the windowing information of the record but replace the KV by its value part.
  sideInputHandler.addSideInputValue(view, keyed.withValue(keyed.getValue().getValue()));
}
// Fragment: the first line is the tail of a call whose beginning is outside this view.
getContainingTask().getIndexInSubtaskGroup(), getOperatorStateBackend());
// Create the handler over the side input state internals; the same object is used as the
// side input reader.
sideInputHandler = new SideInputHandler(sideInputs, sideInputStateInternals);
sideInputReader = sideInputHandler;
@Override public Iterable<V> get(byte[] key, W window) { Iterable<KV<K, V>> values = (Iterable<KV<K, V>>) runnerHandler.getIterable(collection, window); ArrayList<V> result = new ArrayList<>(); // find values for the given key for (KV<K, V> kv : values) { ByteArrayOutputStream bos = new ByteArrayOutputStream(); try { keyCoder.encode(kv.getKey(), bos); if (Arrays.equals(key, bos.toByteArray())) { result.add(kv.getValue()); } } catch (IOException ex) { throw new RuntimeException(ex); } } return result; }
@Override public void process(ApexStreamTuple<WindowedValue<Iterable<?>>> t) { if (t instanceof ApexStreamTuple.WatermarkTuple) { // ignore side input watermarks return; } int sideInputIndex = 0; if (t instanceof ApexStreamTuple.DataTuple) { sideInputIndex = ((ApexStreamTuple.DataTuple<?>) t).getUnionTag(); } if (traceTuples) { LOG.debug("\nsideInput {} {}\n", sideInputIndex, t.getValue()); } PCollectionView<?> sideInput = sideInputs.get(sideInputIndex); sideInputHandler.addSideInputValue(sideInput, t.getValue()); List<WindowedValue<InputT>> newPushedBack = new ArrayList<>(); for (WindowedValue<InputT> elem : pushedBack.get()) { Iterable<WindowedValue<InputT>> justPushedBack = processElementInReadyWindows(elem); Iterables.addAll(newPushedBack, justPushedBack); } pushedBack.get().clear(); pushedBackWatermark.clear(); for (WindowedValue<InputT> pushedBackValue : newPushedBack) { pushedBackWatermark.add(pushedBackValue.getTimestamp().getMillis()); pushedBack.get().add(pushedBackValue); } // potentially emit watermark processWatermark(ApexStreamTuple.WatermarkTuple.of(currentInputWatermark)); } };