/**
 * Creates a side input reader over the given views.
 *
 * <p>Every view must use the multimap materialization; any other materialization is rejected
 * before the reader stores anything. The windowing strategies are then indexed by each view's
 * internal tag.
 *
 * @param indexByView the supported views, each mapped to its windowing strategy
 * @param runtimeContext the runtime context retained by this reader
 * @throws IllegalArgumentException if a view does not use the multimap materialization
 */
public FlinkSideInputReader(
    Map<PCollectionView<?>, WindowingStrategy<?, ?>> indexByView, RuntimeContext runtimeContext) {
  // Phase 1: validate every view up front.
  for (PCollectionView<?> candidate : indexByView.keySet()) {
    String materializationUrn = candidate.getViewFn().getMaterialization().getUrn();
    checkArgument(
        Materializations.MULTIMAP_MATERIALIZATION_URN.equals(materializationUrn),
        "This handler is only capable of dealing with %s materializations "
            + "but was asked to handle %s for PCollectionView with tag %s.",
        Materializations.MULTIMAP_MATERIALIZATION_URN,
        materializationUrn,
        candidate.getTagInternal().getId());
  }

  // Phase 2: re-key the windowing strategies by the views' tags.
  sideInputs = new HashMap<>();
  for (Map.Entry<PCollectionView<?>, WindowingStrategy<?, ?>> viewAndStrategy :
      indexByView.entrySet()) {
    sideInputs.put(viewAndStrategy.getKey().getTagInternal(), viewAndStrategy.getValue());
  }

  this.runtimeContext = runtimeContext;
}
/**
 * Builds the side inputs for the given views as broadcast variables.
 *
 * @param views The {@link PCollectionView}s; may be {@code null}, in which case an empty
 *     immutable map is returned.
 * @param context The {@link JavaSparkContext}.
 * @param pviews The {@link SparkPCollectionView}.
 * @return a map from each view's tag to its {@link WindowingStrategy} paired with its {@link
 *     SideInputBroadcast}.
 */
public static Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> getSideInputs(
    List<PCollectionView<?>> views, JavaSparkContext context, SparkPCollectionView pviews) {
  if (views == null) {
    return ImmutableMap.of();
  }

  Map<TupleTag<?>, KV<WindowingStrategy<?, ?>, SideInputBroadcast<?>>> broadcastsByTag =
      Maps.newHashMap();
  for (PCollectionView<?> view : views) {
    SideInputBroadcast broadcast = pviews.getPCollectionView(view, context);
    WindowingStrategy<?, ?> strategy = view.getWindowingStrategyInternal();
    broadcastsByTag.put(view.getTagInternal(), KV.of(strategy, broadcast));
  }
  return broadcastsByTag;
}
@Nullable @Override public <T> T get(PCollectionView<T> view, BoundedWindow window) { Iterable<?> elements = getIterable(view, window); // TODO: Add support for choosing which representation is contained based upon the // side input materialization. We currently can assume that we always have a multimap // materialization as that is the only supported type within the Java SDK. ViewFn<MultimapView, T> viewFn = (ViewFn<MultimapView, T>) view.getViewFn(); Coder<?> keyCoder = ((KvCoder<?, ?>) view.getCoderInternal()).getKeyCoder(); return (T) viewFn.apply(InMemoryMultimapSideInputView.fromIterable(keyCoder, (Iterable) elements)); }
/**
 * Converts the given {@link PCollectionView}s into the tag-to-value map shape used by {@link
 * PTransform#getAdditionalInputs()}.
 *
 * @param views the views to expand
 * @return an immutable map from each view's tag to the view's {@link PCollection}
 */
public static Map<TupleTag<?>, PValue> toAdditionalInputs(Iterable<PCollectionView<?>> views) {
  ImmutableMap.Builder<TupleTag<?>, PValue> inputsByTag = ImmutableMap.builder();
  views.forEach(view -> inputsByTag.put(view.getTagInternal(), view.getPCollection()));
  return inputsByTag.build();
}
// Look up the proto SideInput stored in the ParDo payload under this view's tag id.
SideInput sideInput = parDoPayload.getSideInputsOrThrow(view.getTagInternal().getId());
// Rebuild a PCollectionView from the proto, reusing the original view's tag id and PCollection.
PCollectionView<?> restoredView =
    PCollectionViewTranslation.viewFromProto(
        sideInput,
        view.getTagInternal().getId(),
        view.getPCollection(),
        protoTransform,
        rehydratedComponents);
// The tag must match the original exactly.
assertThat(restoredView.getTagInternal(), equalTo(view.getTagInternal()));
// The view fn and window mapping fn are only checked for having the same class.
assertThat(restoredView.getViewFn(), instanceOf(view.getViewFn().getClass()));
assertThat(
    restoredView.getWindowMappingFn(), instanceOf(view.getWindowMappingFn().getClass()));
// The windowing strategy is compared against the original with fixDefaults() applied.
assertThat(
    restoredView.getWindowingStrategyInternal(),
    equalTo(view.getWindowingStrategyInternal().fixDefaults()));
// The coder must match the original exactly.
assertThat(restoredView.getCoderInternal(), equalTo(view.getCoderInternal()));
checkArgument( Materializations.MULTIMAP_MATERIALIZATION_URN.equals( sideInput.getViewFn().getMaterialization().getUrn()), "This handler is only capable of dealing with %s materializations " + "but was asked to handle %s for PCollectionView with tag %s.", Materializations.MULTIMAP_MATERIALIZATION_URN, sideInput.getViewFn().getMaterialization().getUrn(), sideInput.getTagInternal().getId()); sideInput.getWindowingStrategyInternal().getWindowFn().windowCoder(); "side-input-available-windows-" + sideInput.getTagInternal().getId(), SetCoder.of(windowCoder), new WindowSetCombineFn()); "side-input-data-" + sideInput.getTagInternal().getId(), (Coder) IterableCoder.of(sideInput.getCoderInternal())); sideInputContentsTags.put(sideInput, stateTag);
sideInputs.get(view.getTagInternal()); checkNotNull(windowedBroadcastHelper, "SideInput for view " + view + " is not available."); final BoundedWindow sideInputWindow = view.getWindowMappingFn().getSideInputWindow(window); .collect(Collectors.toList()); ViewFn<MultimapView, T> viewFn = (ViewFn<MultimapView, T>) view.getViewFn(); Coder keyCoder = ((KvCoder<?, ?>) view.getCoderInternal()).getKeyCoder(); return (T) viewFn.apply(
/**
 * Reports whether this reader knows the given view.
 *
 * <p>The lookup is by the view's internal tag, not by the view object itself.
 *
 * @param view the view to look for
 * @return {@code true} if {@code sideInputs} has an entry for the view's tag
 */
@Override
public <T> boolean contains(PCollectionView<T> view) {
  return sideInputs.containsKey(view.getTagInternal());
}
@Override public void visitPrimitiveTransform(Node node) { if (node.getTransform() instanceof WriteView) { assertThat( "There should only be one WriteView primitive in the graph", writeViewVisited.getAndSet(true), is(false)); PCollectionView<?> replacementView = ((WriteView) node.getTransform()).getView(); // replacementView.getPCollection() is null, but that is not a requirement // so not asserted one way or the other assertThat( replacementView.getTagInternal(), equalTo((TupleTag) view.getTagInternal())); assertThat(replacementView.getViewFn(), equalTo(view.getViewFn())); assertThat(replacementView.getWindowMappingFn(), equalTo(view.getWindowMappingFn())); assertThat(node.getInputs().entrySet(), hasSize(1)); } } });
/**
 * Stubs {@code context.scheduleAfterOutputWouldBeProduced} so that, when it is invoked with the
 * given view, the given window and the view's windowing strategy, the supplied callback is run
 * immediately instead of being scheduled.
 *
 * @param view the view whose callback registration is intercepted
 * @param window the window the stubbed call must be made with
 */
private void immediatelyInvokeCallback(PCollectionView<?> view, BoundedWindow window) {
  doAnswer(
          invocation -> {
            // The callback is the fourth argument of scheduleAfterOutputWouldBeProduced.
            Runnable scheduled = (Runnable) invocation.getArguments()[3];
            scheduled.run();
            return null;
          })
      .when(context)
      .scheduleAfterOutputWouldBeProduced(
          Mockito.eq(view),
          Mockito.eq(window),
          Mockito.eq(view.getWindowingStrategyInternal()),
          Mockito.any(Runnable.class));
}
@Nullable @Override public <T> T get(final PCollectionView<T> view, final BoundedWindow window) { // This gets called after isReady() final T sideInputData = (T) inMemorySideInputs.get(Pair.of(view, window)); return sideInputData == null // The upstream gave us an empty sideInput ? ((ViewFn<Object, T>) view.getViewFn()).apply(new CreateViewTransform.MultiView<T>(Collections.emptyList())) // The upstream gave us a concrete sideInput : sideInputData; }
int count = 0; for (PCollectionView<?> sideInput: sideInputs) { TupleTag<?> tag = sideInput.getTagInternal(); intToViewMapping.put(count, sideInput); tagToIntMapping.put(tag, count); count++; Coder<Iterable<WindowedValue<?>>> coder = sideInput.getCoderInternal(); TupleTag<?> tag = sideInput.getTagInternal(); final int intTag = tagToIntMapping.get(tag); DataStream<Object> sideInputStream = context.getInputDataStream(sideInput);
/**
 * Translates a {@link PCollectionView} into its {@link SideInput} proto.
 *
 * <p>The access pattern is a {@link FunctionSpec} carrying only the URN of the view function's
 * materialization. The view fn and window mapping fn are translated with the given components.
 *
 * @param view the view to translate
 * @param components the components passed to the view fn and window mapping fn translators
 * @return the built {@link SideInput}
 */
public static SideInput translateView(PCollectionView<?> view, SdkComponents components) {
  return SideInput.newBuilder()
      .setAccessPattern(
          FunctionSpec.newBuilder()
              .setUrn(view.getViewFn().getMaterialization().getUrn())
              .build())
      .setViewFn(translateViewFn(view.getViewFn(), components))
      .setWindowMappingFn(translateWindowMappingFn(view.getWindowMappingFn(), components))
      .build();
}
/**
 * Reports whether every side input is ready for the given main input window.
 *
 * <p>A window listed in {@code notReadyWindows} is never ready. Otherwise each view's window
 * mapping fn maps the main input window to a side input window, and the side input reader must
 * report that window as ready for every view.
 *
 * @param mainInputWindow the main input window to check
 * @return {@code true} only if all views are ready for this window
 */
private boolean isReady(BoundedWindow mainInputWindow) {
  if (!notReadyWindows.contains(mainInputWindow)) {
    for (PCollectionView<?> view : views) {
      BoundedWindow mappedWindow =
          view.getWindowMappingFn().getSideInputWindow(mainInputWindow);
      boolean viewReady = sideInputReader.isReady(view, mappedWindow);
      if (!viewReady) {
        return false;
      }
    }
    return true;
  }
  return false;
}
/**
 * Expands this transform.
 *
 * <p>When {@code streaming} is false, the view's own {@link PCollection} is returned. When it is
 * true, a new primitive output is created with the input's pipeline, windowing strategy,
 * boundedness and coder.
 *
 * @param input the input collection
 * @return the view's collection, or a new primitive output when streaming
 */
@Override
public PCollection<ElemT> expand(PCollection<ElemT> input) {
  if (!streaming) {
    return (PCollection) view.getPCollection();
  }
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(), input.getWindowingStrategy(), input.isBounded(), input.getCoder());
}
/**
 * Reports whether a side input is registered for the given view.
 *
 * <p>Membership is decided by the view's internal tag.
 *
 * @param view the view to check
 * @return {@code true} if {@code sideInputs} contains the view's tag as a key
 */
@Override
public <T> boolean contains(PCollectionView<T> view) {
  return sideInputs.containsKey(view.getTagInternal());
}
/**
 * With one main input and one side input among the application's inputs, {@code
 * getSingletonMainInput} must return the main input.
 */
@Test
public void getMainInputSingleOutputSideInputs() {
  // Inputs hold both the main input and the side input's PCollection.
  ImmutableMap<TupleTag<?>, PValue> inputs =
      ImmutableMap.<TupleTag<?>, PValue>builder()
          .put(new TupleTag<Long>(), mainInput)
          .put(sideInput.getTagInternal(), sideInput.getPCollection())
          .build();

  AppliedPTransform<PCollection<Long>, ?, ?> application =
      AppliedPTransform.of(
          "application",
          inputs,
          Collections.singletonMap(new TupleTag<Long>(), output),
          ParDo.of(new TestDoFn()).withSideInputs(sideInput),
          pipeline);

  PCollection<Long> singletonInput = PTransformReplacements.getSingletonMainInput(application);
  assertThat(singletonInput, equalTo(mainInput));
}
/** * Retrieve the value as written by {@link #addSideInputValue(PCollectionView, WindowedValue)}, * without applying the SDK specific {@link ViewFn}. * * @param view * @param window * @param <T> * @return */ public <T> Iterable<?> getIterable(PCollectionView<T> view, BoundedWindow window) { @SuppressWarnings("unchecked") Coder<BoundedWindow> windowCoder = (Coder<BoundedWindow>) view.getWindowingStrategyInternal().getWindowFn().windowCoder(); StateTag<ValueState<Iterable<?>>> stateTag = sideInputContentsTags.get(view); ValueState<Iterable<?>> state = stateInternals.state(StateNamespaces.window(windowCoder, window), stateTag); Iterable<?> elements = state.read(); // return empty collection when no side input was received for ready window return (elements != null) ? elements : Collections.emptyList(); }
/**
 * Returns a matcher for {@code CreatePCollectionView} applications whose view uses a {@link
 * ViewFn} of exactly the given class.
 *
 * <p>The class comparison is by equality, so a subclass of {@code viewFnType} does not match.
 *
 * @param viewFnType the exact view fn class to match
 * @return a matcher that is false for any other transform or view fn class
 */
public static PTransformMatcher createViewWithViewFn(final Class<? extends ViewFn> viewFnType) {
  return application -> {
    if (application.getTransform() instanceof CreatePCollectionView) {
      CreatePCollectionView<?, ?> createView =
          (CreatePCollectionView<?, ?>) application.getTransform();
      return createView.getView().getViewFn().getClass().equals(viewFnType);
    }
    return false;
  };
}
/**
 * Reads the given side input for the current main input window.
 *
 * <p>The view's window mapping fn maps {@code mainInputWindow} to the side input window, which is
 * then looked up in the side input reader.
 *
 * @param view the side input view to read
 * @return the value the side input reader returns for the mapped window
 */
@Override
public <T> T sideInput(PCollectionView<T> view) {
  return sideInputReader.get(
      view, view.getWindowMappingFn().getSideInputWindow(mainInputWindow));
}