/**
 * Builds the batch side-input representation for a singleton-style view.
 *
 * <p>The window coder is taken from the input's windowing strategy and combined with {@code
 * defaultValueCoder} (via {@code coderForSingleton}) to obtain the coder of the produced ISM
 * records. The input is then run through {@code GroupByWindowHashAsKeyAndWindowAsSortKey}
 * followed by the supplied {@code doFn}; the resulting collection gets the ISM coder assigned,
 * is registered with the runner as requiring the indexed format, and finally has {@code
 * CreateDataflowView.forBatch(view)} applied to it.
 *
 * @param runner the runner with which the produced collection is registered
 * @param input the collection backing the view
 * @param doFn converts the grouped, per-window values into ISM records
 * @param defaultValueCoder coder handed to {@code coderForSingleton} together with the window
 *     coder
 * @param view the view that is created on top of the produced ISM records
 * @return the collection of ISM records (after coder assignment and view creation)
 */
static <T, FinalT, ViewT, W extends BoundedWindow> PCollection<?> applyForSingleton(
    DataflowRunner runner,
    PCollection<T> input,
    DoFn<KV<Integer, Iterable<KV<W, WindowedValue<T>>>>, IsmRecord<WindowedValue<FinalT>>> doFn,
    Coder<FinalT> defaultValueCoder,
    PCollectionView<ViewT> view) {

  // The window fn only exposes a wildcard-typed coder, hence the unchecked narrowing to W.
  @SuppressWarnings("unchecked")
  Coder<W> coderForWindows =
      (Coder<W>) input.getWindowingStrategy().getWindowFn().windowCoder();

  IsmRecordCoder<WindowedValue<FinalT>> recordCoder =
      coderForSingleton(coderForWindows, defaultValueCoder);

  GroupByWindowHashAsKeyAndWindowAsSortKey<T, W> groupByWindow =
      new GroupByWindowHashAsKeyAndWindowAsSortKey<T, W>(recordCoder);

  PCollection<IsmRecord<WindowedValue<FinalT>>> ismRecords =
      input.apply(groupByWindow).apply(ParDo.of(doFn));
  ismRecords.setCoder(recordCoder);

  // Register with the runner first, then attach the batch view to the same collection.
  runner.addPCollectionRequiringIndexedFormat(ismRecords);
  ismRecords.apply(CreateDataflowView.forBatch(view));

  return ismRecords;
}
static <T, W extends BoundedWindow, ViewT> PCollection<?> applyForIterableLike( DataflowRunner runner, PCollection<T> input, PCollectionView<ViewT> view) { @SuppressWarnings("unchecked") Coder<W> windowCoder = (Coder<W>) input.getWindowingStrategy().getWindowFn().windowCoder(); IsmRecordCoder<WindowedValue<T>> ismCoder = coderForListLike(windowCoder, input.getCoder()); // If we are working in the global window, we do not need to do a GBK using the window // as the key since all the elements of the input PCollection are already such. // We just reify the windowed value while converting them to IsmRecords and generating // an index based upon where we are within the bundle. Each bundle // maps to one file exactly. if (input.getWindowingStrategy().getWindowFn() instanceof GlobalWindows) { PCollection<IsmRecord<WindowedValue<T>>> reifiedPerWindowAndSorted = input.apply(ParDo.of(new ToIsmRecordForGlobalWindowDoFn<>())); reifiedPerWindowAndSorted.setCoder(ismCoder); runner.addPCollectionRequiringIndexedFormat(reifiedPerWindowAndSorted); reifiedPerWindowAndSorted.apply(CreateDataflowView.forBatch(view)); return reifiedPerWindowAndSorted; } PCollection<IsmRecord<WindowedValue<T>>> reifiedPerWindowAndSorted = input .apply(new GroupByWindowHashAsKeyAndWindowAsSortKey<T, W>(ismCoder)) .apply(ParDo.of(new ToIsmRecordForNonGlobalWindowDoFn<>(windowCoder))); reifiedPerWindowAndSorted.setCoder(ismCoder); runner.addPCollectionRequiringIndexedFormat(reifiedPerWindowAndSorted); reifiedPerWindowAndSorted.apply(CreateDataflowView.forBatch(view)); return reifiedPerWindowAndSorted; }
runner.addPCollectionRequiringIndexedFormat(perHashWithReifiedWindows); runner.addPCollectionRequiringIndexedFormat(windowMapSizeMetadata); runner.addPCollectionRequiringIndexedFormat(windowMapKeysMetadata);