@Override
public PCollection<KV<Contig, Iterable<Read>>> expand(PCollection<Read> reads) {
  // First assign each read its Contig key, then collect all reads per contig.
  PCollection<KV<Contig, Read>> keyedReads =
      reads.apply("KeyReads", ParDo.of(new KeyReadsFn()));
  return keyedReads.apply(GroupByKey.<Contig, Read>create());
}
@Override
public PCollection<KV<K, OutputT>> expand(PCollection<KV<K, InputT>> input) {
  // Pick the GroupByKey variant: the few-keys form is a hint that the key
  // space is small, which runners may use to choose a different strategy.
  GroupByKey<K, InputT> groupByKey;
  if (fewKeys) {
    groupByKey = GroupByKey.createWithFewKeys();
  } else {
    groupByKey = GroupByKey.create();
  }
  // Group, then combine the grouped values with the configured fn and its
  // side inputs.
  return input
      .apply(groupByKey)
      .apply(
          Combine.<K, InputT, OutputT>groupedValues(fn, fnDisplayData)
              .withSideInputs(sideInputs));
}
@Override
public PCollection<KeyedWorkItem<K, V>> expand(PCollection<KV<K, V>> input) {
  // Emit a primitive output whose coder wraps the input's key coder, value
  // coder, and window coder into a KeyedWorkItemCoder. The output windowing
  // strategy is reset to the global default.
  Coder<KV<K, V>> kvCoder = input.getCoder();
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(),
      WindowingStrategy.globalDefault(),
      input.isBounded(),
      KeyedWorkItemCoder.of(
          GroupByKey.getKeyCoder(kvCoder),
          GroupByKey.getInputValueCoder(kvCoder),
          input.getWindowingStrategy().getWindowFn().windowCoder()));
}
/**
 * Returns the {@code Coder} of the output of this transform: a {@link KvCoder}
 * pairing the input's key coder with an {@code Iterable} coder over the
 * input's value coder.
 */
public static <K, V> KvCoder<K, Iterable<V>> getOutputKvCoder(Coder<KV<K, V>> inputCoder) {
  Coder<K> keyCoder = getKeyCoder(inputCoder);
  Coder<Iterable<V>> groupedValueCoder = getOutputValueCoder(inputCoder);
  return KvCoder.of(keyCoder, groupedValueCoder);
}
@Override public PCollection<KV<K, Iterable<V>>> expand(PCollection<KV<K, V>> input) { applicableTo(input); // Verify that the input Coder<KV<K, V>> is a KvCoder<K, V>, and that // the key coder is deterministic. Coder<K> keyCoder = getKeyCoder(input.getCoder()); try { keyCoder.verifyDeterministic(); } catch (NonDeterministicException e) { throw new IllegalStateException("the keyCoder of a GroupByKey must be deterministic", e); } // This primitive operation groups by the combination of key and window, // merging windows as needed, using the windows assigned to the // key/value input elements and the window merge operation of the // window function associated with the input PCollection. return PCollection.createPrimitiveOutputInternal( input.getPipeline(), updateWindowingStrategy(input.getWindowingStrategy()), input.isBounded(), getOutputKvCoder(input.getCoder())); }
/**
 * Returns the {@code Coder} used for the grouped {@code Iterable<V>} values
 * in this transform's output.
 */
static <K, V> Coder<Iterable<V>> getOutputValueCoder(Coder<KV<K, V>> inputCoder) {
  Coder<V> elementCoder = getInputValueCoder(inputCoder);
  return IterableCoder.of(elementCoder);
}
/**
 * Groups the input reads by contig.
 *
 * @param reads the reads to group
 * @return the reads keyed by {@code Contig}, grouped per contig
 */
@Override
public PCollection<KV<Contig, Iterable<Read>>> expand(PCollection<Read> reads) {
  return reads
      // KeyReadsFn emits one KV<Contig, Read> per read — presumably keyed by
      // the read's mapped contig; confirm in KeyReadsFn.
      .apply("KeyReads", ParDo.of(new KeyReadsFn()))
      .apply(GroupByKey.<Contig, Read>create());
}
@Test
public void testDisplayData() {
  // The plain GroupByKey registers no display data; the few-keys variant
  // must surface its "fewKeys" hint.
  GroupByKey<String, String> plainGbk = GroupByKey.create();
  GroupByKey<String, String> fewKeysGbk = GroupByKey.createWithFewKeys();

  assertThat(DisplayData.from(plainGbk).items(), empty());
  assertThat(DisplayData.from(fewKeysGbk), hasDisplayItem("fewKeys", true));
}
/**
 * Repackages each {@code KV<K, V>} as a {@code KeyedWorkItem<K, V>} runner
 * primitive. The output windowing strategy is reset to the global default.
 */
@Override
public PCollection<KeyedWorkItem<K, V>> expand(PCollection<KV<K, V>> input) {
  return PCollection.createPrimitiveOutputInternal(
      input.getPipeline(),
      WindowingStrategy.globalDefault(),
      input.isBounded(),
      // Coder combines the input KV's key/value coders with the coder of the
      // input's window type.
      KeyedWorkItemCoder.of(
          GroupByKey.getKeyCoder(input.getCoder()),
          GroupByKey.getInputValueCoder(input.getCoder()),
          input.getWindowingStrategy().getWindowFn().windowCoder()));
}
@Override
public PCollection<Iterable<InputT>> expand(PCollection<InputT> input) {
  // Funnel every element under a single (null) key so one group holds them
  // all, then drop the key to leave just the Iterable of inputs.
  PCollection<KV<Void, InputT>> keyed = input.apply(WithKeys.of((Void) null));
  PCollection<KV<Void, Iterable<InputT>>> grouped = keyed.apply(GroupByKey.create());
  return grouped.apply(Values.create());
}
}
/**
 * Tests that the translator is registered so the URN can be retrieved (the
 * only thing you can meaningfully do with a {@link GroupByKey}).
 */
@Test
public void testUrnRetrievable() throws Exception {
  String urn = PTransformTranslation.urnForTransform(GroupByKey.create());
  assertThat(urn, equalTo(GROUP_BY_KEY_TRANSFORM_URN));
}
}
@Override
public PCollection<T> expand(PCollection<T> input) {
  // Pair each element with an Integer value, group by the element itself,
  // then keep only the keys. The GroupByKey forces a shuffle, which breaks
  // producer/consumer fusion at this point in the pipeline.
  PCollection<KV<T, Integer>> keyed =
      input.apply("Break fusion mapper", ParDo.of(new DummyMapFn<T>()));
  return keyed
      .apply(GroupByKey.<T, Integer>create())
      .apply(Keys.<T>create());
}
/** Passes the input through a GroupByKey shuffle, breaking fusion around it. */
@Override
public PCollection<T> expand(PCollection<T> input) {
  return input
      // DummyMapFn pairs each element with an Integer value — presumably a
      // placeholder; confirm in DummyMapFn.
      .apply("Break fusion mapper", ParDo.of(new DummyMapFn<T>()))
      // The shuffle implied by GroupByKey prevents fusion across this point.
      .apply(GroupByKey.<T, Integer>create())
      // Recover the original elements, which are the group keys.
      .apply(Keys.<T>create());
}
/**
 * Merges calls from non-variant segments into the SNP variants they overlap.
 *
 * @param input PCollection of variants to process.
 * @return PCollection of variant-only Variant objects with calls from
 *     non-variant-segments merged into the SNP variants with which they
 *     overlap.
 */
@Override
public PCollection<Variant> expand(PCollection<Variant> input) {
  // Key each variant by a KV<String, Long> — looks like a (name, position-bin)
  // pair; confirm in BinVariantsFn — so overlapping records share a group,
  // then combine each bin's records into merged variants.
  PCollection<KV<KV<String, Long>, Variant>> binned =
      input.apply(ParDo.of(new BinVariantsFn()));
  return binned
      .apply(GroupByKey.<KV<String, Long>, Variant>create())
      .apply(ParDo.of(new CombineVariantsFn()));
}
/**
 * @param input PCollection of variants to process.
 * @return PCollection of variant-only Variant objects with calls from non-variant-segments
 *     merged into the SNP variants with which they overlap.
 */
@Override
public PCollection<Variant> expand(PCollection<Variant> input) {
  return input
      // Key each variant by KV<String, Long> — presumably (reference name,
      // position bin) so overlapping records group together; confirm in
      // BinVariantsFn.
      .apply(ParDo.of(new BinVariantsFn()))
      .apply(GroupByKey.<KV<String, Long>, Variant>create())
      // Merge each bin's grouped records into combined variants.
      .apply(ParDo.of(new CombineVariantsFn()));
}
/**
 * Groups rows by their sharded destination key and writes each group to
 * files under the given temp-file prefix (supplied as a side input),
 * respecting {@code maxFileSize}.
 */
private PCollection<Result<DestinationT>> writeShardedRecords(
    PCollection<KV<ShardedKey<DestinationT>, TableRow>> shardedRecords,
    PCollectionView<String> tempFilePrefix) {
  PCollection<KV<ShardedKey<DestinationT>, Iterable<TableRow>>> grouped =
      shardedRecords.apply("GroupByDestination", GroupByKey.create());
  return grouped
      .apply(
          "WriteGroupedRecords",
          ParDo.of(new WriteGroupedRecordsToFiles<DestinationT>(tempFilePrefix, maxFileSize))
              .withSideInputs(tempFilePrefix))
      .setCoder(WriteBundlesToFiles.ResultCoder.of(destinationCoder));
}
@Override public PCollection<TableRow> expand(PCollection<KV<String, StationSpeed>> stationSpeed) { // Apply a GroupByKey transform to collect a list of all station // readings for a given route. PCollection<KV<String, Iterable<StationSpeed>>> timeGroup = stationSpeed.apply(GroupByKey.create()); // Analyze 'slowdown' over the route readings. PCollection<KV<String, RouteInfo>> stats = timeGroup.apply(ParDo.of(new GatherStats())); // Format the results for writing to BigQuery PCollection<TableRow> results = stats.apply(ParDo.of(new FormatStatsFn())); return results; } }
/**
 * Sums the flow values per freeway and emits one BigQuery row per freeway
 * containing "totalFlow,recordCount" (formatted by {@code FormatTotalFlow}).
 */
@Override
public PCollection<TableRow> expand(PCollection<KV<String, Integer>> flowInfo) {
  // Group the per-record flow values by freeway key.
  PCollection<KV<String, Iterable<Integer>>> flowPerFreeway = flowInfo.apply(GroupByKey.create());

  PCollection<KV<String, String>> results =
      flowPerFreeway.apply(
          ParDo.of(
              new DoFn<KV<String, Iterable<Integer>>, KV<String, String>>() {
                @ProcessElement
                public void processElement(ProcessContext c) throws Exception {
                  Iterable<Integer> flows = c.element().getValue();
                  // Accumulate with primitives: the original boxed
                  // Integer/Long accumulators re-boxed on every iteration of
                  // the loop for no benefit. The emitted string is identical.
                  int sum = 0;
                  long numberOfRecords = 0L;
                  for (Integer value : flows) {
                    sum += value;
                    numberOfRecords++;
                  }
                  c.output(KV.of(c.element().getKey(), sum + "," + numberOfRecords));
                }
              }));

  PCollection<TableRow> output = results.apply(ParDo.of(new FormatTotalFlow(triggerType)));
  return output;
}
}
.apply(GroupByKey.create()) .apply(MapElements.via(new SimpleFunction<KV<String, Iterable<String>>, String>() { @Override