/**
 * Creates a per-key combining {@code PTransform} that reduces the {@code Integer} values sharing
 * a key to their maximum.
 *
 * <p>The input is a {@code PCollection<KV<K, Integer>>}; the output is a {@code
 * PCollection<KV<K, Integer>>} holding one element for each distinct input key, whose value is
 * the maximum of the values that key had in the input.
 *
 * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
 *
 * @param <K> the type of the keys in the input and output {@code PCollection}s
 */
public static <K> Combine.PerKey<K, Integer, Integer> integersPerKey() {
  final MaxIntegerFn maxOfIntegers = new MaxIntegerFn();
  return Combine.perKey(maxOfIntegers);
}
/**
 * Creates a per-key combining {@code PTransform} that reduces the {@code Double} values sharing a
 * key to their maximum.
 *
 * <p>The input is a {@code PCollection<KV<K, Double>>}; the output is a {@code
 * PCollection<KV<K, Double>>} holding one element for each distinct input key, whose value is
 * the maximum of the values that key had in the input.
 *
 * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
 *
 * @param <K> the type of the keys in the input and output {@code PCollection}s
 */
public static <K> Combine.PerKey<K, Double, Double> doublesPerKey() {
  final MaxDoubleFn maxOfDoubles = new MaxDoubleFn();
  return Combine.perKey(maxOfDoubles);
}
/**
 * Creates a per-key combining {@code PTransform} that reduces the {@code Integer} values sharing
 * a key to their minimum.
 *
 * <p>The input is a {@code PCollection<KV<K, Integer>>}; the output is a {@code
 * PCollection<KV<K, Integer>>} holding one element for each distinct input key, whose value is
 * the minimum of the values that key had in the input.
 *
 * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
 *
 * @param <K> the type of the keys in the input and output {@code PCollection}s
 */
public static <K> Combine.PerKey<K, Integer, Integer> integersPerKey() {
  final MinIntegerFn minOfIntegers = new MinIntegerFn();
  return Combine.perKey(minOfIntegers);
}
/**
 * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, Double>>} and returns
 * a {@code PCollection<KV<K, Double>>} that contains an output element mapping each distinct key
 * in the input {@code PCollection} to the sum of the values associated with that key in the input
 * {@code PCollection}.
 *
 * <p>The combining function is the one supplied by {@code Sum.ofDoubles()}, wrapped with {@code
 * Combine.perKey}.
 *
 * @param <K> the type of the keys in the input and output {@code PCollection}s
 * @return a {@link Combine.PerKey} transform that sums {@code Double} values per key
 */
public static <K> Combine.PerKey<K, Double, Double> doublesPerKey() { return Combine.perKey(Sum.ofDoubles()); }
/**
 * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, T>>} and returns a
 * {@code PCollection<KV<K, T>>} that contains one output element per key mapping each to the
 * maximum of the values associated with that key in the input {@code PCollection}.
 *
 * <p>The combining function is obtained from {@code Max.of(comparator)}, so the supplied
 * {@code comparator} is what defines the ordering of the values.
 *
 * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
 *
 * @param <K> the type of the keys in the input and output {@code PCollection}s
 * @param <T> the type of the values being compared
 * @param <ComparatorT> the comparator type; it must be both a {@code Comparator<? super T>} and
 *     {@code Serializable}
 * @param comparator the comparator handed to {@code Max.of}
 * @return a {@link Combine.PerKey} transform built from {@code Max.of(comparator)}
 */
public static <K, T, ComparatorT extends Comparator<? super T> & Serializable> Combine.PerKey<K, T, T> perKey(ComparatorT comparator) { return Combine.perKey(Max.of(comparator)); }
/**
 * Creates a {@link PTransform} that, for every key of its input {@link PCollection}, counts how
 * many elements are associated with that key.
 *
 * @param <K> the type of the keys in the input and output {@link PCollection}s
 * @param <V> the type of the values in the input {@link PCollection}
 */
public static <K, V> PTransform<PCollection<KV<K, V>>, PCollection<KV<K, Long>>> perKey() {
  final CountFn<V> elementCounter = new CountFn<V>();
  return Combine.perKey(elementCounter);
}
/**
 * Creates a per-key combining {@code PTransform} that reduces the {@code Long} values sharing a
 * key to their minimum.
 *
 * <p>The input is a {@code PCollection<KV<K, Long>>}; the output is a {@code
 * PCollection<KV<K, Long>>} holding one element for each distinct input key, whose value is the
 * minimum of the values that key had in the input.
 *
 * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
 *
 * @param <K> the type of the keys in the input and output {@code PCollection}s
 */
public static <K> Combine.PerKey<K, Long, Long> longsPerKey() {
  final MinLongFn minOfLongs = new MinLongFn();
  return Combine.perKey(minOfLongs);
}
/**
 * Creates a per-key combining {@code PTransform} that reduces the {@code Double} values sharing a
 * key to their minimum.
 *
 * <p>The input is a {@code PCollection<KV<K, Double>>}; the output is a {@code
 * PCollection<KV<K, Double>>} holding one element for each distinct input key, whose value is
 * the minimum of the values that key had in the input.
 *
 * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
 *
 * @param <K> the type of the keys in the input and output {@code PCollection}s
 */
public static <K> Combine.PerKey<K, Double, Double> doublesPerKey() {
  final MinDoubleFn minOfDoubles = new MinDoubleFn();
  return Combine.perKey(minOfDoubles);
}
/**
 * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, Long>>} and returns a
 * {@code PCollection<KV<K, Long>>} that contains an output element mapping each distinct key in
 * the input {@code PCollection} to the sum of the values associated with that key in the input
 * {@code PCollection}.
 *
 * <p>The combining function is the one supplied by {@code Sum.ofLongs()}, wrapped with {@code
 * Combine.perKey}.
 *
 * @param <K> the type of the keys in the input and output {@code PCollection}s
 * @return a {@link Combine.PerKey} transform that sums {@code Long} values per key
 */
public static <K> Combine.PerKey<K, Long, Long> longsPerKey() { return Combine.perKey(Sum.ofLongs()); }
/**
 * Like {@link #perKey(int, Comparator)}, but sorts values using their natural ordering.
 *
 * <p>The combining function is created by {@code ApproximateQuantilesCombineFn.<V>create} with
 * the given {@code numQuantiles} and wrapped with {@code Combine.perKey}.
 *
 * @param <K> the type of the keys in the input and output {@code PCollection}s
 * @param <V> the type of the values in the input {@code PCollection}; must be {@code
 *     Comparable} to itself
 * @param numQuantiles the number of elements in the resulting quantile values {@code List}
 * @return a {@code PTransform} from {@code PCollection<KV<K, V>>} to {@code
 *     PCollection<KV<K, List<V>>>}
 */
public static <K, V extends Comparable<V>> PTransform<PCollection<KV<K, V>>, PCollection<KV<K, List<V>>>> perKey(int numQuantiles) { return Combine.perKey(ApproximateQuantilesCombineFn.<V>create(numQuantiles)); }
/**
 * Creates a per-key combining {@code PTransform} that reduces the {@code Long} values sharing a
 * key to their maximum.
 *
 * <p>The input is a {@code PCollection<KV<K, Long>>}; the output is a {@code
 * PCollection<KV<K, Long>>} holding one element for each distinct input key, whose value is the
 * maximum of the values that key had in the input.
 *
 * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
 *
 * @param <K> the type of the keys in the input and output {@code PCollection}s
 */
public static <K> Combine.PerKey<K, Long, Long> longsPerKey() {
  final MaxLongFn maxOfLongs = new MaxLongFn();
  return Combine.perKey(maxOfLongs);
}
/**
 * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, Integer>>} and
 * returns a {@code PCollection<KV<K, Integer>>} that contains an output element mapping each
 * distinct key in the input {@code PCollection} to the sum of the values associated with that key
 * in the input {@code PCollection}.
 *
 * <p>The combining function is the one supplied by {@code Sum.ofIntegers()}, wrapped with {@code
 * Combine.perKey}.
 *
 * @param <K> the type of the keys in the input and output {@code PCollection}s
 * @return a {@link Combine.PerKey} transform that sums {@code Integer} values per key
 */
public static <K> Combine.PerKey<K, Integer, Integer> integersPerKey() { return Combine.perKey(Sum.ofIntegers()); }
/**
 * Returns a {@code PTransform} that takes an input {@code PCollection<KV<K, T>>} and returns a
 * {@code PCollection<KV<K, T>>} that contains an output element mapping each distinct key in the
 * input {@code PCollection} to the maximum according to the natural ordering of {@code T} of the
 * values associated with that key in the input {@code PCollection}.
 *
 * <p>The combining function is obtained from {@code Max.<T>naturalOrder()}.
 *
 * <p>See {@link Combine.PerKey} for how this affects timestamps and windowing.
 *
 * @param <K> the type of the keys in the input and output {@code PCollection}s
 * @param <T> the type of the values; must be {@code Comparable<? super T>}
 * @return a {@link Combine.PerKey} transform built from {@code Max.<T>naturalOrder()}
 */
public static <K, T extends Comparable<? super T>> Combine.PerKey<K, T, T> perKey() { return Combine.perKey(Max.<T>naturalOrder()); }
/**
 * Applies a per-key combine built from {@code ApproximateUniqueCombineFn} to {@code input}.
 *
 * <p>The combine function is constructed from {@code sampleSize} and the value coder taken from
 * the input's {@code KvCoder}, so the input must be encoded with a {@code KvCoder}.
 *
 * @param input the keyed collection to combine
 * @return one {@code KV<K, Long>} per key, as produced by the per-key combine
 * @throws IllegalStateException if the coder of {@code input} is not a {@code KvCoder}
 */
@Override
public PCollection<KV<K, Long>> expand(PCollection<KV<K, V>> input) {
  final Coder<KV<K, V>> kvCoder = input.getCoder();
  if (kvCoder instanceof KvCoder) {
    // The value coder is needed to build the combine function.
    @SuppressWarnings("unchecked")
    final Coder<V> valueCoder = ((KvCoder<K, V>) kvCoder).getValueCoder();
    return input.apply(
        Combine.perKey(new ApproximateUniqueCombineFn<>(sampleSize, valueCoder)));
  }
  throw new IllegalStateException("ApproximateUnique.PerKey requires its input to use KvCoder");
}
@Override public PCollection<TableRow> expand(PCollection<KV<String, LaneInfo>> flowInfo) { // stationId, LaneInfo => stationId + max lane flow info PCollection<KV<String, LaneInfo>> flowMaxes = flowInfo.apply(Combine.perKey(new MaxFlow())); // <stationId, max lane flow info>... => row... PCollection<TableRow> results = flowMaxes.apply(ParDo.of(new FormatMaxesFn())); return results; } }
/**
 * Expands this transform into a three-step chain over the input reads.
 *
 * <p>The steps are applied in this order:
 *
 * <ol>
 *   <li>a {@code ParDo} running {@code CoverageCounts}, constructed with {@code bucketWidth};
 *   <li>a per-key combine using {@code SumCounts}, with key type {@code PosRgsMq} and value type
 *       {@code Long} given explicitly;
 *   <li>a {@code ParDo} running {@code CoverageMeans}.
 * </ol>
 *
 * <p>NOTE(review): going by the class names, this looks like per-bucket counts being summed and
 * then averaged — confirm against {@code CoverageCounts} and {@code CoverageMeans}.
 *
 * @param input the reads to process
 * @return a {@code Double} value for each {@code PosRgsMq} key emitted by the final step
 */
@Override public PCollection<KV<PosRgsMq, Double>> expand(PCollection<Read> input) { return input.apply(ParDo.of(new CoverageCounts(bucketWidth))) .apply(Combine.<PosRgsMq, Long>perKey(new SumCounts())) .apply(ParDo.of(new CoverageMeans())); } }
@Override public PCollection<TableRow> expand(PCollection<TableRow> rows) { // row... => <word, play_name> ... PCollection<KV<String, String>> words = rows.apply(ParDo.of(new ExtractLargeWordsFn())); // word, play_name => word, all_plays ... PCollection<KV<String, String>> wordAllPlays = words.apply(Combine.perKey(new ConcatWords())); // <word, all_plays>... => row... PCollection<TableRow> results = wordAllPlays.apply(ParDo.of(new FormatShakespeareOutputFn())); return results; } }
/**
 * Verifies the names reported by the global, singleton-view, per-key, and hot-key-fanout
 * variants of {@link Combine}.
 */
@Test
public void testCombineGetName() {
  // Each name is computed immediately before it is checked, one variant at a time.
  final String globallyName = Combine.globally(new SumInts()).getName();
  assertEquals("Combine.globally(SumInts)", globallyName);

  final String singletonViewName = Combine.globally(new SumInts()).asSingletonView().getName();
  assertEquals("Combine.GloballyAsSingletonView", singletonViewName);

  final String perKeyName = Combine.perKey(new TestCombineFn()).getName();
  assertEquals("Combine.perKey(Test)", perKeyName);

  final String fanoutName = Combine.perKey(new TestCombineFn()).withHotKeyFanout(10).getName();
  assertEquals("Combine.perKeyWithFanout(Test)", fanoutName);
}
/**
 * Builds a pipeline over {@code table}, applies a global {@code SumInts} combine to its values
 * and a per-key {@code TestCombineFn} combine to its entries, asserts both results, and runs the
 * pipeline.
 *
 * @param table the keyed integers used as pipeline input
 * @param globalSum the single value expected from the global combine
 * @param perKeyCombines the entries expected from the per-key combine, in any order
 */
protected void runTestSimpleCombine(
    List<KV<String, Integer>> table, int globalSum, List<KV<String, String>> perKeyCombines) {
  PCollection<KV<String, Integer>> keyedInts = createInput(pipeline, table);

  // Global combine: drop the keys first, then combine all values.
  PCollection<Integer> valuesOnly = keyedInts.apply(Values.create());
  PCollection<Integer> globalResult = valuesOnly.apply(Combine.globally(new SumInts()));

  // Per-key combine over the keyed input.
  PCollection<KV<String, String>> perKeyResult =
      keyedInts.apply(Combine.perKey(new TestCombineFn()));

  PAssert.that(globalResult).containsInAnyOrder(globalSum);
  PAssert.that(perKeyResult).containsInAnyOrder(perKeyCombines);

  pipeline.run();
}
/**
 * Checks that a per-key {@link Combine} can be built from a Java 8 instance method reference
 * ({@code new Summer()::sum}) and produces the expected value for each key.
 */
@Test
@Category(ValidatesRunner.class)
public void testCombinePerKeyInstanceMethodReference() {
  PCollection<KV<String, Integer>> keyedInts =
      pipeline.apply(Create.of(KV.of("a", 1), KV.of("b", 2), KV.of("a", 3), KV.of("c", 4)));

  PCollection<KV<String, Integer>> combinedPerKey =
      keyedInts.apply(Combine.perKey(new Summer()::sum));

  // Key "a" appears twice (1 and 3); "b" and "c" appear once each.
  PAssert.that(combinedPerKey).containsInAnyOrder(KV.of("a", 4), KV.of("b", 2), KV.of("c", 4));
  pipeline.run();
}