/**
 * Creates a {@link PTransform} that counts how many elements its input {@link PCollection}
 * contains.
 *
 * <p>Note: when the input collection is windowed by anything other than {@link GlobalWindows},
 * use {@code Combine.globally(Count.<T>combineFn()).withoutDefaults()} instead.
 *
 * @param <T> the element type of the input collection
 * @return a global combine built around a fresh {@code CountFn}
 */
public static <T> PTransform<PCollection<T>, PCollection<Long>> globally() {
  CountFn<T> countFn = new CountFn<T>();
  return Combine.globally(countFn);
}
/**
 * Expands this transform by applying {@code Combine.globally(combineFn)} to the input.
 *
 * @param input the collection to combine
 * @return the collection produced by the global combine
 */
@Override
public PCollection<OutputT> expand(PCollection<InputT> input) {
  PCollection<OutputT> combined = input.apply(Combine.globally(combineFn));
  return combined;
}
}
/**
 * Builds a {@code PTransform} from an input {@code PCollection<T>} to a {@code PCollection<T>}
 * holding the maximum of the input's elements under the natural ordering of {@code T}, or
 * {@code null} if there are no elements.
 *
 * @param <T> the comparable element type
 * @return a global combine using {@code Max.naturalOrder()}
 */
public static <T extends Comparable<? super T>> Combine.Globally<T, T> globally() {
  Combine.Globally<T, T> maxByNaturalOrder = Combine.globally(Max.<T>naturalOrder());
  return maxByNaturalOrder;
}
/**
 * Builds a {@code PTransform} from an input {@code PCollection<T>} to a {@code PCollection<T>}
 * holding the maximum of the input's elements, or {@code null} if there are no elements.
 *
 * @param <T> the element type
 * @param <ComparatorT> a serializable comparator type able to compare {@code T}
 * @param comparator the comparator handed to {@code Max.of}
 * @return a global combine using {@code Max.of(comparator)}
 */
public static <T, ComparatorT extends Comparator<? super T> & Serializable>
    Combine.Globally<T, T> globally(ComparatorT comparator) {
  Combine.Globally<T, T> maxByComparator = Combine.globally(Max.of(comparator));
  return maxByComparator;
}
/**
 * Builds a {@code PTransform} from an input {@code PCollection<Integer>} to a {@code
 * PCollection<Integer>} containing a single value: the minimum of the input's elements, or
 * {@code Integer.MAX_VALUE} if there are no elements.
 *
 * @return a global combine built around a fresh {@code MinIntegerFn}
 */
public static Combine.Globally<Integer, Integer> integersGlobally() {
  MinIntegerFn minFn = new MinIntegerFn();
  return Combine.globally(minFn);
}
/**
 * Builds a {@code PTransform} from an input {@code PCollection<Long>} to a {@code
 * PCollection<Long>} holding the minimum of the input's elements, or {@code Long.MAX_VALUE} if
 * there are no elements.
 *
 * @return a global combine built around a fresh {@code MinLongFn}
 */
public static Combine.Globally<Long, Long> longsGlobally() {
  MinLongFn minFn = new MinLongFn();
  return Combine.globally(minFn);
}
/**
 * Builds a {@code PTransform} from an input {@code PCollection<Double>} to a {@code
 * PCollection<Double>} holding the sum of the input's elements, or {@code 0} if there are no
 * elements.
 *
 * @return a global combine using {@code Sum.ofDoubles()}
 */
public static Combine.Globally<Double, Double> doublesGlobally() {
  Combine.Globally<Double, Double> summed = Combine.globally(Sum.ofDoubles());
  return summed;
}
/**
 * Builds a {@code PTransform} from an input {@code PCollection<Double>} to a {@code
 * PCollection<Double>} holding the maximum of the input's elements, or {@code
 * Double.NEGATIVE_INFINITY} if there are no elements.
 *
 * @return a global combine built around a fresh {@code MaxDoubleFn}
 */
public static Combine.Globally<Double, Double> doublesGlobally() {
  MaxDoubleFn maxFn = new MaxDoubleFn();
  return Combine.globally(maxFn);
}
/**
 * Builds a {@code PTransform} from an input {@code PCollection<Double>} to a {@code
 * PCollection<Double>} holding the minimum of the input's elements, or {@code
 * Double.POSITIVE_INFINITY} if there are no elements.
 *
 * @return a global combine built around a fresh {@code MinDoubleFn}
 */
public static Combine.Globally<Double, Double> doublesGlobally() {
  MinDoubleFn minFn = new MinDoubleFn();
  return Combine.globally(minFn);
}
/**
 * Builds a {@code PTransform} from an input {@code PCollection<T>} to a {@code PCollection<T>}
 * holding the minimum of the input's elements, or {@code null} if there are no elements.
 *
 * @param <T> the element type
 * @param <ComparatorT> a serializable comparator type able to compare {@code T}
 * @param comparator the comparator handed to {@code Min.of}
 * @return a global combine using {@code Min.of(comparator)}
 */
public static <T, ComparatorT extends Comparator<? super T> & Serializable>
    Combine.Globally<T, T> globally(ComparatorT comparator) {
  Combine.Globally<T, T> minByComparator = Combine.globally(Min.of(comparator));
  return minByComparator;
}
/**
 * Builds a {@code PTransform} from an input {@code PCollection<NumT>} to a {@code
 * PCollection<Double>} holding the mean of the input's elements, or {@code 0} if there are no
 * elements.
 *
 * @param <NumT> the type of the {@code Number}s being combined
 * @return a global combine using {@code Mean.of()}
 */
public static <NumT extends Number> Combine.Globally<NumT, Double> globally() {
  Combine.Globally<NumT, Double> averaged = Combine.globally(Mean.of());
  return averaged;
}
/**
 * Variant of {@link #globally(int, Comparator)} that orders elements by their natural ordering.
 *
 * @param <T> the type of the elements in the input {@code PCollection}
 * @param numQuantiles the number of elements in the resulting quantile values {@code List}
 * @return a global combine using {@code ApproximateQuantilesCombineFn.create(numQuantiles)}
 */
public static <T extends Comparable<T>>
    PTransform<PCollection<T>, PCollection<List<T>>> globally(int numQuantiles) {
  PTransform<PCollection<T>, PCollection<List<T>>> quantiles =
      Combine.globally(ApproximateQuantilesCombineFn.<T>create(numQuantiles));
  return quantiles;
}
/**
 * Expands this transform in two steps: a global combine with a {@code SampleAnyCombineFn}
 * constructed from {@code limit} (with defaults disabled), followed by {@code
 * Flatten.iterables()}.
 *
 * @param in the collection to sample from
 * @return the flattened output of the combine
 */
@Override
public PCollection<T> expand(PCollection<T> in) {
  SampleAnyCombineFn<T> sampler = new SampleAnyCombineFn<T>(limit);
  return in.apply(Combine.globally(sampler).withoutDefaults()).apply(Flatten.iterables());
}
/**
 * Expands this transform by combining the input globally with a {@code Concatenate} fn
 * (defaults disabled) and passing the result to {@code CreateFlinkPCollectionView} for {@code
 * view}.
 *
 * <p>The output of that chain is discarded; the original {@code input} is what gets returned.
 *
 * @param input the collection backing the view
 * @return {@code input}, unchanged
 */
@Override
public PCollection<ElemT> expand(PCollection<ElemT> input) {
  Concatenate<ElemT> concatenate = new Concatenate<ElemT>();
  input
      .apply(Combine.globally(concatenate).withoutDefaults())
      .apply(CreateFlinkPCollectionView.<ElemT, ViewT>of(view));
  return input;
}
/**
 * Expands this transform by combining the input globally with a {@code Concatenate} fn
 * (defaults disabled) and passing the result to {@code CreateFlinkPCollectionView.of(view)}.
 *
 * <p>The output of that chain is discarded; the original {@code input} is what gets returned.
 *
 * @param input the collection backing the view
 * @return {@code input}, unchanged
 */
@Override
public PCollection<ElemT> expand(PCollection<ElemT> input) {
  Concatenate<ElemT> concatenate = new Concatenate<ElemT>();
  input
      .apply(Combine.globally(concatenate).withoutDefaults())
      .apply(CreateFlinkPCollectionView.of(view));
  return input;
}
/**
 * Checks that a global combine configured with a side input still reports that side input after
 * {@code withoutDefaults()} is applied.
 *
 * <p>NOTE(review): the method name says "WithDefaults", but the body calls {@code
 * withoutDefaults()} - confirm which is intended.
 */
@Test
public void testWithDefaultsPreservesSideInputs() {
  final PCollectionView<Integer> view =
      pipeline.apply(Create.of(1)).apply(Sum.integersGlobally().asSingletonView());

  TestCombineFnWithContext contextFn = new TestCombineFnWithContext(view);
  Combine.Globally<Integer, String> combine =
      Combine.globally(contextFn).withSideInputs(view).withoutDefaults();

  assertEquals(Collections.singletonList(view), combine.getSideInputs());
}
/**
 * Verifies the names reported by {@code getName()} for the global, singleton-view, per-key and
 * per-key-with-fanout combine variants.
 */
@Test
public void testCombineGetName() {
  // Each name is computed right before its assertion, so evaluation order matches one-by-one
  // checking.
  String globalName = Combine.globally(new SumInts()).getName();
  assertEquals("Combine.globally(SumInts)", globalName);

  String singletonViewName = Combine.globally(new SumInts()).asSingletonView().getName();
  assertEquals("Combine.GloballyAsSingletonView", singletonViewName);

  String perKeyName = Combine.perKey(new TestCombineFn()).getName();
  assertEquals("Combine.perKey(Test)", perKeyName);

  String fanoutName = Combine.perKey(new TestCombineFn()).withHotKeyFanout(10).getName();
  assertEquals("Combine.perKeyWithFanout(Test)", fanoutName);
}
/**
 * Exercises building a global {@link Combine} from a Java 8 instance method reference: the
 * values 1 through 4 are combined via {@code Summer::sum} and the output must contain 10.
 */
@Test
@Category(ValidatesRunner.class)
public void testCombineGloballyInstanceMethodReference() {
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3, 4));
  PCollection<Integer> output = numbers.apply(Combine.globally(new Summer()::sum));

  PAssert.that(output).containsInAnyOrder(10);
  pipeline.run();
}
}
/**
 * Runs a pipeline over {@code table} that computes both a global sum of the values and a per-key
 * combine, then asserts on both results.
 *
 * @param table the key/value pairs used as pipeline input
 * @param globalSum the expected single output of the global {@code SumInts} combine
 * @param perKeyCombines the expected output of the per-key {@code TestCombineFn} combine
 */
protected void runTestSimpleCombine(
    List<KV<String, Integer>> table, int globalSum, List<KV<String, String>> perKeyCombines) {
  PCollection<KV<String, Integer>> input = createInput(pipeline, table);

  // Global branch: drop the keys, then sum every value.
  PCollection<Integer> values = input.apply(Values.create());
  PCollection<Integer> sum = values.apply(Combine.globally(new SumInts()));

  // Per-key branch.
  PCollection<KV<String, String>> sumPerKey = input.apply(Combine.perKey(new TestCombineFn()));

  PAssert.that(sum).containsInAnyOrder(globalSum);
  PAssert.that(sumPerKey).containsInAnyOrder(perKeyCombines);

  pipeline.run();
}
/**
 * Combines an empty, fixed-windowed (1 ms) integer collection with {@code MeanInts} and defaults
 * disabled, and asserts that the output is empty.
 */
@Test
@Category({ValidatesRunner.class, DataflowPortabilityApiUnsupported.class})
public void testWindowedCombineEmpty() {
  PCollection<Integer> noElements = pipeline.apply(Create.empty(BigEndianIntegerCoder.of()));
  PCollection<Integer> windowed =
      noElements.apply(Window.into(FixedWindows.of(Duration.millis(1))));
  PCollection<Double> mean = windowed.apply(Combine.globally(new MeanInts()).withoutDefaults());

  PAssert.that(mean).empty();
  pipeline.run();
}