/**
 * Creates a per-key summing transform for {@code Long} values.
 *
 * <p>Applied to a {@code PCollection<KV<K, Long>>}, the resulting {@code PTransform} yields a
 * {@code PCollection<KV<K, Long>>} with one output element per distinct input key, whose value
 * is the sum of every value that was paired with that key in the input.
 */
public static <K> Combine.PerKey<K, Long, Long> longsPerKey() {
  final Combine.BinaryCombineLongFn longSumFn = Sum.ofLongs();
  return Combine.perKey(longSumFn);
}
/** Checks the name reported by each global and per-key {@code Sum} transform. */
@Test
public void testSumGetNames() {
  // Integer sums.
  assertEquals("Combine.globally(SumInteger)", Sum.integersGlobally().getName());
  assertEquals("Combine.perKey(SumInteger)", Sum.integersPerKey().getName());

  // Double sums.
  assertEquals("Combine.globally(SumDouble)", Sum.doublesGlobally().getName());
  assertEquals("Combine.perKey(SumDouble)", Sum.doublesPerKey().getName());

  // Long sums.
  assertEquals("Combine.globally(SumLong)", Sum.longsGlobally().getName());
  assertEquals("Combine.perKey(SumLong)", Sum.longsPerKey().getName());
}
/**
 * Creates a per-key summing transform for {@code Integer} values.
 *
 * <p>Applied to a {@code PCollection<KV<K, Integer>>}, the resulting {@code PTransform} yields a
 * {@code PCollection<KV<K, Integer>>} with one output element per distinct input key, whose
 * value is the sum of every value that was paired with that key in the input.
 */
public static <K> Combine.PerKey<K, Integer, Integer> integersPerKey() {
  final Combine.BinaryCombineIntegerFn integerSumFn = Sum.ofIntegers();
  return Combine.perKey(integerSumFn);
}
/**
 * Selects the summing {@link CombineFn} that matches a schema field type.
 *
 * <p>{@code INT32}, {@code INT64} and {@code DOUBLE} reuse the functions provided by {@link Sum};
 * {@code BYTE}, {@code INT16}, {@code FLOAT} and {@code DECIMAL} use dedicated implementations.
 *
 * @param fieldType the type of the field being summed
 * @return the combine function for {@code fieldType}
 * @throws UnsupportedOperationException if {@code fieldType} is none of the types listed above
 */
static CombineFn createSum(Schema.TypeName fieldType) {
  switch (fieldType) {
    // Integral types, narrowest first.
    case BYTE:
      return new ByteSum();
    case INT16:
      return new ShortSum();
    case INT32:
      return Sum.ofIntegers();
    case INT64:
      return Sum.ofLongs();

    // Floating-point and arbitrary-precision types.
    case FLOAT:
      return new FloatSum();
    case DOUBLE:
      return Sum.ofDoubles();
    case DECIMAL:
      return new BigDecimalSum();

    default:
      throw new UnsupportedOperationException(
          String.format("[%s] is not support in SUM", fieldType));
  }
}
/**
 * Initializes the Mockito mocks, builds a singleton side-input view holding the sum of
 * {@code 1, 2, 3}, and creates a fresh underlying test runner.
 */
@Before
public void setup() {
  MockitoAnnotations.initMocks(this);

  PCollection<Integer> created = p.apply(Create.of(1, 2, 3));
  PCollection<Integer> windowed = created.apply(Window.into(new IdentitySideInputWindowFn()));
  singletonView = windowed.apply(Sum.integersGlobally().asSingletonView());

  underlying = new TestDoFnRunner<>();
}
/**
 * Aggregates individual fields of {@code AggregatePojos} globally (two sums and a top-1) and
 * verifies the single resulting row.
 */
@Test
@Category(NeedsRunner.class)
public void testGloballyWithSchemaAggregateFn() {
  // Expected output: one row carrying both sums and the single largest field1 value.
  Schema aggregateSchema =
      Schema.builder()
          .addInt64Field("field1_sum")
          .addInt32Field("field3_sum")
          .addArrayField("field1_top", FieldType.INT64)
          .build();
  Row expectedRow = Row.withSchema(aggregateSchema).addValues(10L, 14).addArray(4L).build();

  Collection<AggregatePojos> elements =
      ImmutableList.of(
          new AggregatePojos(1, 1, 2),
          new AggregatePojos(2, 1, 3),
          new AggregatePojos(3, 2, 4),
          new AggregatePojos(4, 2, 5));
  PCollection<AggregatePojos> input = pipeline.apply(Create.of(elements));

  PCollection<Row> aggregate =
      input.apply(
          Group.<AggregatePojos>globally()
              .aggregateField("field1", Sum.ofLongs(), "field1_sum")
              .aggregateField("field3", Sum.ofIntegers(), "field3_sum")
              .aggregateField("field1", Top.largestLongsFn(1), "field1_top"));

  PAssert.that(aggregate).containsInAnyOrder(expectedRow);
  pipeline.run();
}
/**
 * Sums the integer values of each key, then runs the per-key totals through
 * {@code FormatAsStrings}.
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
  PCollection<KV<String, Integer>> totalsPerKey = input.apply(Sum.integersPerKey());
  return totalsPerKey.apply(ParDo.of(new FormatAsStrings()));
}
.apply(Sum.longsGlobally());
/**
 * Sums the counts of each pair, combines the totals globally with {@code TO_LIST}, runs
 * {@code PCoAnalysis} over the combined value, and writes the string form of every graph result
 * to {@code outputFile}.
 */
@Override
public PDone expand(PCollection<KV<KV<String, String>, Long>> similarPairs) {
  // Emits one output string per graph result contained in the incoming iterable.
  DoFn<Iterable<PCoAnalysis.GraphResult>, String> formatGraphData =
      new DoFn<Iterable<PCoAnalysis.GraphResult>, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
          for (PCoAnalysis.GraphResult graphResult : c.element()) {
            c.output(graphResult.toString());
          }
        }
      };

  return similarPairs
      .apply(Sum.<KV<String, String>>longsPerKey())
      .apply(Combine.globally(TO_LIST))
      .apply("PCoAAnalysis", ParDo.of(new PCoAnalysis(dataIndices)))
      .apply("FormatGraphData", ParDo.of(formatGraphData))
      .apply("WriteCounts", TextIO.write().to(outputFile));
}
}
/**
 * Creates a global summing transform for {@code Double} values.
 *
 * <p>Applied to a {@code PCollection<Double>}, the resulting {@code PTransform} yields a
 * {@code PCollection<Double>} whose content is the sum of all input elements, or {@code 0} if
 * there are no elements.
 */
public static Combine.Globally<Double, Double> doublesGlobally() {
  final Combine.BinaryCombineDoubleFn doubleSumFn = Sum.ofDoubles();
  return Combine.globally(doubleSumFn);
}
/**
 * Registers assertions in two batches and checks that {@code PAssert.countAsserts} reports the
 * running total after each batch.
 */
@Test
public void countAssertsMultipleCallsIndependent() {
  PCollection<Integer> create = pipeline.apply("FirstCreate", Create.of(1, 2, 3));

  // First batch: two assertions.
  PAssert.that(create).containsInAnyOrder(1, 2, 3);
  PCollection<Integer> total = create.apply(Sum.integersGlobally());
  PAssert.thatSingleton(total).isEqualTo(6);
  assertThat(PAssert.countAsserts(pipeline), equalTo(2));

  // Second batch: one more assertion on a separate collection.
  PCollection<KV<Integer, Integer>> mapEntries =
      pipeline.apply("CreateMap", Create.of(KV.of(1, 2)));
  PAssert.thatMap(mapEntries).isEqualTo(Collections.singletonMap(1, 2));
  assertThat(PAssert.countAsserts(pipeline), equalTo(3));
}
}
.apply( Group.<OuterAggregate>globally() .aggregateFields(field1Selector, Sum.ofLongs(), "field1_sum") .aggregateFields(field3Selector, Sum.ofIntegers(), "field3_sum") .aggregateFields(field1Selector, Top.largestLongsFn(1), "field1_top")); Schema aggregateSchema =
/**
 * Verifies that the accumulator coders produced by the {@code Sum} combine functions compare
 * equal when built from equal input coders and, for integers and longs, unequal when built from
 * different input coders.
 */
@Test
public void testGetAccumulatorCoderEquals() {
  // Integers: same input coder -> equal; different input coder -> not equal.
  Combine.BinaryCombineIntegerFn intSum = Sum.ofIntegers();
  assertEquals(
      intSum.getAccumulatorCoder(STANDARD_REGISTRY, VarIntCoder.of()),
      intSum.getAccumulatorCoder(STANDARD_REGISTRY, VarIntCoder.of()));
  assertNotEquals(
      intSum.getAccumulatorCoder(STANDARD_REGISTRY, VarIntCoder.of()),
      intSum.getAccumulatorCoder(STANDARD_REGISTRY, BigEndianIntegerCoder.of()));

  // Longs: same pair of checks.
  Combine.BinaryCombineLongFn longSum = Sum.ofLongs();
  assertEquals(
      longSum.getAccumulatorCoder(STANDARD_REGISTRY, VarLongCoder.of()),
      longSum.getAccumulatorCoder(STANDARD_REGISTRY, VarLongCoder.of()));
  assertNotEquals(
      longSum.getAccumulatorCoder(STANDARD_REGISTRY, VarLongCoder.of()),
      longSum.getAccumulatorCoder(STANDARD_REGISTRY, BigEndianLongCoder.of()));

  // Doubles: only the equality case is checked.
  Combine.BinaryCombineDoubleFn doubleSum = Sum.ofDoubles();
  assertEquals(
      doubleSum.getAccumulatorCoder(STANDARD_REGISTRY, DoubleCoder.of()),
      doubleSum.getAccumulatorCoder(STANDARD_REGISTRY, DoubleCoder.of()));
}
}
/**
 * Assigns the input to {@code ValidityWindows}, sums the integer values of each key, and runs
 * the per-key totals through {@code FormatAsStrings}.
 */
@Override
public PCollection<String> expand(PCollection<KV<String, Integer>> input) {
  PCollection<KV<String, Integer>> windowed = input.apply(Window.into(new ValidityWindows()));
  PCollection<KV<String, Integer>> totalsPerKey = windowed.apply(Sum.integersPerKey());
  return totalsPerKey.apply(ParDo.of(new FormatAsStrings()));
}
public static void main(String[] args) { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); String instanceId = options.getInstanceId(); String databaseId = options.getDatabaseId(); // [START spanner_dataflow_read] // Query for all the columns and rows in the specified Spanner table PCollection<Struct> records = p.apply( SpannerIO.read() .withInstanceId(instanceId) .withDatabaseId(databaseId) .withQuery("SELECT * FROM " + options.getTable())); // [END spanner_dataflow_read] PCollection<Long> tableEstimatedSize = records // Estimate the size of every row .apply(EstimateSize.create()) // Sum all the row sizes to get the total estimated size of the table .apply(Sum.longsGlobally()); // Write the total size to a file tableEstimatedSize .apply(ToString.elements()) .apply(TextIO.write().to(options.getOutput()).withoutSharding()); p.run().waitUntilFinish(); } }
/**
 * Sums the counts of each pair, combines the totals globally with {@code TO_LIST}, runs
 * {@code PCoAnalysis} over the combined value, and writes the string form of every graph result
 * to {@code outputFile}.
 */
@Override
public PDone expand(PCollection<KV<KV<String, String>, Long>> similarPairs) {
  // Emits one output string per graph result contained in the incoming iterable.
  DoFn<Iterable<PCoAnalysis.GraphResult>, String> formatGraphData =
      new DoFn<Iterable<PCoAnalysis.GraphResult>, String>() {
        @ProcessElement
        public void processElement(ProcessContext c) throws Exception {
          for (PCoAnalysis.GraphResult graphResult : c.element()) {
            c.output(graphResult.toString());
          }
        }
      };

  return similarPairs
      .apply(Sum.<KV<String, String>>longsPerKey())
      .apply(Combine.globally(TO_LIST))
      .apply("PCoAAnalysis", ParDo.of(new PCoAnalysis(dataIndices)))
      .apply("FormatGraphData", ParDo.of(formatGraphData))
      .apply("WriteCounts", TextIO.write().to(outputFile));
}
}
/**
 * Creates a per-key summing transform for {@code Double} values.
 *
 * <p>Applied to a {@code PCollection<KV<K, Double>>}, the resulting {@code PTransform} yields a
 * {@code PCollection<KV<K, Double>>} with one output element per distinct input key, whose value
 * is the sum of every value that was paired with that key in the input.
 */
public static <K> Combine.PerKey<K, Double, Double> doublesPerKey() {
  final Combine.BinaryCombineDoubleFn doubleSumFn = Sum.ofDoubles();
  return Combine.perKey(doubleSumFn);
}
/**
 * Creates a global summing transform for {@code Long} values.
 *
 * <p>Applied to a {@code PCollection<Long>}, the resulting {@code PTransform} yields a
 * {@code PCollection<Long>} whose content is the sum of all input elements, or {@code 0} if
 * there are no elements.
 */
public static Combine.Globally<Long, Long> longsGlobally() {
  final Combine.BinaryCombineLongFn longSumFn = Sum.ofLongs();
  return Combine.globally(longSumFn);
}
/**
 * Verifies that a global combine configured with a side input still reports that side input
 * after {@code withoutDefaults()} is applied.
 */
@Test
public void testWithDefaultsPreservesSideInputs() {
  final PCollectionView<Integer> view =
      pipeline.apply(Create.of(1)).apply(Sum.integersGlobally().asSingletonView());

  TestCombineFnWithContext contextFn = new TestCombineFnWithContext(view);
  // Note: despite the test name, the call under test here is withoutDefaults().
  Combine.Globally<Integer, String> combine =
      Combine.globally(contextFn).withSideInputs(view).withoutDefaults();

  assertEquals(Collections.singletonList(view), combine.getSideInputs());
}
.apply( Group.<AggregatePojos>byFieldNames("field2") .aggregateField("field1", Sum.ofLongs(), "field1_sum") .aggregateField("field3", Sum.ofIntegers(), "field3_sum") .aggregateField("field1", Top.largestLongsFn(1), "field1_top"));