.apply(Sum.longsGlobally());
public static void main(String[] args) { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); String instanceId = options.getInstanceId(); String databaseId = options.getDatabaseId(); // [START spanner_dataflow_read] // Query for all the columns and rows in the specified Spanner table PCollection<Struct> records = p.apply( SpannerIO.read() .withInstanceId(instanceId) .withDatabaseId(databaseId) .withQuery("SELECT * FROM " + options.getTable())); // [END spanner_dataflow_read] PCollection<Long> tableEstimatedSize = records // Estimate the size of every row .apply(EstimateSize.create()) // Sum all the row sizes to get the total estimated size of the table .apply(Sum.longsGlobally()); // Write the total size to a file tableEstimatedSize .apply(ToString.elements()) .apply(TextIO.write().to(options.getOutput()).withoutSharding()); p.run().waitUntilFinish(); } }
public static void main(String[] args) { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); SpannerConfig spannerConfig = SpannerConfig.create() .withInstanceId(options.getInstanceId()) .withDatabaseId(options.getDatabaseId()); // [START spanner_dataflow_readall] PCollection<Struct> allRecords = p.apply(SpannerIO.read() .withSpannerConfig(spannerConfig) .withQuery("SELECT t.table_name FROM information_schema.tables AS t WHERE t" + ".table_catalog = '' AND t.table_schema = ''")).apply( MapElements.into(TypeDescriptor.of(ReadOperation.class)) .via((SerializableFunction<Struct, ReadOperation>) input -> { String tableName = input.getString(0); return ReadOperation.create().withQuery("SELECT * FROM " + tableName); })).apply(SpannerIO.readAll().withSpannerConfig(spannerConfig)); // [END spanner_dataflow_readall] PCollection<Long> dbEstimatedSize = allRecords.apply(EstimateSize.create()) .apply(Sum.longsGlobally()); dbEstimatedSize.apply(ToString.elements()).apply(TextIO.write().to(options.getOutput()) .withoutSharding()); p.run().waitUntilFinish(); }
/**
 * Verifies the names reported by the {@code Sum} transforms, for both the globally-combining
 * and the per-key variants over integers, doubles, and longs.
 */
@Test
public void testSumGetNames() {
  // Global combines.
  String globalIntegerName = Sum.integersGlobally().getName();
  assertEquals("Combine.globally(SumInteger)", globalIntegerName);

  String globalDoubleName = Sum.doublesGlobally().getName();
  assertEquals("Combine.globally(SumDouble)", globalDoubleName);

  String globalLongName = Sum.longsGlobally().getName();
  assertEquals("Combine.globally(SumLong)", globalLongName);

  // Per-key combines.
  String perKeyIntegerName = Sum.integersPerKey().getName();
  assertEquals("Combine.perKey(SumInteger)", perKeyIntegerName);

  String perKeyDoubleName = Sum.doublesPerKey().getName();
  assertEquals("Combine.perKey(SumDouble)", perKeyDoubleName);

  String perKeyLongName = Sum.longsPerKey().getName();
  assertEquals("Combine.perKey(SumLong)", perKeyLongName);
}
/**
 * Verifies that the replacement produced by {@code factory} for a {@code ParDo} with side
 * inputs exposes the same side inputs as the original transform.
 *
 * <p>Two views are built (a singleton sum of longs and a list of strings), attached to a
 * {@code ParDo}, and the applied transform is handed to the factory. The resulting
 * {@code ParDoSingle} must report both views, in any order.
 */
@Test
public void getReplacementTransformGetSideInputs() {
  // Side input 1: singleton view over the global sum of the long values.
  PCollection<Long> longValues =
      pipeline.apply("LongSideInputVals", Create.of(-1L, -2L, -4L));
  PCollectionView<Long> longSumView =
      longValues.apply("SideLongView", Sum.longsGlobally().asSingletonView());

  // Side input 2: list view over the string values.
  PCollection<String> stringValues =
      pipeline.apply("StringSideInputVals", Create.of("foo", "bar", "baz"));
  PCollectionView<List<String>> stringListView =
      stringValues.apply("SideStringsView", View.asList());

  // The transform under test, carrying both side inputs.
  ParDo.SingleOutput<Integer, Long> parDoWithSideInputs =
      ParDo.of(new ToLongFn()).withSideInputs(longSumView, stringListView);

  PCollection<? extends Integer> mainInput = pipeline.apply(Create.of(1, 2, 3));
  AppliedPTransform<
          PCollection<? extends Integer>, PCollection<Long>, ParDo.SingleOutput<Integer, Long>>
      appliedParDo =
          AppliedPTransform.of(
              "original",
              mainInput.expand(),
              mainInput.apply(parDoWithSideInputs).expand(),
              parDoWithSideInputs,
              pipeline);

  PTransformReplacement<PCollection<? extends Integer>, PCollection<Long>> replacement =
      factory.getReplacementTransform(appliedParDo);
  ParDoSingle<Integer, Long> replacedParDo =
      (ParDoSingle<Integer, Long>) replacement.getTransform();

  assertThat(
      replacedParDo.getSideInputs(), containsInAnyOrder(stringListView, longSumView));
}
/**
 * Exercises a processing-time early trigger on a {@code TestStream} source.
 *
 * <p>The stream emits 1 and 2, advances processing time by 12 minutes, emits 3, advances
 * processing time by another 6 minutes, and then moves the watermark to infinity. The input is
 * windowed with early firings 5 minutes after the first element in a pane, accumulating fired
 * panes, and zero allowed lateness. The early panes of the global window are asserted to
 * contain 3 and 6, which matches the running sums 1+2 and 1+2+3.
 */
@Test
@Category({NeedsRunner.class, UsesTestStream.class})
public void testProcessingTimeTrigger() {
  TestStream<Long> stream =
      TestStream.create(VarLongCoder.of())
          .addElements(
              TimestampedValue.of(1L, new Instant(1000L)),
              TimestampedValue.of(2L, new Instant(2000L)))
          .advanceProcessingTime(Duration.standardMinutes(12))
          .addElements(TimestampedValue.of(3L, new Instant(3000L)))
          .advanceProcessingTime(Duration.standardMinutes(6))
          .advanceWatermarkToInfinity();

  PCollection<Long> elements = p.apply(stream);

  // Fire early 5 minutes (processing time) after the first element of each pane, and keep
  // accumulating across firings so each pane carries the running total.
  Duration earlyFiringDelay = Duration.standardMinutes(5);
  Window<Long> windowing =
      Window.<Long>configure()
          .triggering(
              AfterWatermark.pastEndOfWindow()
                  .withEarlyFirings(
                      AfterProcessingTime.pastFirstElementInPane()
                          .plusDelayOf(earlyFiringDelay)))
          .accumulatingFiredPanes()
          .withAllowedLateness(Duration.ZERO);

  PCollection<Long> runningTotal = elements.apply(windowing).apply(Sum.longsGlobally());

  PAssert.that(runningTotal).inEarlyGlobalWindowPanes().containsInAnyOrder(3L, 6L);

  p.run();
}
.apply(Sum.longsGlobally());