/**
 * Returns a transform that renders every {@link Iterable} element of the input {@link
 * PCollection} as a single {@link String}: each item is converted with {@link Object#toString}
 * and consecutive items are separated by {@code ","}. No delimiter is emitted after the last
 * item.
 *
 * <p>This is shorthand for calling the delimiter-taking overload with {@code ","}.
 */
public static PTransform<PCollection<? extends Iterable<?>>, PCollection<String>> iterables() {
  return iterables(",");
}
/**
 * Returns a transform that renders every {@link KV} element of the input {@link PCollection} as a
 * {@link String} of the form {@code key,value}, where both parts are produced with {@link
 * Object#toString}.
 *
 * <p>This is shorthand for calling the delimiter-taking overload with {@code ","}.
 */
public static PTransform<PCollection<? extends KV<?, ?>>, PCollection<String>> kvs() {
  return kvs(",");
}
public static void main(String[] args) { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); String instanceId = options.getInstanceId(); String databaseId = options.getDatabaseId(); // [START spanner_dataflow_read] // Query for all the columns and rows in the specified Spanner table PCollection<Struct> records = p.apply( SpannerIO.read() .withInstanceId(instanceId) .withDatabaseId(databaseId) .withQuery("SELECT * FROM " + options.getTable())); // [END spanner_dataflow_read] PCollection<Long> tableEstimatedSize = records // Estimate the size of every row .apply(EstimateSize.create()) // Sum all the row sizes to get the total estimated size of the table .apply(Sum.longsGlobally()); // Write the total size to a file tableEstimatedSize .apply(ToString.elements()) .apply(TextIO.write().to(options.getOutput()).withoutSharding()); p.run().waitUntilFinish(); } }
.apply(ToString.elements()) .apply(TextIO.write().to(options.getOutput()).withoutSharding());
public static void main(String[] args) { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); SpannerConfig spannerConfig = SpannerConfig.create() .withInstanceId(options.getInstanceId()) .withDatabaseId(options.getDatabaseId()); // [START spanner_dataflow_readall] PCollection<Struct> allRecords = p.apply(SpannerIO.read() .withSpannerConfig(spannerConfig) .withQuery("SELECT t.table_name FROM information_schema.tables AS t WHERE t" + ".table_catalog = '' AND t.table_schema = ''")).apply( MapElements.into(TypeDescriptor.of(ReadOperation.class)) .via((SerializableFunction<Struct, ReadOperation>) input -> { String tableName = input.getString(0); return ReadOperation.create().withQuery("SELECT * FROM " + tableName); })).apply(SpannerIO.readAll().withSpannerConfig(spannerConfig)); // [END spanner_dataflow_readall] PCollection<Long> dbEstimatedSize = allRecords.apply(EstimateSize.create()) .apply(Sum.longsGlobally()); dbEstimatedSize.apply(ToString.elements()).apply(TextIO.write().to(options.getOutput()) .withoutSharding()); p.run().waitUntilFinish(); }
/** Checks that {@code ToString.iterables()} joins the items of each iterable with a comma. */
@Test
@Category(NeedsRunner.class)
public void testToStringIterable() {
  ArrayList<Iterable<String>> wordGroups = new ArrayList<>();
  wordGroups.add(Arrays.asList("one", "two", "three"));
  wordGroups.add(Arrays.asList("four", "five", "six"));

  PCollection<Iterable<String>> input =
      p.apply(Create.of(wordGroups).withCoder(IterableCoder.of(StringUtf8Coder.of())));
  PCollection<String> joined = input.apply(ToString.iterables());

  PAssert.that(joined).containsInAnyOrder("one,two,three", "four,five,six");
  p.run();
}
/** Checks that {@code ToString.kvs()} renders each pair as {@code key,value}. */
@Test
@Category(NeedsRunner.class)
public void testToStringKV() {
  ArrayList<KV<String, Integer>> pairs = new ArrayList<>();
  pairs.add(KV.of("one", 1));
  pairs.add(KV.of("two", 2));

  PCollection<KV<String, Integer>> input = p.apply(Create.of(pairs));
  PCollection<String> rendered = input.apply(ToString.kvs());

  PAssert.that(rendered).containsInAnyOrder("one,1", "two,2");
  p.run();
}
/** Checks that {@code ToString.elements()} converts each integer to its string form. */
@Test
@Category(NeedsRunner.class)
public void testToStringOf() {
  PCollection<Integer> input = p.apply(Create.of(Arrays.asList(1, 2, 3, 4, 5)));

  PCollection<String> rendered = input.apply(ToString.elements());

  PAssert.that(rendered).containsInAnyOrder("1", "2", "3", "4", "5");
  p.run();
}
/** Checks that {@code ToString.iterables("\t")} joins the items of each iterable with a tab. */
@Test
@Category(NeedsRunner.class)
public void testToStringIterableWithDelimiter() {
  ArrayList<Iterable<String>> wordGroups = new ArrayList<>();
  wordGroups.add(Arrays.asList("one", "two", "three"));
  wordGroups.add(Arrays.asList("four", "five", "six"));

  PCollection<Iterable<String>> input =
      p.apply(Create.of(wordGroups).withCoder(IterableCoder.of(StringUtf8Coder.of())));
  PCollection<String> joined = input.apply(ToString.iterables("\t"));

  PAssert.that(joined).containsInAnyOrder("one\ttwo\tthree", "four\tfive\tsix");
  p.run();
}
}
/** Checks that {@code ToString.kvs("\t")} separates key and value with a tab. */
@Test
@Category(NeedsRunner.class)
public void testToStringKVWithDelimiter() {
  ArrayList<KV<String, Integer>> pairs = new ArrayList<>();
  pairs.add(KV.of("one", 1));
  pairs.add(KV.of("two", 2));

  PCollection<KV<String, Integer>> input = p.apply(Create.of(pairs));
  PCollection<String> rendered = input.apply(ToString.kvs("\t"));

  PAssert.that(rendered).containsInAnyOrder("one\t1", "two\t2");
  p.run();
}
/**
 * Writes the numbers 0..9 as windowed text files into a fresh directory while a second branch of
 * the same pipeline watches that directory for new files, and checks that every number is
 * eventually read back.
 */
@Test
@Category(NeedsRunner.class)
public void testReadWatchForNewFiles() throws IOException, InterruptedException {
  final Path basePath = tempFolder.getRoot().toPath().resolve("readWatch");
  basePath.toFile().mkdir();

  final String outputPrefix = basePath.resolve("data").toString();
  final String watchedGlob = basePath.resolve("*").toString();

  // Writer branch: emit 0..9 at one element per 100 ms, firing a pane for every element so
  // that output files appear while the reader branch is watching.
  Window<Long> perElementPanes =
      Window.<Long>into(FixedWindows.of(Duration.millis(150)))
          .withAllowedLateness(Duration.ZERO)
          .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
          .discardingFiredPanes();

  p.apply(GenerateSequence.from(0).to(10).withRate(1, Duration.millis(100)))
      .apply(perElementPanes)
      .apply(ToString.elements())
      .apply(TextIO.write().to(outputPrefix).withNumShards(1).withWindowedWrites());

  // Reader branch: poll the directory every 100 ms and stop once no new file has shown up
  // for three seconds.
  PCollection<String> lines =
      p.apply(
          TextIO.read()
              .from(watchedGlob)
              .watchForNewFiles(
                  Duration.millis(100),
                  Watch.Growth.afterTimeSinceNewOutput(Duration.standardSeconds(3))));

  PAssert.that(lines).containsInAnyOrder("0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
  p.run();
}
}