public static void main(String[] args) { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); SpannerConfig spannerConfig = SpannerConfig.create() .withInstanceId(options.getInstanceId()) .withDatabaseId(options.getDatabaseId()); // [START spanner_dataflow_readall] PCollection<Struct> allRecords = p.apply(SpannerIO.read() .withSpannerConfig(spannerConfig) .withQuery("SELECT t.table_name FROM information_schema.tables AS t WHERE t" + ".table_catalog = '' AND t.table_schema = ''")).apply( MapElements.into(TypeDescriptor.of(ReadOperation.class)) .via((SerializableFunction<Struct, ReadOperation>) input -> { String tableName = input.getString(0); return ReadOperation.create().withQuery("SELECT * FROM " + tableName); })).apply(SpannerIO.readAll().withSpannerConfig(spannerConfig)); // [END spanner_dataflow_readall] PCollection<Long> dbEstimatedSize = allRecords.apply(EstimateSize.create()) .apply(Sum.longsGlobally()); dbEstimatedSize.apply(ToString.elements()).apply(TextIO.write().to(options.getOutput()) .withoutSharding()); p.run().waitUntilFinish(); }
/**
 * Starts building a {@link MapElements} transform by fixing only its output type.
 *
 * <p>The returned transform has no mapping function and no input type yet (they are passed as
 * {@code null}); supply the function afterwards with {@link #via(SerializableFunction)}.
 *
 * @param outputType type descriptor of the elements the finished transform will produce
 * @return a partially specified transform that carries only {@code outputType}
 */
public static <OutputT> MapElements<?, OutputT> into(final TypeDescriptor<OutputT> outputType) {
  final MapElements<?, OutputT> withoutFunction =
      new MapElements<>(null, null, null, outputType);
  return withoutFunction;
}
/**
 * Creates a transform that prints every row's values, followed by {@code suffix}, to standard
 * output.
 *
 * @param suffix text appended after the row values on each printed line
 * @return a {@link MapElements} whose function prints the row and always yields {@code null}
 */
private static MapElements<Row, Void> logRecords(String suffix) {
  SimpleFunction<Row, Void> printRow =
      new SimpleFunction<Row, Void>() {
        @Override
        public @Nullable Void apply(Row row) {
          // One println per element: the value list concatenated with the suffix.
          System.out.println(row.getValues() + suffix);
          return null;
        }
      };
  return MapElements.via(printRow);
}
/**
 * Maps each iterable element to a single string by joining its items with {@code delimiter}.
 *
 * @param input collection whose elements are iterables
 * @return collection holding one joined string per input element
 */
@Override
public PCollection<String> expand(PCollection<? extends Iterable<?>> input) {
  SimpleFunction<Iterable<?>, String> joinWithDelimiter =
      new SimpleFunction<Iterable<?>, String>() {
        @Override
        public String apply(Iterable<?> items) {
          return Joiner.on(delimiter).join(items);
        }
      };
  return input.apply(MapElements.via(joinWithDelimiter));
}
}
/**
 * Maps each element to the result of calling {@code toString()} on it.
 *
 * @param input collection of arbitrary elements
 * @return collection holding the string form of every input element
 */
@Override
public PCollection<String> expand(PCollection<?> input) {
  SimpleFunction<Object, String> stringify =
      new SimpleFunction<Object, String>() {
        @Override
        public String apply(Object element) {
          return element.toString();
        }
      };
  return input.apply(MapElements.via(stringify));
}
}
/**
 * Completes a transform started with an output type descriptor by supplying the mapping
 * function.
 *
 * <p>Given a {@code SerializableFunction<InputT, OutputT>} {@code fn}, the result is a {@code
 * PTransform} that consumes a {@code PCollection<InputT>} and produces a {@code
 * PCollection<OutputT>} containing {@code fn.apply(v)} for each input element {@code v}.
 *
 * <p>Java 8 usage:
 *
 * <pre>{@code
 * PCollection<Integer> wordLengths = words.apply(
 *     MapElements.into(TypeDescriptors.integers())
 *                .via((String word) -> word.length()));
 * }</pre>
 *
 * <p>In Java 7 the {@link #via(SimpleFunction)} overload is shorter, because it does not need
 * an explicit output type descriptor.
 *
 * @param fn the function applied to every input element
 * @return a transform bound to {@code fn}, keeping this transform's output type
 */
public <NewInputT> MapElements<NewInputT, OutputT> via(
    SerializableFunction<NewInputT, OutputT> fn) {
  return new MapElements<>(
      // The function wrapped as a Contextful.
      Contextful.fn(fn),
      // The function exactly as the caller supplied it.
      fn,
      // Input type derived from the function itself.
      TypeDescriptors.inputOf(fn),
      // Output type carried over from this transform.
      outputType);
}
/**
 * Maps each key/value pair to the string {@code key + delimiter + value}, using {@code
 * toString()} on both the key and the value.
 *
 * @param input collection of key/value pairs
 * @return collection holding one formatted string per pair
 */
@Override
public PCollection<String> expand(PCollection<? extends KV<?, ?>> input) {
  SimpleFunction<KV<?, ?>, String> formatPair =
      new SimpleFunction<KV<?, ?>, String>() {
        @Override
        public String apply(KV<?, ?> pair) {
          String keyText = pair.getKey().toString();
          String valueText = pair.getValue().toString();
          return keyText + delimiter + valueText;
        }
      };
  return input.apply(MapElements.via(formatPair));
}
}
/**
 * Variant of {@link #via(SerializableFunction)} whose function can also access context, such as
 * side inputs.
 *
 * @param fn the contextful function applied to every input element
 * @return a transform bound to {@code fn}, keeping this transform's output type
 */
@Experimental(Kind.CONTEXTFUL)
public <NewInputT> MapElements<NewInputT, OutputT> via(Contextful<Fn<NewInputT, OutputT>> fn) {
  // The closure is passed on as-is and is also the source of the input type descriptor.
  Fn<NewInputT, OutputT> closure = fn.getClosure();
  return new MapElements<>(fn, closure, TypeDescriptors.inputOf(closure), outputType);
}
/**
 * Passes {@code input} through an identity map that declares a view of every signal as a
 * required side input.
 *
 * <p>Each entry of {@code signals} is converted with {@code ToWaitView} under the name {@code
 * "To wait view " + index}; the identity map itself is named {@code "Wait"}.
 *
 * @param input the collection to pass through
 * @return a collection with the same elements and type descriptor as {@code input}
 */
@Override
public PCollection<T> expand(PCollection<T> input) {
  List<PCollectionView<?>> views = Lists.newArrayList();
  int index = 0;
  while (index < signals.size()) {
    views.add(signals.get(index).apply("To wait view " + index, new ToWaitView()));
    index++;
  }

  return input.apply(
      "Wait",
      MapElements.into(input.getCoder().getEncodedTypeDescriptor())
          // Identity function; the views are only listed as required side inputs.
          .via(fn((element, context) -> element, requiresSideInputs(views))));
}
}
/**
 * Creates a transform that appends {@code suffix} to every string element.
 *
 * @param suffix text concatenated to the end of each element
 * @return a transform mapping each string {@code s} to {@code s + suffix}
 */
private static PTransform<PCollection<? extends String>, PCollection<String>> addSuffix(
    final String suffix) {
  SimpleFunction<String, String> appendSuffix =
      new SimpleFunction<String, String>() {
        @Override
        public String apply(String text) {
          return text + suffix;
        }
      };
  return MapElements.via(appendSuffix);
}
/**
 * Builds a complete transform from a {@code SimpleFunction<InputT, OutputT>}.
 *
 * <p>The result is a {@code PTransform} that consumes a {@code PCollection<InputT>} and
 * produces a {@code PCollection<OutputT>} containing {@code fn.apply(v)} for each input element
 * {@code v}. Both type descriptors are taken from {@code fn} itself.
 *
 * <p>This overload mainly targets Java 7. With Java 8, {@link #via(SerializableFunction)}
 * accepts a lambda and is more concise.
 *
 * <p>Java 7 usage:
 *
 * <pre>{@code
 * PCollection<String> words = ...;
 * PCollection<Integer> wordsPerLine = words.apply(MapElements.via(
 *     new SimpleFunction<String, Integer>() {
 *       public Integer apply(String word) {
 *         return word.length();
 *       }
 *     }));
 * }</pre>
 *
 * @param fn the function applied to every input element
 * @return a transform bound to {@code fn} and to the input/output types it reports
 */
public static <InputT, OutputT> MapElements<InputT, OutputT> via(
    final SimpleFunction<InputT, OutputT> fn) {
  return new MapElements<>(
      // The function wrapped as a Contextful.
      Contextful.fn(fn),
      // The function exactly as the caller supplied it.
      fn,
      // Both type descriptors come straight from the SimpleFunction.
      fn.getInputTypeDescriptor(),
      fn.getOutputTypeDescriptor());
}
/**
 * Locates the export manifest under {@code importDirectory} and reads it into an {@code Export}.
 *
 * <p>The manifest path is {@code importDirectory} joined with {@code "spanner-export.json"}.
 * Matched files are mapped to their resource ids, which are then passed to {@code
 * ReadExportManifestFile::readManifest}.
 *
 * @param input the pipeline start
 * @return collection of manifests read from the matched files
 */
@Override
public PCollection<Export> expand(PBegin input) {
  NestedValueProvider<String, String> manifestFile =
      NestedValueProvider.of(
          importDirectory, directory -> GcsUtil.joinPath(directory, "spanner-export.json"));

  PCollection<MatchResult.Metadata> manifestMatches =
      input.apply("Read manifest", FileIO.match().filepattern(manifestFile));

  PCollection<ResourceId> manifestResources =
      manifestMatches.apply(
          "Resource id",
          MapElements.into(TypeDescriptor.of(ResourceId.class))
              .via(MatchResult.Metadata::resourceId));

  return manifestResources.apply(
      "Read manifest json",
      MapElements.into(TypeDescriptor.of(Export.class))
          .via(ReadExportManifestFile::readManifest));
}
/**
 * Maps each key/value pair to its value, dropping the key, in a step named {@code "Values"}.
 *
 * @param in collection of key/value pairs
 * @return collection holding the value of every pair
 */
@Override
public PCollection<V> expand(PCollection<? extends KV<?, V>> in) {
  SimpleFunction<KV<?, V>, V> valueOf =
      new SimpleFunction<KV<?, V>, V>() {
        @Override
        public V apply(KV<?, V> pair) {
          return pair.getValue();
        }
      };
  return in.apply("Values", MapElements.via(valueOf));
}
}
/**
 * Converts each JSON string to a {@code TableRow} by calling {@code convertJsonToTableRow}, in
 * a step named {@code "JsonToTableRow"}.
 *
 * @param stringPCollection collection of JSON strings
 * @return collection holding one {@code TableRow} per input string
 */
@Override
public PCollection<TableRow> expand(PCollection<String> stringPCollection) {
  SimpleFunction<String, TableRow> toTableRow =
      new SimpleFunction<String, TableRow>() {
        @Override
        public TableRow apply(String jsonText) {
          return convertJsonToTableRow(jsonText);
        }
      };
  return stringPCollection.apply("JsonToTableRow", MapElements.via(toTableRow));
}
}
/**
 * Sums scores per key.
 *
 * <p>Each {@code GameActionInfo} is first mapped to a pair of {@code getKey(field)} and {@code
 * getScore()}; the integer scores are then summed for every key.
 *
 * @param gameInfo collection of game events
 * @return collection holding one (key, total score) pair per distinct key
 */
@Override
public PCollection<KV<String, Integer>> expand(PCollection<GameActionInfo> gameInfo) {
  PCollection<KV<String, Integer>> scoresByKey =
      gameInfo.apply(
          MapElements.into(
                  TypeDescriptors.kvs(TypeDescriptors.strings(), TypeDescriptors.integers()))
              .via((GameActionInfo event) -> KV.of(event.getKey(field), event.getScore())));

  return scoresByKey.apply(Sum.integersPerKey());
}
}
/**
 * Swaps key and value of every pair, in a step named {@code "KvSwap"}.
 *
 * @param in collection of (key, value) pairs
 * @return collection holding the corresponding (value, key) pairs
 */
@Override
public PCollection<KV<V, K>> expand(PCollection<KV<K, V>> in) {
  SimpleFunction<KV<K, V>, KV<V, K>> swap =
      new SimpleFunction<KV<K, V>, KV<V, K>>() {
        @Override
        public KV<V, K> apply(KV<K, V> pair) {
          return KV.of(pair.getValue(), pair.getKey());
        }
      };
  return in.apply("KvSwap", MapElements.via(swap));
}
}
/**
 * Checks that a {@link MapElements} built from a {@code SerializableFunction} has a display
 * item {@code "class"} whose value is the class of that function.
 */
@Test
public void testSerializableFunctionDisplayData() {
  SerializableFunction<Integer, Integer> identityFn = value -> value;

  MapElements<?, ?> mapUnderTest = MapElements.into(integers()).via(identityFn);
  DisplayData displayData = DisplayData.from(mapUnderTest);

  assertThat(displayData, hasDisplayItem("class", identityFn.getClass()));
}