public static void main(String[] args) {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);

  SpannerConfig spannerConfig =
      SpannerConfig.create()
          .withInstanceId(options.getInstanceId())
          .withDatabaseId(options.getDatabaseId());

  // [START spanner_dataflow_readall]
  PCollection<Struct> allRecords =
      p.apply(
              SpannerIO.read()
                  .withSpannerConfig(spannerConfig)
                  .withQuery(
                      "SELECT t.table_name FROM information_schema.tables AS t WHERE t"
                          + ".table_catalog = '' AND t.table_schema = ''"))
          .apply(
              MapElements.into(TypeDescriptor.of(ReadOperation.class))
                  .via(
                      (SerializableFunction<Struct, ReadOperation>)
                          input -> {
                            String tableName = input.getString(0);
                            return ReadOperation.create().withQuery("SELECT * FROM " + tableName);
                          }))
          .apply(SpannerIO.readAll().withSpannerConfig(spannerConfig));
  // [END spanner_dataflow_readall]

  PCollection<Long> dbEstimatedSize =
      allRecords.apply(EstimateSize.create()).apply(Sum.longsGlobally());

  dbEstimatedSize
      .apply(ToString.elements())
      .apply(TextIO.write().to(options.getOutput()).withoutSharding());

  p.run().waitUntilFinish();
}
SpannerConfig spannerConfig =
    SpannerConfig.create()
        .withInstanceId(instanceId)
        .withDatabaseId(databaseId);
/**
 * Returns a transform that creates a batch transaction. By default, a {@link
 * TimestampBound#strong()} transaction is created; to override this, use {@link
 * CreateTransaction#withTimestampBound(TimestampBound)}.
 */
@Experimental
public static CreateTransaction createTransaction() {
  return new AutoValue_SpannerIO_CreateTransaction.Builder()
      .setSpannerConfig(SpannerConfig.create())
      .setTimestampBound(TimestampBound.strong())
      .build();
}
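// Illustrative sketch, not part of the snippets above: createTransaction() is typically used to
// obtain a transaction view that several reads can share for one consistent snapshot. The
// pipeline "p", the "spannerConfig", and the query text below are assumed for this example.
PCollectionView<Transaction> tx =
    p.apply(
        SpannerIO.createTransaction()
            .withSpannerConfig(spannerConfig)
            .withTimestampBound(TimestampBound.strong()));

PCollection<Struct> rows =
    p.apply(
        SpannerIO.read()
            .withSpannerConfig(spannerConfig)
            .withQuery("SELECT Id, Name FROM Users")
            .withTransaction(tx));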
/**
 * A {@link PTransform} that works like {@link #read}, but executes read operations coming from
 * a {@link PCollection}.
 */
@Experimental(Experimental.Kind.SOURCE_SINK)
public static ReadAll readAll() {
  return new AutoValue_SpannerIO_ReadAll.Builder()
      .setSpannerConfig(SpannerConfig.create())
      .setTimestampBound(TimestampBound.strong())
      .setBatching(true)
      .build();
}
/**
 * Creates an uninitialized instance of {@link Write}. Before use, the {@link Write} must be
 * configured with a {@link Write#withInstanceId} and {@link Write#withDatabaseId} that identify
 * the Cloud Spanner database being written.
 */
@Experimental
public static Write write() {
  return new AutoValue_SpannerIO_Write.Builder()
      .setSpannerConfig(SpannerConfig.create())
      .setBatchSizeBytes(DEFAULT_BATCH_SIZE_BYTES)
      .setMaxNumMutations(DEFAULT_MAX_NUM_MUTATIONS)
      .setGroupingFactor(DEFAULT_GROUPING_FACTOR)
      .setFailureMode(FailureMode.FAIL_FAST)
      .build();
}
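// Illustrative sketch, not part of the snippets above: a configured Write consumes a
// PCollection<Mutation>. The pipeline "p", the "spannerConfig", and the "Users" table and
// columns are assumed for this example.
PCollection<Mutation> mutations =
    p.apply(
        Create.of(
            Mutation.newInsertOrUpdateBuilder("Users")
                .set("Id").to(1L)
                .set("Name").to("alice")
                .build()));

mutations.apply(SpannerIO.write().withSpannerConfig(spannerConfig));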
private SpannerConfig createSpannerConfig() {
  return SpannerConfig.create()
      .withProjectId(project)
      .withInstanceId(options.getInstanceId())
      .withDatabaseId(databaseName);
}
/**
 * Creates an uninitialized instance of {@link Read}. Before use, the {@link Read} must be
 * configured with a {@link Read#withInstanceId} and {@link Read#withDatabaseId} that identify
 * the Cloud Spanner database.
 */
@Experimental(Experimental.Kind.SOURCE_SINK)
public static Read read() {
  return new AutoValue_SpannerIO_Read.Builder()
      .setSpannerConfig(SpannerConfig.create())
      .setTimestampBound(TimestampBound.strong())
      .setReadOperation(ReadOperation.create())
      .setBatching(true)
      .build();
}
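// Illustrative sketch, not part of the snippets above: instead of a SQL query, a Read can be
// pointed at a table and a set of columns. The pipeline "p", the "spannerConfig", and the
// "Users" table and columns are assumed for this example.
PCollection<Struct> users =
    p.apply(
        SpannerIO.read()
            .withSpannerConfig(spannerConfig)
            .withTable("Users")
            .withColumns("Id", "Name"));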
SpannerConfig.create() .withProjectId("test") .withInstanceId("123")
SpannerConfig.create() .withProjectId("test-project") .withInstanceId("test-instance")
SpannerConfig.create() .withProjectId("test") .withInstanceId("123")
public static void main(String[] args) {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);

  SpannerConfig spannerConfig =
      SpannerConfig.create()
          .withHost(options.getSpannerHost())
          .withInstanceId(options.getInstanceId())
          .withDatabaseId(options.getDatabaseId());

  p.apply(new ImportTransform(spannerConfig, options.getInputDir(), options.getWaitForIndexes()));

  PipelineResult result = p.run();
  if (options.getWaitUntilFinish()
      /* Only if the template location is null is there a dataflow job to wait for; otherwise
       * this is template generation, which doesn't start a dataflow job. */
      && options.as(DataflowPipelineOptions.class).getTemplateLocation() == null) {
    result.waitUntilFinish();
  }
}
SpannerConfig.create()
    .withHost(options.getSpannerHost())
    .withInstanceId(options.getInstanceId())
SpannerConfig.create()
    .withProjectId(options.getSpannerProjectId())
    .withInstanceId(options.getSpannerInstanceId())