public static void main(String[] args) { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); SpannerConfig spannerConfig = SpannerConfig.create() .withInstanceId(options.getInstanceId()) .withDatabaseId(options.getDatabaseId()); // [START spanner_dataflow_readall] PCollection<Struct> allRecords = p.apply(SpannerIO.read() .withSpannerConfig(spannerConfig) .withQuery("SELECT t.table_name FROM information_schema.tables AS t WHERE t" + ".table_catalog = '' AND t.table_schema = ''")).apply( MapElements.into(TypeDescriptor.of(ReadOperation.class)) .via((SerializableFunction<Struct, ReadOperation>) input -> { String tableName = input.getString(0); return ReadOperation.create().withQuery("SELECT * FROM " + tableName); })).apply(SpannerIO.readAll().withSpannerConfig(spannerConfig)); // [END spanner_dataflow_readall] PCollection<Long> dbEstimatedSize = allRecords.apply(EstimateSize.create()) .apply(Sum.longsGlobally()); dbEstimatedSize.apply(ToString.elements()).apply(TextIO.write().to(options.getOutput()) .withoutSharding()); p.run().waitUntilFinish(); }
/**
 * Specifies the Cloud Spanner project id, wrapping the plain string in a static
 * {@link ValueProvider} before delegating to the {@code ValueProvider} overload.
 */
public SpannerConfig withProjectId(String projectId) {
  ValueProvider<String> provider = ValueProvider.StaticValueProvider.of(projectId);
  return withProjectId(provider);
}
/** Opens the Spanner clients once per DoFn instance, before any bundle is processed. */
@Setup
public void setup() throws Exception {
  this.spannerAccessor = this.config.connectToSpanner();
}
/**
 * Fails fast with a descriptive message when a required Spanner identifier is missing.
 *
 * <p>Checks the instance id first, then the database id, so the error message always
 * names the first unset field.
 */
public void validate() {
  checkNotNull(
      getInstanceId(),
      "SpannerIO.read() requires instance id to be set with withInstanceId method");
  checkNotNull(
      getDatabaseId(),
      "SpannerIO.read() requires database id to be set with withDatabaseId method");
}
/** Builds a {@link SpannerConfig} targeting the configured project, instance, and database. */
private SpannerConfig createSpannerConfig() {
  SpannerConfig config = SpannerConfig.create();
  return config
      .withProjectId(project)
      .withInstanceId(options.getInstanceId())
      .withDatabaseId(databaseName);
}
/**
 * Entry point: imports a database export from {@code inputDir} into the configured
 * Cloud Spanner instance/database, optionally waiting for index creation and for the
 * resulting Dataflow job to finish.
 */
public static void main(String[] args) {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(options);
  SpannerConfig spannerConfig =
      SpannerConfig.create()
          .withHost(options.getSpannerHost())
          .withInstanceId(options.getInstanceId())
          .withDatabaseId(options.getDatabaseId());
  p.apply(new ImportTransform(spannerConfig, options.getInputDir(), options.getWaitForIndexes()));
  PipelineResult result = p.run();
  if (options.getWaitUntilFinish()
      &&
      /* Only if template location is null, there is a dataflow job to wait for. Else it's
       * template generation which doesn't start a dataflow job. */
      options.as(DataflowPipelineOptions.class).getTemplateLocation() == null) {
    result.waitUntilFinish();
  }
}
}
/**
 * Registers the Spanner connection settings (and the service factory, when one was
 * overridden) with the runner's display-data view for monitoring UIs.
 */
public void populateDisplayData(DisplayData.Builder builder) {
  builder.addIfNotNull(DisplayData.item("projectId", getProjectId()).withLabel("Output Project"));
  builder.addIfNotNull(
      DisplayData.item("instanceId", getInstanceId()).withLabel("Output Instance"));
  builder.addIfNotNull(
      DisplayData.item("databaseId", getDatabaseId()).withLabel("Output Database"));
  // Only present when tests (or callers) have swapped in a non-default service factory.
  if (getServiceFactory() != null) {
    builder.addIfNotNull(
        DisplayData.item("serviceFactory", getServiceFactory().getClass().getName())
            .withLabel("Service Factory"));
  }
}
/**
 * Specifies the Cloud Spanner database id, wrapping the plain string in a static
 * {@link ValueProvider} before delegating to the {@code ValueProvider} overload.
 */
public SpannerConfig withDatabaseId(String databaseId) {
  ValueProvider<String> provider = ValueProvider.StaticValueProvider.of(databaseId);
  return withDatabaseId(provider);
}
/**
 * Specifies the Cloud Spanner instance id, wrapping the plain string in a static
 * {@link ValueProvider} before delegating to the {@code ValueProvider} overload.
 */
public SpannerConfig withInstanceId(String instanceId) {
  ValueProvider<String> provider = ValueProvider.StaticValueProvider.of(instanceId);
  return withInstanceId(provider);
}
/**
 * Returns a transform that creates a batch transaction. By default, {@link
 * TimestampBound#strong()} transaction is created, to override this use {@link
 * CreateTransaction#withTimestampBound(TimestampBound)}.
 */
@Experimental
public static CreateTransaction createTransaction() {
  AutoValue_SpannerIO_CreateTransaction.Builder builder =
      new AutoValue_SpannerIO_CreateTransaction.Builder();
  builder.setSpannerConfig(SpannerConfig.create());
  builder.setTimestampBound(TimestampBound.strong());
  return builder.build();
}
/** Test-only hook: overrides the {@link ServiceFactory} used to build Spanner clients. */
@VisibleForTesting
Read withServiceFactory(ServiceFactory<Spanner, SpannerOptions> serviceFactory) {
  return withSpannerConfig(getSpannerConfig().withServiceFactory(serviceFactory));
}
/** Specifies the Cloud Spanner host. */
public Read withHost(ValueProvider<String> host) {
  return withSpannerConfig(getSpannerConfig().withHost(host));
}
SpannerConfig.create() .withProjectId("test-project") .withInstanceId("test-instance") .withDatabaseId("test-database") .withServiceFactory(serviceFactory);
SpannerConfig.create() .withHost(options.getSpannerHost()) .withInstanceId(options.getInstanceId()) .withDatabaseId(options.getDatabaseId()); p.begin() .apply(
/**
 * Builds a {@link Spanner} client from this configuration and returns an accessor
 * bundling the database, batch, and database-admin clients for the configured database.
 *
 * <p>When no project id is set, {@link SpannerOptions} supplies its own default project.
 */
public SpannerAccessor connectToSpanner() {
  SpannerOptions.Builder builder = SpannerOptions.newBuilder();
  if (getProjectId() != null) {
    builder.setProjectId(getProjectId().get());
  }
  if (getServiceFactory() != null) {
    builder.setServiceFactory(this.getServiceFactory());
  }
  if (getHost() != null) {
    builder.setHost(getHost().get());
  }
  // Attach a Beam-specific user agent so Spanner can attribute traffic to this connector.
  String userAgentString = USER_AGENT_PREFIX + "/" + ReleaseInfo.getReleaseInfo().getVersion();
  builder.setHeaderProvider(FixedHeaderProvider.create("user-agent", userAgentString));
  SpannerOptions options = builder.build();
  Spanner spanner = options.getService();
  // Build the fully-qualified database id once instead of duplicating the expression
  // for each client.
  DatabaseId databaseId =
      DatabaseId.of(options.getProjectId(), getInstanceId().get(), getDatabaseId().get());
  DatabaseClient databaseClient = spanner.getDatabaseClient(databaseId);
  BatchClient batchClient = spanner.getBatchClient(databaseId);
  DatabaseAdminClient databaseAdminClient = spanner.getDatabaseAdminClient();
  return new SpannerAccessor(spanner, databaseClient, databaseAdminClient, batchClient);
}
}
/** Specifies the Cloud Spanner database. */
public ReadAll withDatabaseId(ValueProvider<String> databaseId) {
  return withSpannerConfig(getSpannerConfig().withDatabaseId(databaseId));
}
/** Specifies the Cloud Spanner instance. */
public Read withInstanceId(ValueProvider<String> instanceId) {
  return withSpannerConfig(getSpannerConfig().withInstanceId(instanceId));
}
/**
 * A {@link PTransform} that works like {@link #read}, but executes read operations coming from a
 * {@link PCollection}.
 */
@Experimental(Experimental.Kind.SOURCE_SINK)
public static ReadAll readAll() {
  AutoValue_SpannerIO_ReadAll.Builder builder = new AutoValue_SpannerIO_ReadAll.Builder();
  builder.setSpannerConfig(SpannerConfig.create());
  builder.setTimestampBound(TimestampBound.strong());
  builder.setBatching(true);
  return builder.build();
}
// Emits a single id naming this export run: either the test-supplied id (needed on the
// direct runner / in unit tests) or "<instance>-<database>-<dataflowJobId>" on Dataflow.
@ProcessElement
public void processElement(ProcessContext c) {
  String instanceId = spannerConfig.getInstanceId().get();
  String dbId = spannerConfig.getDatabaseId().get();
  // For direct runner or tests we need a deterministic jobId.
  String testJobId = ExportTransform.this.testJobId.get();
  if (!Strings.isNullOrEmpty(testJobId)) {
    c.output(testJobId);
    return;
  }
  try {
    // DataflowWorkerHarnessOptions is only populated on the Dataflow service;
    // elsewhere this conversion/read fails and we fall through to the error below.
    DataflowWorkerHarnessOptions workerHarnessOptions =
        c.getPipelineOptions().as(DataflowWorkerHarnessOptions.class);
    String jobId = workerHarnessOptions.getJobId();
    c.output(instanceId + "-" + dbId + "-" + jobId);
  } catch (Exception e) {
    throw new IllegalStateException(
        "Please specify --testJobId to run with non-dataflow runner");
  }
}
}))
/** Test-only hook: overrides the {@link ServiceFactory} used to build Spanner clients. */
@VisibleForTesting
Write withServiceFactory(ServiceFactory<Spanner, SpannerOptions> serviceFactory) {
  return withSpannerConfig(getSpannerConfig().withServiceFactory(serviceFactory));
}