new PubsubUnboundedSink( FACTORY, NestedValueProvider.of(getTopicProvider(), new TopicPathTranslator()), getTimestampAttribute(), getIdAttribute(),
/**
 * Returns the property name associated with this provider.
 *
 * <p>Only a {@link RuntimeValueProvider} or a {@link NestedValueProvider} carries a property
 * name; any other delegate is a caller error.
 */
public String propertyName() {
  if (value instanceof RuntimeValueProvider) {
    return ((RuntimeValueProvider) value).propertyName();
  }
  if (value instanceof NestedValueProvider) {
    return ((NestedValueProvider) value).propertyName();
  }
  throw new RuntimeException(
      "Only a RuntimeValueProvider or a NestedValueProvider can supply"
          + " a property name.");
}
@Override
public String toString() {
  // If the value cannot be read yet, describe the wrapper without forcing evaluation.
  if (!isAccessible()) {
    return MoreObjects.toStringHelper(this)
        .add("value", value)
        .add("translator", translator.getClass().getSimpleName())
        .toString();
  }
  return String.valueOf(get());
}
}
// Lazily translate the string output directory into a ResourceId; the `true` flag to
// matchNewResource marks the resource as a directory.
// NOTE(review): as written this is a bare expression statement whose result is discarded —
// presumably it is an argument to a larger call outside this view; confirm against the caller.
ValueProvider.NestedValueProvider.of( outputDir, (SerializableFunction<String, ResourceId>) s -> FileSystems.matchNewResource(s, true));
BigQueryIO.writeTableRows() .withSchema( NestedValueProvider.of( options.getJSONPath(), new SerializableFunction<String, TableSchema>() {
// Record the property names backing the topic and subscription providers as step inputs so the
// service can substitute runtime-provided values.
// NOTE(review): both casts assume the providers are NestedValueProviders — a plain provider
// here would throw ClassCastException; confirm the construction site guarantees this.
stepContext.addInput( PropertyNames.PUBSUB_TOPIC_OVERRIDE, ((NestedValueProvider) overriddenTransform.getTopicProvider()).propertyName()); stepContext.addInput( PropertyNames.PUBSUB_SUBSCRIPTION_OVERRIDE, ((NestedValueProvider) overriddenTransform.getSubscriptionProvider()).propertyName());
options.getOutputShardTemplate(), options.getOutputFilenameSuffix())) .withTempDirectory(NestedValueProvider.of( options.getAvroTempDirectory(), (SerializableFunction<String, ResourceId>) input ->
// Record the property name backing the topic provider as a step input so the service can
// substitute the runtime-provided value.
// NOTE(review): the cast assumes the provider is a NestedValueProvider — a plain provider
// would throw ClassCastException; confirm with the construction site.
stepContext.addInput( PropertyNames.PUBSUB_TOPIC_OVERRIDE, ((NestedValueProvider) overriddenTransform.getTopicProvider()).propertyName());
options.getOutputShardTemplate(), options.getOutputFilenameSuffix())) .withTempDirectory(NestedValueProvider.of( options.getOutputDirectory(), (SerializableFunction<String, ResourceId>) input ->
getTopicProvider() == null ? null : NestedValueProvider.of(getTopicProvider(), new TopicPathTranslator()); @Nullable ValueProvider<SubscriptionPath> subscriptionPath = getSubscriptionProvider() == null ? null : NestedValueProvider.of(getSubscriptionProvider(), new SubscriptionPathTranslator()); PubsubUnboundedSource source = new PubsubUnboundedSource(
/** * Returns the table to write, or {@code null} if writing with {@code tableFunction}. * * <p>If the table's project is not specified, use the executing project. */ @Nullable ValueProvider<TableReference> getTableWithDefaultProject(BigQueryOptions bqOptions) { ValueProvider<TableReference> table = getTable(); if (table == null) { return table; } if (!table.isAccessible()) { LOG.info( "Using a dynamic value for table input. This must contain a project" + " in the table reference: {}", table); return table; } if (Strings.isNullOrEmpty(table.get().getProjectId())) { // If user does not specify a project we assume the table to be located in // the default project. TableReference tableRef = table.get(); tableRef.setProjectId(bqOptions.getProject()); return NestedValueProvider.of( StaticValueProvider.of(BigQueryHelpers.toJsonString(tableRef)), new JsonTableRefToTableRef()); } return table; }
/**
 * Method to read a BigQuery schema file from GCS and return the file contents as a string.
 *
 * <p>The read happens lazily, when the returned provider is first accessed.
 *
 * @param gcsPath Path string for the schema file in GCS.
 * @return File contents as a string.
 */
private static ValueProvider<String> getSchemaFromGCS(ValueProvider<String> gcsPath) {
  return NestedValueProvider.of(
      gcsPath,
      new SimpleFunction<String, String>() {
        @Override
        public String apply(String input) {
          // `false` = the path refers to a file, not a directory.
          ResourceId sourceResourceId = FileSystems.matchNewResource(input, false);
          // Single try-with-resources declaration list; resources close in reverse order.
          try (ReadableByteChannel rbc = FileSystems.open(sourceResourceId);
              ByteArrayOutputStream baos = new ByteArrayOutputStream();
              WritableByteChannel wbc = Channels.newChannel(baos)) {
            ByteStreams.copy(rbc, wbc);
            String schema = baos.toString(Charsets.UTF_8.name());
            LOG.info("Extracted schema: {}", schema);
            return schema;
          } catch (IOException e) {
            // Log the full stack trace (the original logged only e.getMessage(), losing
            // the cause chain) and rethrow with the cause preserved.
            LOG.error("Error extracting schema", e);
            throw new RuntimeException(e);
          }
        }
      });
}
static Pipeline buildPipeline(ExportOptions opts) { // Use the base target directory to stage bundles ValueProvider<ResourceId> destinationPath = NestedValueProvider .of(opts.getDestinationPath(), new StringToDirResourceId()); // Concat the destination path & prefix for the final path FilePathPrefix filePathPrefix = new FilePathPrefix(destinationPath, opts.getFilenamePrefix()); SequenceFileSink<ImmutableBytesWritable, Result> sink = new SequenceFileSink<>( destinationPath, DefaultFilenamePolicy.fromStandardParameters( filePathPrefix, null, "", false ), ImmutableBytesWritable.class, WritableSerialization.class, Result.class, ResultSerialization.class ); Pipeline pipeline = Pipeline.create(Utils.tweakOptions(opts)); CloudBigtableScanConfiguration config = TemplateUtils.BuildExportConfig(opts); pipeline .apply("Read table", Read.from(CloudBigtableIO.read(config))) .apply("Format results", MapElements.via(new ResultToKV())) .apply("Write", WriteFiles.to(sink)); return pipeline; }
// Verifies Create.ofProvider for the three provider flavors: static, static wrapped in a
// NestedValueProvider (translator applied lazily), and a runtime provider from the pipeline.
@Test @Category(NeedsRunner.class) public void testCreateOfProvider() throws Exception { PAssert.that( p.apply( "Static", Create.ofProvider(StaticValueProvider.of("foo"), StringUtf8Coder.of()))) .containsInAnyOrder("foo"); PAssert.that( p.apply( "Static nested", Create.ofProvider( NestedValueProvider.of(StaticValueProvider.of("foo"), input -> input + "bar"), StringUtf8Coder.of()))) .containsInAnyOrder("foobar"); PAssert.that( p.apply( "Runtime", Create.ofProvider(p.newProvider("runtimeFoo"), StringUtf8Coder.of()))) .containsInAnyOrder("runtimeFoo"); p.run(); }
/**
 * Builds CloudBigtableTableConfiguration from input runtime parameters for import job.
 *
 * @param opts runtime options carrying project, instance, table, and throttle settings.
 * @return the assembled table configuration.
 */
public static CloudBigtableTableConfiguration BuildImportConfig(ImportOptions opts) {
  CloudBigtableTableConfiguration.Builder builder =
      new CloudBigtableTableConfiguration.Builder()
          .withProjectId(opts.getBigtableProject())
          .withInstanceId(opts.getBigtableInstanceId())
          .withTableId(opts.getBigtableTableId());
  if (opts.getBigtableAppProfileId() != null) {
    builder.withAppProfileId(opts.getBigtableAppProfileId());
  }
  // Throttling is enabled iff the configured latency is positive.
  // (Fixed: the original declared this as a raw ValueProvider, hiding the String payload.)
  ValueProvider<String> enableThrottling =
      ValueProvider.NestedValueProvider.of(
          opts.getMutationThrottleLatencyMs(),
          (Integer throttleMs) -> String.valueOf(throttleMs > 0));
  builder.withConfiguration(
      BigtableOptionsFactory.BIGTABLE_BUFFERED_MUTATOR_ENABLE_THROTTLING, enableThrottling);
  builder.withConfiguration(
      BigtableOptionsFactory.BIGTABLE_BUFFERED_MUTATOR_THROTTLING_THRESHOLD_MILLIS,
      ValueProvider.NestedValueProvider.of(opts.getMutationThrottleLatencyMs(), String::valueOf));
  return builder.build();
}
@Override
public PCollection<Export> expand(PBegin input) {
  // Location of the export manifest, derived lazily from the import directory.
  NestedValueProvider<String, String> manifestFile =
      NestedValueProvider.of(importDirectory, dir -> GcsUtil.joinPath(dir, "spanner-export.json"));
  return input
      .apply("Read manifest", FileIO.match().filepattern(manifestFile))
      .apply(
          "Resource id",
          MapElements.into(TypeDescriptor.of(ResourceId.class))
              .via(MatchResult.Metadata::resourceId))
      .apply(
          "Read manifest json",
          MapElements.into(TypeDescriptor.of(Export.class))
              .via(ReadExportManifestFile::readManifest));
}
/**
 * Constructs a WriteOperation using the default strategy for generating a temporary directory
 * from the base output filename.
 *
 * <p>Default is a uniquely named subdirectory of the provided tempDirectory, e.g. if
 * tempDirectory is /path/to/foo/, the temporary directory will be
 * /path/to/foo/temp-beam-foo-$date.
 *
 * @param sink the FileBasedSink that will be used to configure this write operation.
 */
public WriteOperation(FileBasedSink<?, DestinationT, OutputT> sink) {
  // Delegate to the main constructor; the temp directory is wrapped in a NestedValueProvider
  // so it is computed from the sink's temp-directory provider when first accessed.
  this(
      sink,
      NestedValueProvider.of(sink.getTempDirectoryProvider(), new TemporaryDirectoryBuilder()));
}
@Override
public FileBasedSink.FilenamePolicy getFilenamePolicy(final String destination) {
  // Unique id obtained from the side input; used as a path segment below.
  final String uniqueId = sideInput(uniqueIdView);
  // Resolve <baseDir>/<uniqueId>/<destination>.avro lazily, when the provider is accessed.
  ValueProvider<ResourceId> outputFile =
      ValueProvider.NestedValueProvider.of(
          baseDir,
          (SerializableFunction<ResourceId, ResourceId>)
              dir ->
                  dir.resolve(
                      GcsUtil.joinPath(uniqueId, destination + ".avro"),
                      ResolveOptions.StandardResolveOptions.RESOLVE_FILE));
  return DefaultFilenamePolicy.fromStandardParameters(outputFile, null, null, false);
}
@Test public void testRuntimeOptionsNotCalledInApplyOutput() { p.enableAbandonedNodeEnforcement(false); BigQueryIO.Write<TableRow> write = BigQueryIO.writeTableRows() .to(p.newProvider("some-table")) .withSchema( ValueProvider.NestedValueProvider.of( p.newProvider("some-schema"), new BigQueryHelpers.JsonSchemaToTableSchema())) .withoutValidation(); p.apply(Create.empty(TableRowJsonCoder.of())).apply(write); // Test that this doesn't throw. DisplayData.from(write); }
@Test
@Category(NeedsRunner.class)
public void testNewProvider() {
  ValueProvider<String> foo = pipeline.newProvider("foo");
  ValueProvider<String> foobar =
      ValueProvider.NestedValueProvider.of(foo, base -> base + "bar");

  // Neither provider may be readable before the pipeline runs.
  assertFalse(foo.isAccessible());
  assertFalse(foobar.isAccessible());

  PAssert.that(pipeline.apply("create foo", Create.ofProvider(foo, StringUtf8Coder.of())))
      .containsInAnyOrder("foo");
  PAssert.that(pipeline.apply("create foobar", Create.ofProvider(foobar, StringUtf8Coder.of())))
      .containsInAnyOrder("foobar");

  pipeline.run();
}
}