// Reads Pub/Sub payloads as strings into streamData: first from a subscription path built from
// projectId and pubSubTopicSub, then from a topic path built from projectId and pubSubTopic.
// NOTE(review): both statements assign streamData and both register a step named "ReadPubSub" on
// the same pipeline. If they really execute back to back, the subscription read stays in the
// pipeline graph but its output is discarded by the second assignment. Presumably these belong to
// separate branches of a conditional that is not visible here — confirm against the caller.
streamData = pipeline.apply("ReadPubSub",PubsubIO.readStrings().fromSubscription(String.format("projects/%1$s/subscriptions/%2$s",projectId,pubSubTopicSub)));
streamData = pipeline.apply("ReadPubSub",PubsubIO.readStrings().fromTopic(String.format("projects/%1$s/topics/%2$s",projectId,pubSubTopic)));
} else {// normal PubsubIO.Read<PubsubMessage> pubsubRead = PubsubIO.readMessages().fromSubscription(String.format("projects/%s/subscriptions/%s", datastore.projectName.getValue(), dataset.subscription.getValue())); if (properties.idLabel.getValue() != null && !"".equals(properties.idLabel.getValue())) { pubsubRead.withIdAttribute(properties.idLabel.getValue()); pubsubRead.withTimestampAttribute(properties.timestampLabel.getValue());
@Override public PCollection<T> expand(PBegin input) { if (getTopicProvider() == null && getSubscriptionProvider() == null) { throw new IllegalStateException( "Need to set either the topic or the subscription for " + "a PubsubIO.Read transform"); if (getTopicProvider() != null && getSubscriptionProvider() != null) { throw new IllegalStateException( "Can't set both the topic and the subscription for " + "a PubsubIO.Read transform"); getTopicProvider() == null ? null : NestedValueProvider.of(getTopicProvider(), new TopicPathTranslator()); @Nullable ValueProvider<SubscriptionPath> subscriptionPath = getSubscriptionProvider() == null ? null : NestedValueProvider.of(getSubscriptionProvider(), new SubscriptionPathTranslator()); PubsubUnboundedSource source = new PubsubUnboundedSource( getTimestampAttribute(), getIdAttribute(), getNeedsAttributes()); return input.apply(source).apply(MapElements.via(getParseFn())).setCoder(getCoder());
.apply( PubsubIO.readStrings() .withTimestampAttribute(GameConstants.TIMESTAMP_ATTRIBUTE) .fromTopic(options.getTopic())) .apply("ParseGameEvent", ParDo.of(new ParseEventFn()));
PubsubIO.readMessagesWithAttributes().fromTopic(options.getInputTopic()))
PubsubIO.readMessagesWithAttributes().fromTopic(dlqTopic.topicPath().getPath()));
.apply(PubsubIO.readStrings().fromTopic(options.getPubsubTopic())) .apply(Window.<String> into(window)) .apply(ParDo.of(new ExtractWordsFn()))
.apply( "Read PubSub Events", PubsubIO.readMessagesWithAttributes().fromTopic(options.getInputTopic())) .apply("Map to Archive", ParDo.of(new PubsubMessageToArchiveDoFn())) .apply(
.apply( PubsubIO.readStrings() .withTimestampAttribute(GameConstants.TIMESTAMP_ATTRIBUTE) .fromTopic(options.getTopic())) .apply("ParseGameEvent", ParDo.of(new ParseEventFn()));
.apply("Read PubSub Events", PubsubIO.readStrings().fromTopic(options.getInputTopic())) .apply( options.getWindowDuration() + " Window",
.withTimestampAttribute(GameConstants.TIMESTAMP_ATTRIBUTE) .fromTopic(options.getTopic())) .apply("ParseGameEvent", ParDo.of(new ParseEventFn()))
/**
 * Entry point. Builds and runs a pipeline which reads in JSON from Pubsub, feeds the JSON to a
 * Javascript UDF, and writes the JSON encoded Entities to Datastore.
 *
 * @param args arguments to the pipeline
 */
public static void main(String[] args) {
  PubsubToDatastoreOptions pipelineOptions =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(PubsubToDatastoreOptions.class);

  Pipeline p = Pipeline.create(pipelineOptions);

  p
      // Source: string payloads from the configured Pubsub topic.
      .apply(PubsubIO.readStrings().fromTopic(pipelineOptions.getPubsubReadTopic()))
      // Transform: run each payload through the configured Javascript function.
      .apply(
          TransformTextViaJavascript.newBuilder()
              .setFileSystemPath(pipelineOptions.getJavascriptTextTransformGcsPath())
              .setFunctionName(pipelineOptions.getJavascriptTextTransformFunctionName())
              .build())
      // Sink: write the transformed JSON entities to the configured Datastore project.
      .apply(
          WriteJsonEntities.newBuilder()
              .setProjectId(pipelineOptions.getDatastoreWriteProjectId())
              .build());

  p.run();
}
@Test public void testReadPublicData() throws Exception { // The pipeline will never terminate on its own pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false); PCollection<String> messages = pipeline.apply( PubsubIO.readStrings() .fromTopic("projects/pubsub-public-data/topics/taxirides-realtime")); messages.apply( "waitForAnyMessage", signal.signalSuccessWhen(messages.getCoder(), anyMessages -> true)); Supplier<Void> start = signal.waitForStart(Duration.standardMinutes(5)); pipeline.apply(signal.signalStart()); PipelineResult job = pipeline.run(); start.get(); signal.waitForSuccess(Duration.standardSeconds(30)); // A runner may not support cancel try { job.cancel(); } catch (UnsupportedOperationException exc) { // noop } } }
/** * Runs the pipeline with the supplied options. * * @param options The execution parameters to the pipeline. * @return The result of the pipeline execution. */ public static PipelineResult run(Options options) { // Create the pipeline Pipeline pipeline = Pipeline.create(options); /** * Steps: 1) Read PubSubMessage with attributes from input PubSub subscription. * 2) Apply any filters if an attribute=value pair is provided. * 3) Write each PubSubMessage to output PubSub topic. */ pipeline .apply( "Read PubSub Events", PubsubIO.readMessagesWithAttributes().fromSubscription(options.getInputSubscription())) .apply("Filter Events If Enabled", ParDo.of(new ExtractAndFilterEventsFn())) .apply("Write PubSub Events", PubsubIO.writeMessages().to(options.getOutputTopic())); // Execute the pipeline and return the result. return pipeline.run(); } }
/**
 * Applies a Pub/Sub read of {@code FeatureRow} protos to the input's pipeline.
 *
 * <p>The read is bound to the configured subscription when one is set, otherwise to the
 * configured topic when one is set. When {@code discardUnknownFeatures} is enabled, the rows are
 * additionally passed through a {@code FilterFeatureRowDoFn} built from the non-empty feature ids
 * of the import spec's schema fields.
 *
 * @param input the pipeline input this transform is applied to
 * @return the feature rows read from Pub/Sub, filtered if requested
 */
@Override
public PCollection<FeatureRow> expand(PInput input) {
  checkArgument(importSpec.getType().equals(PUBSUB_FEATURE_SOURCE_TYPE));
  PubSubReadOptions readOptions =
      OptionsParser.parse(importSpec.getOptionsMap(), PubSubReadOptions.class);

  // A subscription takes precedence over a topic when both are configured.
  PubsubIO.Read<FeatureRow> source = readProtos();
  if (!Strings.isNullOrEmpty(readOptions.subscription)) {
    source = source.fromSubscription(readOptions.subscription);
  } else if (!Strings.isNullOrEmpty(readOptions.topic)) {
    source = source.fromTopic(readOptions.topic);
  }

  PCollection<FeatureRow> rows = input.getPipeline().apply(source);
  if (!readOptions.discardUnknownFeatures) {
    return rows;
  }

  // Collect the feature ids declared in the schema, skipping fields without one.
  List<String> schemaFeatureIds = new ArrayList<>();
  for (Field schemaField : importSpec.getSchema().getFieldsList()) {
    String id = schemaField.getFeatureId();
    if (Strings.isNullOrEmpty(id)) {
      continue;
    }
    schemaFeatureIds.add(id);
  }
  return rows.apply(ParDo.of(new FilterFeatureRowDoFn(schemaFeatureIds)));
}
/**
 * Passes each topic path below to {@code fromTopic}; the test succeeds only if none of the calls
 * throws.
 */
@Test
public void testTopicValidationSuccess() throws Exception {
  String reallyLongTopic =
      "projects/my-project/topics/A-really-long-one-"
          + "111111111111111111111111111111111111111111111111111111111111111111111111111111111"
          + "111111111111111111111111111111111111111111111111111111111111111111111111111111111"
          + "11111111111111111111111111111111111111111111111111111111111111111111111111";

  String[] acceptedTopics = {
    "projects/my-project/topics/abc",
    "projects/my-project/topics/ABC",
    "projects/my-project/topics/AbC-DeF",
    "projects/my-project/topics/AbC-1234",
    "projects/my-project/topics/AbC-1234-_.~%+-_.~%+-_.~%+-abc",
    reallyLongTopic,
  };

  for (String acceptedTopic : acceptedTopics) {
    PubsubIO.readStrings().fromTopic(acceptedTopic);
  }
}
@Test @Category({ValidatesRunner.class, UsesUnboundedPCollections.class}) public void testPrimitiveReadDisplayData() { DisplayDataEvaluator evaluator = DisplayDataEvaluator.create(); Set<DisplayData> displayData; PubsubIO.Read<String> baseRead = PubsubIO.readStrings(); // Reading from a subscription. PubsubIO.Read<String> read = baseRead.fromSubscription("projects/project/subscriptions/subscription"); displayData = evaluator.displayDataForPrimitiveSourceTransforms(read); assertThat( "PubsubIO.Read should include the subscription in its primitive display data", displayData, hasItem(hasDisplayItem("subscription"))); // Reading from a topic. read = baseRead.fromTopic("projects/project/topics/topic"); displayData = evaluator.displayDataForPrimitiveSourceTransforms(read); assertThat( "PubsubIO.Read should include the topic in its primitive display data", displayData, hasItem(hasDisplayItem("topic"))); }
/**
 * Expects {@code fromTopic} to throw an {@link IllegalArgumentException} for the overly long
 * topic path built below.
 */
@Test
public void testTopicValidationTooLong() throws Exception {
  thrown.expect(IllegalArgumentException.class);

  String tooLongTopic =
      "projects/my-project/topics/A-really-long-one-"
          + "111111111111111111111111111111111111111111111111111111111111111111111111111111111"
          + "111111111111111111111111111111111111111111111111111111111111111111111111111111111"
          + "1111111111111111111111111111111111111111111111111111111111111111111111111111";

  PubsubIO.readStrings().fromTopic(tooLongTopic);
}
/**
 * Creates and returns a transform for reading from a Cloud Pub/Sub topic. Mutually exclusive
 * with {@link #fromSubscription(String)}.
 *
 * <p>See {@link PubsubIO.PubsubTopic#fromPath(String)} for more details on the format of the
 * {@code topic} string.
 *
 * <p>The Beam runner will start reading data published on this topic from the time the pipeline
 * is started. Any data published on the topic before the pipeline is started will not be read
 * by the runner.
 *
 * <p>This overload wraps {@code topic} in a {@code StaticValueProvider} and delegates to the
 * {@code fromTopic} overload that accepts the provider.
 *
 * @param topic the topic path string, in the format described by
 *     {@link PubsubIO.PubsubTopic#fromPath(String)}
 * @return the {@code Read} transform returned by the delegated {@code fromTopic} call
 */
public Read<T> fromTopic(String topic) {
  return fromTopic(StaticValueProvider.of(topic));
}
/**
 * Checks that a topic read configured with timestamp and id attributes reports the topic and
 * both attributes in its display data.
 */
@Test
public void testReadTopicDisplayData() {
  String topicPath = "projects/project/topics/topic";

  PubsubIO.Read<String> topicRead =
      PubsubIO.readStrings().fromTopic(StaticValueProvider.of(topicPath));
  PubsubIO.Read<String> configuredRead =
      topicRead.withTimestampAttribute("myTimestamp").withIdAttribute("myId");

  DisplayData shown = DisplayData.from(configuredRead);

  assertThat(shown, hasDisplayItem("topic", topicPath));
  assertThat(shown, hasDisplayItem("timestampAttribute", "myTimestamp"));
  assertThat(shown, hasDisplayItem("idAttribute", "myId"));
}