/** Verifies that well-formed topic paths pass PubsubIO's topic validation without throwing. */
@Test
public void testTopicValidationSuccess() throws Exception {
  String prefix = "projects/my-project/topics/";
  // Lowercase, uppercase, mixed case, digits, and the full set of allowed punctuation.
  PubsubIO.readStrings().fromTopic(prefix + "abc");
  PubsubIO.readStrings().fromTopic(prefix + "ABC");
  PubsubIO.readStrings().fromTopic(prefix + "AbC-DeF");
  PubsubIO.readStrings().fromTopic(prefix + "AbC-1234");
  PubsubIO.readStrings().fromTopic(prefix + "AbC-1234-_.~%+-_.~%+-_.~%+-abc");
  // A very long — but still legal — topic name.
  StringBuilder longTopic = new StringBuilder(prefix);
  longTopic
      .append("A-really-long-one-")
      .append("111111111111111111111111111111111111111111111111111111111111111111111111111111111")
      .append("111111111111111111111111111111111111111111111111111111111111111111111111111111111")
      .append("11111111111111111111111111111111111111111111111111111111111111111111111111");
  PubsubIO.readStrings().fromTopic(longTopic.toString());
}
// Publishes the single START_SIGNAL_MESSAGE string to the start topic, signalling to
// downstream listeners that the pipeline under test has begun running.
@Override public PDone expand(PBegin input) { return input .apply("Start signal", Create.of(START_SIGNAL_MESSAGE)) .apply(PubsubIO.writeStrings().to(startTopicPath.getPath())); } }
/** * Runs the pipeline with the supplied options. * * @param options The execution parameters to the pipeline. * @return The result of the pipeline execution. */ public static PipelineResult run(Options options) { // Create the pipeline Pipeline pipeline = Pipeline.create(options); /** * Steps: 1) Read PubSubMessage with attributes from input PubSub subscription. * 2) Apply any filters if an attribute=value pair is provided. * 3) Write each PubSubMessage to output PubSub topic. */ pipeline .apply( "Read PubSub Events", PubsubIO.readMessagesWithAttributes().fromSubscription(options.getInputSubscription())) .apply("Filter Events If Enabled", ParDo.of(new ExtractAndFilterEventsFn())) .apply("Write PubSub Events", PubsubIO.writeMessages().to(options.getOutputTopic())); // Execute the pipeline and return the result. return pipeline.run(); } }
/** Read and write transforms must report their stable default names. */
@Test
public void testPubsubIOGetName() {
  String topic = "projects/myproject/topics/mytopic";
  assertEquals("PubsubIO.Read", PubsubIO.readStrings().fromTopic(topic).getName());
  assertEquals("PubsubIO.Write", PubsubIO.writeStrings().to(topic).getName());
}
/**
 * Builds the Pub/Sub source for this pipeline, attaching the timestamp attribute
 * when one has been configured.
 */
private PubsubIO.Read<PubsubMessage> readMessagesWithAttributes() {
  PubsubIO.Read<PubsubMessage> source =
      PubsubIO.readMessagesWithAttributes().fromTopic(getTopic());
  if (getTimestampAttribute() != null) {
    source = source.withTimestampAttribute(getTimestampAttribute());
  }
  return source;
}
/**
 * Builds the dead-letter-queue sink, attaching the timestamp attribute when one
 * has been configured.
 */
private PubsubIO.Write<PubsubMessage> writeMessagesToDlq() {
  PubsubIO.Write<PubsubMessage> sink = PubsubIO.writeMessages().to(getDeadLetterQueue());
  if (getTimestampAttribute() != null) {
    sink = sink.withTimestampAttribute(getTimestampAttribute());
  }
  return sink;
}
/** * Returns A {@link PTransform} that continuously reads binary encoded protobuf messages of the * given type from a Google Cloud Pub/Sub stream. */ public static <T extends Message> Read<T> readProtos(Class<T> messageClass) { // TODO: Stop using ProtoCoder and instead parse the payload directly. // We should not be relying on the fact that ProtoCoder's wire format is identical to // the protobuf wire format, as the wire format is not part of a coder's API. ProtoCoder<T> coder = ProtoCoder.of(messageClass); return PubsubIO.<T>read().withCoderAndParseFn(coder, new ParsePayloadUsingCoder<>(coder)); }
// "Normal" (non-special-cased) path: read raw PubsubMessages from the subscription named
// in the dataset, scoped to the datastore's project. An id attribute is attached below
// only when the idLabel property is set to a non-empty value.
} else {// normal PubsubIO.Read<PubsubMessage> pubsubRead = PubsubIO.readMessages().fromSubscription(String.format("projects/%s/subscriptions/%s", datastore.projectName.getValue(), dataset.subscription.getValue())); if (properties.idLabel.getValue() != null && !"".equals(properties.idLabel.getValue())) {
// Read PubsubMessages (with attributes) from the input topic and convert each one to the
// archive representation; the chain continues (sink step) beyond this fragment.
.apply( "Read PubSub Events", PubsubIO.readMessagesWithAttributes().fromTopic(options.getInputTopic())) .apply("Map to Archive", ParDo.of(new PubsubMessageToArchiveDoFn())) .apply(
/**
 * Formats incoming records (CSV or Avro) and writes them to the configured Pub/Sub topic.
 *
 * @param in the records to publish
 * @return the terminal {@code PDone} of the write
 * @throws RuntimeException if the dataset's value format is not yet supported
 */
@Override
public PDone expand(PCollection<IndexedRecord> in) {
  PubSubDatasetProperties dataset = properties.getDatasetProperties();
  PubSubDatastoreProperties datastore = dataset.getDatastoreProperties();
  prepareTopicSubscription(properties);

  PubsubIO.Write<PubsubMessage> pubsubWrite =
      PubsubIO.writeMessages()
          .to(String.format(
              "projects/%s/topics/%s",
              datastore.projectName.getValue(), dataset.topic.getValue()));

  // BUG FIX: PubsubIO.Write is immutable — withIdAttribute/withTimestampAttribute return
  // NEW transforms. The previous code discarded the return values, so the configured
  // attributes were silently never applied. Reassign to keep them.
  if (properties.idLabel.getValue() != null && !"".equals(properties.idLabel.getValue())) {
    pubsubWrite = pubsubWrite.withIdAttribute(properties.idLabel.getValue());
  }
  if (properties.timestampLabel.getValue() != null
      && !"".equals(properties.timestampLabel.getValue())) {
    pubsubWrite = pubsubWrite.withTimestampAttribute(properties.timestampLabel.getValue());
  }

  switch (dataset.valueFormat.getValue()) {
    case CSV:
      return in.apply(MapElements.via(new FormatCsv(dataset.fieldDelimiter.getValue())))
          .apply(pubsubWrite);
    case AVRO:
      return in.apply(MapElements.via(new FormatAvro())).apply(pubsubWrite);
    default:
      throw new RuntimeException("To be implemented: " + dataset.valueFormat.getValue());
  }
}
/**
 * Returns a {@link PTransform} that continuously reads UTF-8 encoded strings from a Google Cloud
 * Pub/Sub stream.
 */
public static Read<String> readStrings() {
  Read<String> transform =
      PubsubIO.<String>read()
          .withCoderAndParseFn(StringUtf8Coder.of(), new ParsePayloadAsUtf8());
  return transform;
}
/** A topic name exceeding the maximum allowed length must be rejected. */
@Test
public void testTopicValidationTooLong() throws Exception {
  thrown.expect(IllegalArgumentException.class);
  // Compile-time concatenation of string constants — the resulting string is
  // byte-identical to the StringBuilder-built original.
  String tooLong =
      "projects/my-project/topics/A-really-long-one-"
          + "111111111111111111111111111111111111111111111111111111111111111111111111111111111"
          + "111111111111111111111111111111111111111111111111111111111111111111111111111111111"
          + "1111111111111111111111111111111111111111111111111111111111111111111111111111";
  PubsubIO.readStrings().fromTopic(tooLong);
}
/** Entry point: streams the lines of a GDELT export file into a Pub/Sub topic. */
public static void main(String[] args) {
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  if (options.getInput() == null) {
    // No explicit input: default to the dated GDELT export archive.
    options.setInput(Options.GDELT_EVENTS_URL + options.getDate() + ".export.CSV.zip");
  }
  LOG.info(options.toString());

  Pipeline p = Pipeline.create(options);
  p.apply("ReadFromGDELTFile", TextIO.read().from(options.getInput()))
      .apply("WriteToPubsub", PubsubIO.writeStrings().to(options.getTopic()));
  p.run();
}
}
PubsubIO.readMessagesWithAttributes().fromTopic(options.getInputTopic()))
/** * Returns A {@link PTransform} that continuously reads binary encoded Avro messages of the given * type from a Google Cloud Pub/Sub stream. */ public static <T> Read<T> readAvros(Class<T> clazz) { // TODO: Stop using AvroCoder and instead parse the payload directly. // We should not be relying on the fact that AvroCoder's wire format is identical to // the Avro wire format, as the wire format is not part of a coder's API. AvroCoder<T> coder = AvroCoder.of(clazz); return PubsubIO.<T>read().withCoderAndParseFn(coder, new ParsePayloadUsingCoder<>(coder)); }
/** A topic name containing an illegal character ('*') must be rejected. */
@Test
public void testTopicValidationBadCharacter() throws Exception {
  thrown.expect(IllegalArgumentException.class);
  String badTopic = "projects/my-project/topics/abc-*-abc";
  PubsubIO.readStrings().fromTopic(badTopic);
}
/** The primitive write transform must surface its topic via display data. */
@Test
@Category(ValidatesRunner.class)
public void testPrimitiveWriteDisplayData() {
  DisplayDataEvaluator evaluator = DisplayDataEvaluator.create();
  PubsubIO.Write<?> write = PubsubIO.writeStrings().to("projects/project/topics/topic");

  Set<DisplayData> displayData = evaluator.displayDataForPrimitiveTransforms(write);

  assertThat(
      "PubsubIO.Write should include the topic in its primitive display data",
      displayData,
      hasItem(hasDisplayItem("topic")));
}
}
PubsubIO.readMessagesWithAttributes().fromTopic(dlqTopic.topicPath().getPath()));
// NOTE(review): the second assignment immediately overwrites the first, so the
// subscription-based read is discarded and only the topic-based read takes effect.
// Both steps also share the name "ReadPubSub", which Beam rejects as a duplicate step
// name if both are actually applied. Confirm whether one line should be removed or
// the choice made conditional.
streamData = pipeline.apply("ReadPubSub",PubsubIO.readStrings().fromSubscription(String.format("projects/%1$s/subscriptions/%2$s",projectId,pubSubTopicSub))); streamData = pipeline.apply("ReadPubSub",PubsubIO.readStrings().fromTopic(String.format("projects/%1$s/topics/%2$s",projectId,pubSubTopic)));
// Routes every element through one globally-windowed, single-key ("dummyKey") state cell
// so a single StatefulPredicateCheck instance observes all events, then publishes the
// success/failure outcome string to the result topic.
@Override public POutput expand(PCollection<? extends T> input) { return input // assign a dummy key and global window, // this is needed to accumulate all observed events in the same state cell .apply(Window.into(new GlobalWindows())) .apply(WithKeys.of("dummyKey")) .apply( "checkAllEventsForSuccess", ParDo.of(new StatefulPredicateCheck<>(coder, formatter, successPredicate))) // signal the success/failure to the result topic .apply("publishSuccess", PubsubIO.writeStrings().to(resultTopicPath.getPath())); } }