options.setAutoscalingAlgorithm(AutoscalingAlgorithmType.THROUGHPUT_BASED);
options.setZone(zone);
options.setStreaming(isStreaming);
options.setJobName(pipelineName);
Pipeline pipeline = Pipeline.create(options);
@Test
public void testWorkerHarnessContainerImage() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);

  // default image set
  options.setWorkerHarnessContainerImage("some-container");
  assertThat(getContainerImageForJob(options), equalTo("some-container"));

  // batch, legacy
  options.setWorkerHarnessContainerImage("gcr.io/IMAGE/foo");
  options.setExperiments(null);
  options.setStreaming(false);
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/beam-java-batch/foo"));

  // streaming, legacy
  options.setStreaming(true);
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/beam-java-streaming/foo"));

  // streaming, fnapi
  options.setExperiments(ImmutableList.of("experiment1", "beam_fn_api"));
  assertThat(getContainerImageForJob(options), equalTo("gcr.io/java/foo"));
}
@Test
public void testGcsUploadBufferSizeIsSetForStreamingWhenDefault() throws IOException {
  DataflowPipelineOptions streamingOptions = buildPipelineOptions();
  streamingOptions.setStreaming(true);
  streamingOptions.setRunner(DataflowRunner.class);
  Pipeline p = Pipeline.create(streamingOptions);

  // Instantiating a runner prior to run() currently has the side effect of mutating the options.
  // This could be tested with DataflowRunner.fromOptions(streamingOptions), but that would not
  // ensure that the pipeline itself had the expected options set.
  p.run();

  assertEquals(
      DataflowRunner.GCS_UPLOAD_BUFFER_SIZE_BYTES_DEFAULT,
      streamingOptions.getGcsUploadBufferSizeBytes().intValue());
}
logWarningIfPCollectionViewHasNonDeterministicKeyCoder(pipeline);
if (containsUnboundedPCollection(pipeline)) {
  options.setStreaming(true);
}
private Pipeline createTestStreamingRunner() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setStreaming(true);
  options.setProject("someproject");
  options.setGcpTempLocation("gs://staging");
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setDataflowClient(dataflow);
  return Pipeline.create(options);
}
@Test
public void testGcsUploadBufferSizeUnchangedWhenNotDefault() throws IOException {
  int gcsUploadBufferSizeBytes = 12345678;

  DataflowPipelineOptions batchOptions = buildPipelineOptions();
  batchOptions.setGcsUploadBufferSizeBytes(gcsUploadBufferSizeBytes);
  batchOptions.setRunner(DataflowRunner.class);
  Pipeline.create(batchOptions);
  assertEquals(gcsUploadBufferSizeBytes, batchOptions.getGcsUploadBufferSizeBytes().intValue());

  DataflowPipelineOptions streamingOptions = buildPipelineOptions();
  streamingOptions.setStreaming(true);
  streamingOptions.setGcsUploadBufferSizeBytes(gcsUploadBufferSizeBytes);
  streamingOptions.setRunner(DataflowRunner.class);
  Pipeline.create(streamingOptions);
  assertEquals(
      gcsUploadBufferSizeBytes, streamingOptions.getGcsUploadBufferSizeBytes().intValue());
}
private static void injectMessages(BigtablePubsubOptions options) {
  String inputFile = options.getInputFile();
  String topic = options.getPubsubTopic();
  DataflowPipelineOptions copiedOptions = options.as(DataflowPipelineOptions.class);
  copiedOptions.setStreaming(false);
  copiedOptions.setNumWorkers(INJECTORNUMWORKERS);
  copiedOptions.setJobName(copiedOptions.getJobName() + "-injector");
  Pipeline injectorPipeline = Pipeline.create(copiedOptions);
  injectorPipeline
      .apply(TextIO.read().from(inputFile))
      .apply(ParDo.of(new FilterEmptyStringsFn()))
      .apply(PubsubIO.writeStrings().to(topic));
  injectorPipeline.run().waitUntilFinish();
}
/**
 * Test that in translation the name for a collection (in this case just a Create output) is
 * overridden to be what the Dataflow service expects.
 */
@Test
public void testNamesOverridden() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  options.setStreaming(false);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
  Pipeline pipeline = Pipeline.create(options);

  pipeline.apply("Jazzy", Create.of(3)).setName("foobizzle");

  runner.replaceTransforms(pipeline);

  Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob();

  // The Create step
  Step step = job.getSteps().get(0);

  // This is the name that is "set by the user" that the Dataflow translator must override
  String userSpecifiedName =
      getString(
          Structs.getListOfMaps(step.getProperties(), PropertyNames.OUTPUT_INFO, null).get(0),
          PropertyNames.USER_NAME);

  // This is the calculated name that must actually be used
  String calculatedName = getString(step.getProperties(), PropertyNames.USER_NAME) + ".out0";

  assertThat(userSpecifiedName, equalTo(calculatedName));
}
DataflowPipelineOptions options = buildPipelineOptions();
DataflowRunner runner = DataflowRunner.fromOptions(options);
options.setStreaming(false);
DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
DataflowPipelineOptions options = buildPipelineOptions();
DataflowRunner runner = DataflowRunner.fromOptions(options);
options.setStreaming(false);
DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
DataflowPipelineOptions options = buildPipelineOptions();
DataflowRunner runner = DataflowRunner.fromOptions(options);
options.setStreaming(true);
DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
DataflowPipelineOptions options = buildPipelineOptions();
DataflowRunner runner = DataflowRunner.fromOptions(options);
options.setStreaming(true);
options.setExperiments(Arrays.asList("beam_fn_api"));
DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);
options.as(DataflowPipelineOptions.class).setStreaming(true);
Pipeline p = Pipeline.create(options);