/**
 * Verifies that a batch pipeline leaves the GCS upload buffer size unset when the user has not
 * configured one.
 */
@Test
public void testGcsUploadBufferSizeIsUnsetForBatchWhenDefault() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setRunner(DataflowRunner.class);
  // Only create the pipeline; in batch mode this must not populate the buffer size.
  Pipeline.create(options);
  assertNull(options.getGcsUploadBufferSizeBytes());
}
@Test public void testGcsUploadBufferSizeIsSetForStreamingWhenDefault() throws IOException { DataflowPipelineOptions streamingOptions = buildPipelineOptions(); streamingOptions.setStreaming(true); streamingOptions.setRunner(DataflowRunner.class); Pipeline p = Pipeline.create(streamingOptions); // Instantiation of a runner prior to run() currently has a side effect of mutating the options. // This could be tested by DataflowRunner.fromOptions(streamingOptions) but would not ensure // that the pipeline itself had the expected options set. p.run(); assertEquals( DataflowRunner.GCS_UPLOAD_BUFFER_SIZE_BYTES_DEFAULT, streamingOptions.getGcsUploadBufferSizeBytes().intValue()); }
/**
 * Builds a batch pipeline backed by the mock Dataflow client, with path validation disabled.
 * Currently configured identically to {@link #createTestServiceRunner()}.
 */
private Pipeline createTestBatchRunner() {
  DataflowPipelineOptions batchOptions = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  batchOptions.setProject("someproject");
  batchOptions.setGcpTempLocation("gs://staging");
  batchOptions.setPathValidatorClass(NoopPathValidator.class);
  batchOptions.setDataflowClient(dataflow);
  batchOptions.setRunner(DataflowRunner.class);
  return Pipeline.create(batchOptions);
}
/** With neither a staging nor a temp location configured, runner construction must fail. */
@Test
public void testNoStagingLocationAndNoTempLocationFails() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject("foo-project");
  options.setRunner(DataflowRunner.class);

  // Expectations must be registered before the call that is expected to throw.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(
      "DataflowRunner requires gcpTempLocation, "
          + "but failed to retrieve a value from PipelineOption");
  DataflowRunner.fromOptions(options);
}
/**
 * Create a test pipeline that uses the {@link DataflowRunner} so that {@link GroupByKey} is not
 * expanded. This is used for verifying that even without expansion the proper errors show up.
 */
private Pipeline createTestServiceRunner() {
  DataflowPipelineOptions serviceOptions = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  serviceOptions.setProject("someproject");
  serviceOptions.setGcpTempLocation("gs://staging");
  serviceOptions.setPathValidatorClass(NoopPathValidator.class);
  serviceOptions.setDataflowClient(dataflow);
  serviceOptions.setRunner(DataflowRunner.class);
  return Pipeline.create(serviceOptions);
}
/**
 * Builds a streaming pipeline backed by the mock Dataflow client, with path validation disabled.
 */
private Pipeline createTestStreamingRunner() {
  DataflowPipelineOptions streamingOptions =
      PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  streamingOptions.setStreaming(true);
  streamingOptions.setProject("someproject");
  streamingOptions.setGcpTempLocation("gs://staging");
  streamingOptions.setPathValidatorClass(NoopPathValidator.class);
  streamingOptions.setDataflowClient(dataflow);
  streamingOptions.setRunner(DataflowRunner.class);
  return Pipeline.create(streamingOptions);
}
@Test public void testNoProjectFails() { DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); options.setRunner(DataflowRunner.class); // Explicitly set to null to prevent the default instance factory from reading credentials // from a user's environment, causing this test to fail. options.setProject(null); thrown.expect(IllegalArgumentException.class); thrown.expectMessage("Project id"); thrown.expectMessage("when running a Dataflow in the cloud"); DataflowRunner.fromOptions(options); }
/** A gcpTempLocation alone (without a generic tempLocation) is sufficient for the runner. */
@Test
public void testGcpTempAndNoTempLocationSucceeds() throws Exception {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject("foo-project");
  options.setGcpTempLocation(VALID_TEMP_BUCKET);
  options.setGcpCredential(new TestCredential());
  options.setGcsUtil(mockGcsUtil);
  options.setRunner(DataflowRunner.class);

  DataflowRunner.fromOptions(options);
}
/** A generic tempLocation alone (without an explicit gcpTempLocation) is sufficient. */
@Test
public void testTempLocationAndNoGcpTempLocationSucceeds() throws Exception {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject("foo-project");
  options.setTempLocation(VALID_TEMP_BUCKET);
  options.setGcpCredential(new TestCredential());
  options.setGcsUtil(mockGcsUtil);
  options.setRunner(DataflowRunner.class);

  DataflowRunner.fromOptions(options);
}
/** A well-formed project id is accepted by the runner. */
@Test
public void testProjectId() throws IOException {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject("foo-12345");
  options.setGcpTempLocation(VALID_TEMP_BUCKET);
  options.setGcpCredential(new TestCredential());
  options.setGcsUtil(mockGcsUtil);
  options.setRunner(DataflowRunner.class);

  DataflowRunner.fromOptions(options);
}
/** A domain-scoped project id (e.g. "google.com:...") is accepted by the runner. */
@Test
public void testProjectPrefix() throws IOException {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject("google.com:some-project-12345");
  options.setGcpTempLocation(VALID_TEMP_BUCKET);
  options.setGcpCredential(new TestCredential());
  options.setGcsUtil(mockGcsUtil);
  options.setRunner(DataflowRunner.class);

  DataflowRunner.fromOptions(options);
}
/** A free-form project description (contains spaces) must be rejected as a project id. */
@Test
public void testProjectDescription() throws IOException {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject("some project");
  options.setGcpTempLocation(VALID_TEMP_BUCKET);
  options.setGcsUtil(mockGcsUtil);
  options.setRunner(DataflowRunner.class);

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Project ID");
  thrown.expectMessage("project description");
  DataflowRunner.fromOptions(options);
}
/**
 * Replacing transforms on a batch pipeline containing a single-output stateful {@link ParDo} must
 * not crash, and the original user {@link DoFn} must remain reachable after the override.
 */
@Test
public void testSingleOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  DummyStatefulDoFn statefulFn = new DummyStatefulDoFn();
  pipeline.apply(Create.of(KV.of(1, 2))).apply(ParDo.of(statefulFn));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) statefulFn));
}
/**
 * Same as {@code testSingleOutputOverrideNonCrashing}, but with the {@code beam_fn_api}
 * experiment enabled.
 */
@Test
public void testFnApiSingleOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions("--experiments=beam_fn_api");
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  DummyStatefulDoFn statefulFn = new DummyStatefulDoFn();
  pipeline.apply(Create.of(KV.of(1, 2))).apply(ParDo.of(statefulFn));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) statefulFn));
}
/** A bare numeric project number (rather than a project id) must be rejected. */
@Test
public void testProjectNumber() throws IOException {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject("12345");
  options.setGcpTempLocation(VALID_TEMP_BUCKET);
  options.setGcsUtil(mockGcsUtil);
  options.setRunner(DataflowRunner.class);

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Project ID");
  thrown.expectMessage("project number");
  DataflowRunner.fromOptions(options);
}
/**
 * Builds a simple read/write pipeline over GCS paths using the given options, then applies the
 * runner's transform replacements before returning it.
 */
private Pipeline buildPipeline(DataflowPipelineOptions options) {
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply("ReadMyFile", TextIO.read().from("gs://bucket/object"))
      .apply("WriteMyFile", TextIO.write().to("gs://bucket/object"));
  DataflowRunner.fromOptions(options).replaceTransforms(pipeline);
  return pipeline;
}
private Pipeline buildDataflowPipeline(DataflowPipelineOptions options) { options.setStableUniqueNames(CheckEnabled.ERROR); options.setRunner(DataflowRunner.class); Pipeline p = Pipeline.create(options); p.apply("ReadMyFile", TextIO.read().from("gs://bucket/object")) .apply("WriteMyFile", TextIO.write().to("gs://bucket/object")); // Enable the FileSystems API to know about gs:// URIs in this test. FileSystems.setDefaultPipelineOptions(options); return p; }
@Test public void testGcsStagingLocationInitialization() throws Exception { // Set temp location (required), and check that staging location is set. DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class); options.setTempLocation(VALID_TEMP_BUCKET); options.setProject(PROJECT_ID); options.setGcpCredential(new TestCredential()); options.setGcsUtil(mockGcsUtil); options.setRunner(DataflowRunner.class); DataflowRunner.fromOptions(options); assertNotNull(options.getStagingLocation()); }
/** The runner's toString includes the class name and the lower-cased job name. */
@Test
public void testToString() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setJobName("TestJobName");
  options.setProject("test-project");
  options.setTempLocation("gs://test/temp/location");
  options.setGcpCredential(new TestCredential());
  options.setPathValidatorClass(NoopPathValidator.class);
  options.setRunner(DataflowRunner.class);

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  assertEquals("DataflowRunner#testjobname", runner.toString());
}
/** A negative worker-harness thread count must be rejected with an explanatory message. */
@Test
public void testInvalidNumberOfWorkerHarnessThreads() throws IOException {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  FileSystems.setDefaultPipelineOptions(options);
  options.setProject("foo-12345");
  options.setGcpTempLocation(VALID_TEMP_BUCKET);
  options.setGcsUtil(mockGcsUtil);
  options.setRunner(DataflowRunner.class);

  // The debug-options view shares the same underlying options object.
  options.as(DataflowPipelineDebugOptions.class).setNumberOfWorkerHarnessThreads(-1);

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Number of worker harness threads");
  thrown.expectMessage("Please make sure the value is non-negative.");
  DataflowRunner.fromOptions(options);
}