/**
 * Constructs a runner from the given pipeline options.
 *
 * <p>Visible for testing so tests can build a runner directly instead of going through the
 * public factory path.
 *
 * @param options the Dataflow pipeline options used to build the service client and translator
 */
@VisibleForTesting
protected DataflowRunner(DataflowPipelineOptions options) {
  this.options = options;
  this.translator = DataflowPipelineTranslator.fromOptions(options);
  this.dataflowClient = DataflowClient.create(options);
  this.pcollectionsRequiringIndexedFormat = new HashSet<>();
  this.ptransformViewsWithNonDeterministicKeyCoders = new HashSet<>();
}
Job job = dataflowClient.getJob(jobId); State currentState = MonitoringUtil.toState(job.getCurrentState()); if (currentState.isTerminal()) {
/**
 * Fetches the metrics for the given job from the Dataflow service.
 *
 * @param job the job whose metrics to fetch
 * @return the job's metrics, or {@code null} if the service call failed
 */
@Nullable
@VisibleForTesting
JobMetrics getJobMetrics(DataflowPipelineJob job) {
  try {
    return dataflowClient.getJobMetrics(job.getJobId());
  } catch (IOException e) {
    // Best effort: log and report "no metrics" rather than failing the caller.
    LOG.warn("Failed to get job metrics: ", e);
    return null;
  }
}
String pageToken = null; while (true) { ListJobMessagesResponse response = dataflowClient.listJobMessages(jobId, pageToken); if (response == null || response.getJobMessages() == null) { return allMessages;
/**
 * Static factory: builds a {@link DataflowClient} wrapping the {@code Dataflow} service stub
 * carried by the given options.
 *
 * @param options the pipeline options providing the underlying service client
 * @return a new {@code DataflowClient}
 */
public static DataflowClient create(DataflowPipelineOptions options) {
  Dataflow dataflow = options.getDataflowClient();
  return new DataflowClient(dataflow, options);
}
content.setRequestedState("JOB_STATE_CANCELLED"); try { Job job = dataflowClient.updateJob(jobId, content); return MonitoringUtil.toState(job.getCurrentState()); } catch (IOException e) {
/**
 * Finds the id for the running job of the given name.
 *
 * <p>Pages through all jobs via {@code dataflowClient.listJobs} until a RUNNING job with a
 * matching name is found.
 *
 * @param jobName the name of the job to look up
 * @return the id of the matching RUNNING job
 * @throws RuntimeException if the service lookup fails
 * @throws IllegalArgumentException if no running job with that name exists
 */
private String getJobIdFromName(String jobName) {
  try {
    String pageToken = null;
    while (true) {
      ListJobsResponse page = dataflowClient.listJobs(pageToken);
      pageToken = page.getNextPageToken();
      for (Job candidate : page.getJobs()) {
        if (candidate.getName().equals(jobName)
            && MonitoringUtil.toState(candidate.getCurrentState()).equals(State.RUNNING)) {
          return candidate.getId();
        }
      }
      if (pageToken == null) {
        // No more pages to scan.
        break;
      }
    }
  } catch (GoogleJsonResponseException e) {
    // Prefer the structured error message when the service provides one.
    String detail = e.getDetails() != null ? e.getDetails().getMessage() : String.valueOf(e);
    throw new RuntimeException("Got error while looking up jobs: " + detail, e);
  } catch (IOException e) {
    throw new RuntimeException("Got error while looking up jobs: ", e);
  }
  throw new IllegalArgumentException("Could not find running job named " + jobName);
}
/**
 * Verifies that {@link MonitoringUtil#getJobMessages} follows the next-page token and returns
 * the concatenation of messages from all pages.
 */
@Test
public void testGetJobMessages() throws IOException {
  DataflowClient dataflowClient = mock(DataflowClient.class);
  String pageToken = "page_token";

  // First page: messages 0..99, with a token pointing at the second page.
  ListJobMessagesResponse firstResponse = responseWithMessages(0, 100);
  firstResponse.setNextPageToken(pageToken);
  // Second page: messages 100..149, with no further token.
  ListJobMessagesResponse secondResponse = responseWithMessages(100, 150);

  when(dataflowClient.listJobMessages(JOB_ID, null)).thenReturn(firstResponse);
  when(dataflowClient.listJobMessages(JOB_ID, pageToken)).thenReturn(secondResponse);

  MonitoringUtil util = new MonitoringUtil(dataflowClient);
  List<JobMessage> messages = util.getJobMessages(JOB_ID, -1);
  // Both pages must be present.
  assertEquals(150, messages.size());
}

/** Builds a response containing one message per index in {@code [start, end)}. */
private static ListJobMessagesResponse responseWithMessages(long start, long end) {
  ListJobMessagesResponse response = new ListJobMessagesResponse();
  response.setJobMessages(new ArrayList<>());
  for (long i = start; i < end; ++i) {
    JobMessage message = new JobMessage();
    message.setId("message_" + i);
    message.setTime(TimeUtil.toCloudTime(new Instant(i)));
    response.getJobMessages().add(message);
  }
  return response;
}
private JobMetrics getJobMetrics() throws IOException { if (cachedMetricResults != null) { // Metric results have been cached after the job ran. return cachedMetricResults; } JobMetrics result = dataflowClient.getJobMetrics(dataflowPipelineJob.jobId); if (dataflowPipelineJob.getState().isTerminal()) { // Add current query result to the cache. cachedMetricResults = result; } return result; }
/**
 * Verifies that cancelling a still-running job issues exactly one update request with the
 * requested CANCELLED state, and returns the state reported by the successful cancel call.
 */
@Test
public void testCancelUnterminatedJobThatSucceeds() throws IOException {
  Dataflow.Projects.Locations.Jobs.Update updateRequest =
      mock(Dataflow.Projects.Locations.Jobs.Update.class);
  when(mockJobs.update(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID), any(Job.class)))
      .thenReturn(updateRequest);
  when(updateRequest.execute()).thenReturn(new Job().setCurrentState("JOB_STATE_CANCELLED"));

  DataflowPipelineJob job =
      new DataflowPipelineJob(DataflowClient.create(options), JOB_ID, options, null);
  assertEquals(State.CANCELLED, job.cancel());

  // The cancel request must carry the job identity plus the requested CANCELLED state.
  Job expectedContent = new Job();
  expectedContent.setProjectId(PROJECT_ID);
  expectedContent.setId(JOB_ID);
  expectedContent.setRequestedState("JOB_STATE_CANCELLED");
  verify(mockJobs).update(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID), eq(expectedContent));
  verifyNoMoreInteractions(mockJobs);
}
/** Tests that a {@link DataflowPipelineJob} does not duplicate messages. */ @Test public void testWaitUntilFinishNoRepeatedLogs() throws Exception { DataflowPipelineJob job = new DataflowPipelineJob(mockDataflowClient, JOB_ID, options, null); Sleeper sleeper = new ZeroSleeper(); NanoClock nanoClock = mock(NanoClock.class); Instant separatingTimestamp = new Instant(42L); JobMessage theMessage = infoMessage(separatingTimestamp, "nothing"); MonitoringUtil mockMonitor = mock(MonitoringUtil.class); when(mockMonitor.getJobMessages(anyString(), anyLong())) .thenReturn(ImmutableList.of(theMessage)); // The Job just always reports "running" across all calls Job fakeJob = new Job(); fakeJob.setCurrentState("JOB_STATE_RUNNING"); when(mockDataflowClient.getJob(anyString())).thenReturn(fakeJob); // After waitUntilFinish the DataflowPipelineJob should record the latest message timestamp when(nanoClock.nanoTime()).thenReturn(0L).thenReturn(2000000000L); job.waitUntilFinish(Duration.standardSeconds(1), mockHandler, sleeper, nanoClock, mockMonitor); verify(mockHandler).process(ImmutableList.of(theMessage)); // Second waitUntilFinish should request jobs with `separatingTimestamp` so the monitor // will only return new messages when(nanoClock.nanoTime()).thenReturn(3000000000L).thenReturn(6000000000L); job.waitUntilFinish(Duration.standardSeconds(1), mockHandler, sleeper, nanoClock, mockMonitor); verify(mockMonitor).getJobMessages(anyString(), eq(separatingTimestamp.getMillis())); }
/**
 * Verifies that metric results are fetched from the service exactly once after the job reaches
 * a terminal state, and that later {@code queryMetrics} calls are served from the cache.
 *
 * <p>Fix: removed the dead local {@code modelJob}, which was built but never used.
 */
@Test
public void testCachingMetricUpdates() throws IOException {
  DataflowPipelineJob job = mock(DataflowPipelineJob.class);
  DataflowPipelineOptions options = mock(DataflowPipelineOptions.class);
  when(options.isStreaming()).thenReturn(false);
  when(job.getDataflowOptions()).thenReturn(options);
  // DONE is terminal, so the first fetch should populate the cache.
  when(job.getState()).thenReturn(State.DONE);
  job.jobId = JOB_ID;

  JobMetrics jobMetrics = new JobMetrics();
  jobMetrics.setMetrics(ImmutableList.of());
  DataflowClient dataflowClient = mock(DataflowClient.class);
  when(dataflowClient.getJobMetrics(JOB_ID)).thenReturn(jobMetrics);

  DataflowMetrics dataflowMetrics = new DataflowMetrics(job, dataflowClient);
  verify(dataflowClient, times(0)).getJobMetrics(JOB_ID);
  dataflowMetrics.queryMetrics(null);
  verify(dataflowClient, times(1)).getJobMetrics(JOB_ID);
  // The second query must be served from the cache — call count stays at one.
  dataflowMetrics.queryMetrics(null);
  verify(dataflowClient, times(1)).getJobMetrics(JOB_ID);
}
/**
 * Stubs the job-status service call to report the given state, then waits for the job to
 * finish.
 *
 * @param state the state the mocked status response should report
 * @return the state returned by {@code waitUntilFinish}
 */
public State mockWaitToFinishInState(State state) throws Exception {
  Dataflow.Projects.Locations.Jobs.Get getRequest =
      mock(Dataflow.Projects.Locations.Jobs.Get.class);

  Job reportedJob = new Job();
  reportedJob.setCurrentState("JOB_STATE_" + state.name());
  if (state == State.UPDATED) {
    // An UPDATED job points at the job that replaced it.
    reportedJob.setReplacedByJobId(REPLACEMENT_JOB_ID);
  }

  when(mockJobs.get(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID))).thenReturn(getRequest);
  when(getRequest.execute()).thenReturn(reportedJob);

  DataflowPipelineJob job =
      new DataflowPipelineJob(DataflowClient.create(options), JOB_ID, options, ImmutableMap.of());
  return job.waitUntilFinish(Duration.standardMinutes(1), null, fastClock, fastClock);
}
/** Verifies that {@code getJobMetrics} returns {@code null} when the service call throws. */
@Test
public void testGetJobMetricsThatFailsForException() throws Exception {
  DataflowPipelineJob job = spy(new DataflowPipelineJob(mockClient, "test-job", options, null));
  Pipeline pipeline = TestPipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3));
  // Any metrics lookup fails at the service layer.
  when(mockClient.getJobMetrics(anyString())).thenThrow(new IOException());
  TestDataflowRunner runner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  assertNull(runner.getJobMetrics(job));
}
/**
 * Test that {@link DataflowPipelineJob#cancel} doesn't throw if the Dataflow service returns
 * non-terminal state even though the cancel API call failed, which can happen in practice.
 *
 * <p>TODO: delete this code if the API calls become consistent.
 */
@Test
public void testCancelTerminatedJobWithStaleState() throws IOException {
  // Status endpoint (stale) says the job is still running...
  Dataflow.Projects.Locations.Jobs.Get getRequest =
      mock(Dataflow.Projects.Locations.Jobs.Get.class);
  Job staleStatus = new Job();
  staleStatus.setCurrentState("JOB_STATE_RUNNING");
  when(mockJobs.get(PROJECT_ID, REGION_ID, JOB_ID)).thenReturn(getRequest);
  when(getRequest.execute()).thenReturn(staleStatus);

  // ...while the cancel call reports the job already terminated.
  Dataflow.Projects.Locations.Jobs.Update updateRequest =
      mock(Dataflow.Projects.Locations.Jobs.Update.class);
  when(mockJobs.update(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID), any(Job.class)))
      .thenReturn(updateRequest);
  when(updateRequest.execute())
      .thenThrow(new IOException("Job has terminated in state SUCCESS"));

  DataflowPipelineJob job =
      new DataflowPipelineJob(DataflowClient.create(options), JOB_ID, options, null);
  State returned = job.cancel();
  assertThat(returned, equalTo(State.RUNNING));
  expectedLogs.verifyWarn("Cancel failed because job is already terminated.");
}
/**
 * Verifies that a streaming pipeline without {@code PAssert} succeeds when the mocked job
 * finishes in state DONE.
 */
@Test
public void testRunStreamingJobNotUsingPAssertThatSucceeds() throws Exception {
  options.setStreaming(true);
  Pipeline pipeline = TestPipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3));

  // Stub a job that reports DONE from every angle.
  DataflowPipelineJob mockJob = Mockito.mock(DataflowPipelineJob.class);
  when(mockJob.getState()).thenReturn(State.DONE);
  when(mockJob.waitUntilFinish(any(Duration.class), any(JobMessagesHandler.class)))
      .thenReturn(State.DONE);
  when(mockJob.getProjectId()).thenReturn("test-project");
  when(mockJob.getJobId()).thenReturn("test-job");

  DataflowRunner mockRunner = Mockito.mock(DataflowRunner.class);
  when(mockRunner.run(any(Pipeline.class))).thenReturn(mockJob);
  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockStreamingMetricResponse(ImmutableMap.of()));

  TestDataflowRunner runner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  runner.run(pipeline, mockRunner);
}
@Test public void testWaitToFinishTimeFail() throws Exception { Dataflow.Projects.Locations.Jobs.Get statusRequest = mock(Dataflow.Projects.Locations.Jobs.Get.class); when(mockJobs.get(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID))).thenReturn(statusRequest); when(statusRequest.execute()).thenThrow(IOException.class); DataflowPipelineJob job = new DataflowPipelineJob(DataflowClient.create(options), JOB_ID, options, ImmutableMap.of()); long startTime = fastClock.nanoTime(); State state = job.waitUntilFinish(Duration.millis(4), null, fastClock, fastClock); assertEquals(null, state); long timeDiff = TimeUnit.NANOSECONDS.toMillis(fastClock.nanoTime() - startTime); // Should only have slept for the 4 ms allowed. assertEquals(4L, timeDiff); }
/**
 * Verifies that a {@code null} metrics list from the service (how the API represents "no
 * metrics") is surfaced as empty query results rather than an error.
 *
 * <p>Fix: removed the dead local {@code modelJob}, which was built but never used.
 */
@Test
public void testEmptyMetricUpdates() throws IOException {
  DataflowPipelineJob job = mock(DataflowPipelineJob.class);
  DataflowPipelineOptions options = mock(DataflowPipelineOptions.class);
  when(options.isStreaming()).thenReturn(false);
  when(job.getDataflowOptions()).thenReturn(options);
  when(job.getState()).thenReturn(State.RUNNING);
  job.jobId = JOB_ID;

  JobMetrics jobMetrics = new JobMetrics();
  jobMetrics.setMetrics(null /* this is how the APIs represent empty metrics */);
  DataflowClient dataflowClient = mock(DataflowClient.class);
  when(dataflowClient.getJobMetrics(JOB_ID)).thenReturn(jobMetrics);

  DataflowMetrics dataflowMetrics = new DataflowMetrics(job, dataflowClient);
  MetricQueryResults result = dataflowMetrics.queryMetrics(null);
  assertThat(ImmutableList.copyOf(result.getCounters()), is(empty()));
  assertThat(ImmutableList.copyOf(result.getDistributions()), is(empty()));
}
/**
 * Verifies that cancelling a job that has already terminated (status says FAILED, the cancel
 * call throws) falls back to reporting the terminal state from the status call.
 */
@Test
public void testCancelTerminatedJob() throws IOException {
  // Status endpoint reports the job already FAILED.
  Dataflow.Projects.Locations.Jobs.Get getRequest =
      mock(Dataflow.Projects.Locations.Jobs.Get.class);
  Job failedStatus = new Job();
  failedStatus.setCurrentState("JOB_STATE_FAILED");
  when(mockJobs.get(PROJECT_ID, REGION_ID, JOB_ID)).thenReturn(getRequest);
  when(getRequest.execute()).thenReturn(failedStatus);

  // The cancel attempt itself fails at the service layer.
  Dataflow.Projects.Locations.Jobs.Update updateRequest =
      mock(Dataflow.Projects.Locations.Jobs.Update.class);
  when(mockJobs.update(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID), any(Job.class)))
      .thenReturn(updateRequest);
  when(updateRequest.execute()).thenThrow(new IOException());

  DataflowPipelineJob job =
      new DataflowPipelineJob(DataflowClient.create(options), JOB_ID, options, null);
  assertEquals(State.FAILED, job.cancel());

  // Exactly one cancel attempt with the expected payload, plus one status check.
  Job expectedContent = new Job();
  expectedContent.setProjectId(PROJECT_ID);
  expectedContent.setId(JOB_ID);
  expectedContent.setRequestedState("JOB_STATE_CANCELLED");
  verify(mockJobs).update(eq(PROJECT_ID), eq(REGION_ID), eq(JOB_ID), eq(expectedContent));
  verify(mockJobs).get(PROJECT_ID, REGION_ID, JOB_ID);
  verifyNoMoreInteractions(mockJobs);
}