private void finalizeDatasetState(JobState.DatasetState datasetState, String datasetUrn) { for (TaskState taskState : datasetState.getTaskStates()) { // Backoff the actual high watermark to the low watermark for each task that has not been committed if (taskState.getWorkingState() != WorkUnitState.WorkingState.COMMITTED) { taskState.backoffActualHighWatermark(); if (this.jobContext.getJobCommitPolicy() == JobCommitPolicy.COMMIT_ON_FULL_SUCCESS) { // Determine the final dataset state based on the task states (post commit) and the job commit policy. // 1. If COMMIT_ON_FULL_SUCCESS is used, the processing of the dataset is considered failed if any // task for the dataset failed to be committed. // 2. Otherwise, the processing of the dataset is considered successful even if some tasks for the // dataset failed to be committed. datasetState.setState(JobState.RunningState.FAILED); Optional<String> taskStateException = taskState.getTaskFailureException(); log.warn("At least one task did not committed successfully. Setting dataset state to FAILED.", taskStateException.isPresent() ? taskStateException.get() : "Exception not set."); } } } datasetState.setId(datasetUrn); }
this.datasetState.setState(JobState.RunningState.COMMITTED); } else { if (this.datasetState.getState() == JobState.RunningState.SUCCESSFUL) { this.datasetState.setState(JobState.RunningState.COMMITTED); if (commitSequenceBuilder.isPresent()) { buildAndExecuteCommitSequence(commitSequenceBuilder.get(), datasetState, datasetUrn); datasetState.setState(JobState.RunningState.COMMITTED); } else if (canPersistStates) { persistDatasetState(datasetUrn, datasetState);
/**
 * Build a {@link JobState.DatasetState} seeded from this {@link JobState} instance.
 *
 * <p>The job name, job id, start time, end time and duration are always carried over. The running
 * state, task count, task states and skipped task states are carried over only for a full copy.
 *
 * @param fullCopy {@code true} to also copy the state, task count, task states and skipped task
 *        states of this {@link JobState} instance
 * @return the newly created {@link JobState.DatasetState} object
 */
public DatasetState newDatasetState(boolean fullCopy) {
  DatasetState copy = new DatasetState(this.jobName, this.jobId);

  // Timing information is copied regardless of the copy mode.
  copy.setStartTime(this.startTime);
  copy.setEndTime(this.endTime);
  copy.setDuration(this.duration);

  if (!fullCopy) {
    return copy;
  }

  // Full copy: bring over the running state and all task-level information as well.
  copy.setState(this.state);
  copy.setTaskCount(this.taskCount);
  copy.addTaskStates(this.taskStates.values());
  copy.addSkippedTaskStates(this.skippedTaskStates.values());
  return copy;
}
this.datasetState.setState(JobState.RunningState.COMMITTED); } else { if (this.datasetState.getState() == JobState.RunningState.SUCCESSFUL) { this.datasetState.setState(JobState.RunningState.COMMITTED); if (commitSequenceBuilder.isPresent()) { buildAndExecuteCommitSequence(commitSequenceBuilder.get(), datasetState, datasetUrn); datasetState.setState(JobState.RunningState.COMMITTED); } else if (canPersistStates) { persistDatasetState(datasetUrn, datasetState);
private void finalizeDatasetState(JobState.DatasetState datasetState, String datasetUrn) { for (TaskState taskState : datasetState.getTaskStates()) { // Backoff the actual high watermark to the low watermark for each task that has not been committed if (taskState.getWorkingState() != WorkUnitState.WorkingState.COMMITTED) { taskState.backoffActualHighWatermark(); if (this.jobContext.getJobCommitPolicy() == JobCommitPolicy.COMMIT_ON_FULL_SUCCESS) { // Determine the final dataset state based on the task states (post commit) and the job commit policy. // 1. If COMMIT_ON_FULL_SUCCESS is used, the processing of the dataset is considered failed if any // task for the dataset failed to be committed. // 2. Otherwise, the processing of the dataset is considered successful even if some tasks for the // dataset failed to be committed. datasetState.setState(JobState.RunningState.FAILED); Optional<String> taskStateException = taskState.getTaskFailureException(); log.warn("At least one task did not committed successfully. Setting dataset state to FAILED.", taskStateException.isPresent() ? taskStateException.get() : "Exception not set."); } } } datasetState.setId(datasetUrn); }
/** * Finalize a given {@link JobState.DatasetState} before committing the dataset. * * This method is thread-safe. */ private void finalizeDatasetStateBeforeCommit(JobState.DatasetState datasetState) { for (TaskState taskState : datasetState.getTaskStates()) { if (taskState.getWorkingState() != WorkUnitState.WorkingState.SUCCESSFUL && this.jobContext.getJobCommitPolicy() == JobCommitPolicy.COMMIT_ON_FULL_SUCCESS) { // The dataset state is set to FAILED if any task failed and COMMIT_ON_FULL_SUCCESS is used datasetState.setState(JobState.RunningState.FAILED); datasetState.incrementJobFailures(); return; } } datasetState.setState(JobState.RunningState.SUCCESSFUL); datasetState.setNoJobFailure(); }
/**
 * Build a {@link JobState.DatasetState} seeded from this {@link JobState} instance.
 *
 * <p>The job name, job id, start time, end time and duration are always carried over. The running
 * state, task count, task states and skipped task states are carried over only for a full copy.
 *
 * @param fullCopy {@code true} to also copy the state, task count, task states and skipped task
 *        states of this {@link JobState} instance
 * @return the newly created {@link JobState.DatasetState} object
 */
public DatasetState newDatasetState(boolean fullCopy) {
  DatasetState copy = new DatasetState(this.jobName, this.jobId);

  // Timing information is copied regardless of the copy mode.
  copy.setStartTime(this.startTime);
  copy.setEndTime(this.endTime);
  copy.setDuration(this.duration);

  if (!fullCopy) {
    return copy;
  }

  // Full copy: bring over the running state and all task-level information as well.
  copy.setState(this.state);
  copy.setTaskCount(this.taskCount);
  copy.addTaskStates(this.taskStates.values());
  copy.addSkippedTaskStates(this.skippedTaskStates.values());
  return copy;
}
datasetState.setState(JobState.RunningState.COMMITTED); datasetState.setId(TEST_DATASET_URN1); datasetState.setStartTime(this.startTime);
datasetState.setState(JobState.RunningState.COMMITTED); datasetState.setId(TEST_DATASET_URN); datasetState.setStartTime(this.startTime);
@Test(dependsOnMethods = "testGetJobState") public void testPersistDatasetState() throws IOException { JobState.DatasetState datasetState = new JobState.DatasetState(TEST_JOB_NAME, TEST_JOB_ID); datasetState.setDatasetUrn(TEST_DATASET_URN); datasetState.setState(JobState.RunningState.COMMITTED); datasetState.setId(TEST_DATASET_URN); datasetState.setStartTime(this.startTime); datasetState.setEndTime(this.startTime + 1000); datasetState.setDuration(1000); for (int i = 0; i < 3; i++) { TaskState taskState = new TaskState(); taskState.setJobId(TEST_JOB_ID); taskState.setTaskId(TEST_TASK_ID_PREFIX + i); taskState.setId(TEST_TASK_ID_PREFIX + i); taskState.setWorkingState(WorkUnitState.WorkingState.COMMITTED); datasetState.addTaskState(taskState); } zkDatasetStateStore.persistDatasetState(TEST_DATASET_URN, datasetState); // persist a second dataset state to test that retrieval of multiple dataset states works datasetState.setDatasetUrn(TEST_DATASET_URN2); datasetState.setId(TEST_DATASET_URN2); datasetState.setDuration(2000); zkDatasetStateStore.persistDatasetState(TEST_DATASET_URN2, datasetState); // second job name for testing getting store names in a later test case datasetState.setJobName(TEST_JOB_NAME2); zkDatasetStateStore.persistDatasetState(TEST_DATASET_URN2, datasetState); }
/**
 * Persists one committed {@link JobState.DatasetState} per dataset through
 * {@code fsDatasetStateStore}.
 *
 * <p>Each dataset state carries a single committed task. For every dataset except the last one,
 * that task's actual high watermark is set to the watermark at the same index; the task of the
 * last dataset gets no watermark.
 *
 * @param datasets the datasets to persist states for; at least two are required by the
 *        precondition check
 * @param watermarks the watermarks, looked up by dataset index for all but the last dataset
 * @param jobName the job name given to every created dataset state
 * @throws IOException propagated from the state store
 */
public void persistDatasetState(List<Dataset> datasets, List<LongWatermark> watermarks, String jobName)
    throws IOException {
  Preconditions.checkArgument(datasets.size() >= 2);

  for (int index = 0; index < datasets.size(); index++) {
    String urn = datasets.get(index).getUrn();

    JobState.DatasetState state = new JobState.DatasetState(jobName, TEST_JOB_ID);
    state.setDatasetUrn(urn);
    state.setState(JobState.RunningState.COMMITTED);
    state.setId(urn);
    state.setStartTime(this.startTime);
    state.setEndTime(this.startTime + 1000);
    state.setDuration(1000);

    String taskId = TEST_TASK_ID_PREFIX + index;
    TaskState task = new TaskState();
    task.setJobId(TEST_JOB_ID);
    task.setTaskId(taskId);
    task.setId(taskId);
    task.setWorkingState(WorkUnitState.WorkingState.COMMITTED);

    // The last dataset is deliberately left without an actual high watermark.
    boolean isLastDataset = index >= datasets.size() - 1;
    if (!isLastDataset) {
      task.setActualHighWatermark(watermarks.get(index));
    }

    state.addTaskState(task);
    this.fsDatasetStateStore.persistDatasetState(urn, state);
  }
}
/** * Finalize a given {@link JobState.DatasetState} before committing the dataset. * * This method is thread-safe. */ private void finalizeDatasetStateBeforeCommit(JobState.DatasetState datasetState) { for (TaskState taskState : datasetState.getTaskStates()) { if (taskState.getWorkingState() != WorkUnitState.WorkingState.SUCCESSFUL && this.jobContext.getJobCommitPolicy() == JobCommitPolicy.COMMIT_ON_FULL_SUCCESS) { // The dataset state is set to FAILED if any task failed and COMMIT_ON_FULL_SUCCESS is used datasetState.setState(JobState.RunningState.FAILED); datasetState.incrementJobFailures(); return; } } datasetState.setState(JobState.RunningState.SUCCESSFUL); datasetState.setNoJobFailure(); }
/**
 * Persists a single dataset state through {@code fsDatasetStateStore}.
 *
 * <p>The persisted {@link JobState.DatasetState} is marked {@code COMMITTED}, identified by
 * {@code TEST_DATASET_URN}, and holds three committed task states.
 *
 * @throws IOException propagated from the state store
 */
@Test(dependsOnMethods = "testGetJobState")
public void testPersistDatasetState() throws IOException {
  JobState.DatasetState state = new JobState.DatasetState(TEST_JOB_NAME, TEST_JOB_ID);

  state.setDatasetUrn(TEST_DATASET_URN);
  state.setState(JobState.RunningState.COMMITTED);
  state.setId(TEST_DATASET_URN);
  state.setStartTime(this.startTime);
  state.setEndTime(this.startTime + 1000);
  state.setDuration(1000);

  // Attach three committed tasks, each identified by the shared prefix plus its index.
  for (int taskIndex = 0; taskIndex < 3; taskIndex++) {
    String taskId = TEST_TASK_ID_PREFIX + taskIndex;
    TaskState task = new TaskState();
    task.setJobId(TEST_JOB_ID);
    task.setTaskId(taskId);
    task.setId(taskId);
    task.setWorkingState(WorkUnitState.WorkingState.COMMITTED);
    state.addTaskState(task);
  }

  this.fsDatasetStateStore.persistDatasetState(TEST_DATASET_URN, state);
}