private ListMultimap<TaskFactoryWrapper, TaskState> groupByTaskFactory(JobState.DatasetState datasetState) { ListMultimap<TaskFactoryWrapper, TaskState> groupsMap = ArrayListMultimap.create(); for (TaskState taskState : datasetState.getTaskStates()) { groupsMap.put(new TaskFactoryWrapper(TaskUtils.getTaskFactory(taskState).orNull()), taskState); } return groupsMap; }
private void maySubmitLineageEvent(JobState.DatasetState datasetState) { Collection<TaskState> allStates = datasetState.getTaskStates(); Collection<TaskState> states = Lists.newArrayList(); // Filter out failed states or states that don't have lineage info for (TaskState state : allStates) { if (state.getWorkingState() == WorkUnitState.WorkingState.COMMITTED && LineageInfo.hasLineageInfo(state)) { states.add(state); } } if (states.size() == 0) { log.info("Will not submit lineage events as no state contains lineage info"); return; } try { if (StringUtils.isEmpty(datasetUrn)) { // This dataset may contain different kinds of LineageEvent for (Map.Entry<String, Collection<TaskState>> entry : aggregateByLineageEvent(states).entrySet()) { submitLineageEvent(entry.getKey(), entry.getValue()); } } else { submitLineageEvent(datasetUrn, states); } } finally { // Purge lineage info from all states for (TaskState taskState : allStates) { LineageInfo.purgeLineageInfo(taskState); } } }
@SuppressWarnings("unchecked") private Optional<CommitSequence.Builder> generateCommitSequenceBuilder(JobState.DatasetState datasetState, Collection<TaskState> taskStates) { try (Closer closer = Closer.create()) { Class<? extends CommitSequencePublisher> dataPublisherClass = (Class<? extends CommitSequencePublisher>) Class .forName(datasetState .getProp(ConfigurationKeys.DATA_PUBLISHER_TYPE, ConfigurationKeys.DEFAULT_DATA_PUBLISHER_TYPE)); CommitSequencePublisher publisher = (CommitSequencePublisher) closer .register(DataPublisher.getInstance(dataPublisherClass, this.jobContext.getJobState())); publisher.publish(taskStates); return publisher.getCommitSequenceBuilder(); } catch (Throwable t) { log.error("Failed to generate commit sequence", t); setTaskFailureException(datasetState.getTaskStates(), t); throw Throwables.propagate(t); } }
private ListMultimap<TaskFactoryWrapper, TaskState> groupByTaskFactory(JobState.DatasetState datasetState) { ListMultimap<TaskFactoryWrapper, TaskState> groupsMap = ArrayListMultimap.create(); for (TaskState taskState : datasetState.getTaskStates()) { groupsMap.put(new TaskFactoryWrapper(TaskUtils.getTaskFactory(taskState).orNull()), taskState); } return groupsMap; }
for (String dataSetUrn : sortedDatasetUrns) { Assert.assertEquals(dataSetUrn, "TestDataset" + index); List<TaskState> taskStates = datasetStatesByUrns.get(dataSetUrn).getTaskStates(); Assert.assertEquals(taskStates.size(), 1); Assert.assertEquals(taskStates.get(0).getTaskId(), "TestTask-" + index);
Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED); Assert.assertEquals(datasetState.getTaskCount(), 1); TaskState taskState = datasetState.getTaskStates().get(0); Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED); Assert.assertEquals(datasetState.getCompletedTasks(), 1); for (TaskState taskState : datasetState.getTaskStates()) { Assert.assertEquals(taskState.getProp(ConfigurationKeys.DATASET_URN_KEY), "Dataset" + i); Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.COMMITTED);
TaskState readTaskState = readState.getTaskStates().get(0); Assert.assertEquals(readTaskState.getProp("key"), "value"); metadata.delete();
public void runTestWithMultipleDatasets(Properties jobProps) throws Exception { String jobName = jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY); String jobId = JobLauncherUtils.newJobId(jobName).toString(); jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, jobId); jobProps.setProperty(ConfigurationKeys.SOURCE_CLASS_KEY, MultiDatasetTestSource.class.getName()); Closer closer = Closer.create(); try { JobLauncher jobLauncher = closer.register(JobLauncherFactory.newJobLauncher(this.launcherProps, jobProps)); jobLauncher.launchJob(null); } finally { closer.close(); } for (int i = 0; i < 4; i++) { List<JobState.DatasetState> datasetStateList = this.datasetStateStore.getAll(jobName, "Dataset" + i + "-current.jst"); DatasetState datasetState = datasetStateList.get(0); Assert.assertEquals(datasetState.getDatasetUrn(), "Dataset" + i); Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED); Assert.assertEquals(datasetState.getCompletedTasks(), 1); Assert.assertEquals(datasetState.getJobFailures(), 0); for (TaskState taskState : datasetState.getTaskStates()) { Assert.assertEquals(taskState.getProp(ConfigurationKeys.DATASET_URN_KEY), "Dataset" + i); Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.COMMITTED); Assert.assertEquals(taskState.getPropAsLong(ConfigurationKeys.WRITER_RECORDS_WRITTEN), TestExtractor.TOTAL_RECORDS); } } }
public void runTestWithFork(Properties jobProps) throws Exception { String jobName = jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY); String jobId = JobLauncherUtils.newJobId(jobName).toString(); jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, jobId); try (JobLauncher jobLauncher = JobLauncherFactory.newJobLauncher(this.launcherProps, jobProps)) { jobLauncher.launchJob(null); } List<JobState.DatasetState> datasetStateList = this.datasetStateStore.getAll(jobName, sanitizeJobNameForDatasetStore(jobId) + ".jst"); DatasetState datasetState = datasetStateList.get(0); Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED); Assert.assertEquals(datasetState.getCompletedTasks(), 4); Assert.assertEquals(datasetState.getJobFailures(), 0); FileSystem lfs = FileSystem.getLocal(new Configuration()); for (TaskState taskState : datasetState.getTaskStates()) { Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.COMMITTED); Path path = new Path(this.launcherProps.getProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR), new Path(taskState.getExtract().getOutputFilePath(), "fork_0")); Assert.assertTrue(lfs.exists(path)); Assert.assertEquals(lfs.listStatus(path).length, 2); Assert.assertEquals(taskState.getPropAsLong(ConfigurationKeys.WRITER_RECORDS_WRITTEN + ".0"), TestExtractor.TOTAL_RECORDS); path = new Path(this.launcherProps.getProperty(ConfigurationKeys.DATA_PUBLISHER_FINAL_DIR), new Path(taskState.getExtract().getOutputFilePath(), "fork_1")); Assert.assertTrue(lfs.exists(path)); Assert.assertEquals(lfs.listStatus(path).length, 2); Assert.assertEquals(taskState.getPropAsLong(ConfigurationKeys.WRITER_RECORDS_WRITTEN + ".1"), TestExtractor.TOTAL_RECORDS); } }
public void runTest(Properties jobProps) throws Exception { String jobName = jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY); String jobId = JobLauncherUtils.newJobId(jobName); jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, jobId); JobContext jobContext = null; Closer closer = Closer.create(); try { JobLauncher jobLauncher = closer.register(JobLauncherFactory.newJobLauncher(this.launcherProps, jobProps)); jobLauncher.launchJob(null); jobContext = ((AbstractJobLauncher) jobLauncher).getJobContext(); } finally { closer.close(); } Assert.assertTrue(jobContext.getJobMetricsOptional().isPresent()); String jobMetricContextTags = jobContext.getJobMetricsOptional().get().getMetricContext().getTags().toString(); Assert.assertTrue(jobMetricContextTags.contains(ClusterNameTags.CLUSTER_IDENTIFIER_TAG_NAME), ClusterNameTags.CLUSTER_IDENTIFIER_TAG_NAME + " tag missing in job metric context tags."); List<JobState.DatasetState> datasetStateList = this.datasetStateStore.getAll(jobName, sanitizeJobNameForDatasetStore(jobId) + ".jst"); DatasetState datasetState = datasetStateList.get(0); Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED); Assert.assertEquals(datasetState.getCompletedTasks(), 4); Assert.assertEquals(datasetState.getJobFailures(), 0); for (TaskState taskState : datasetState.getTaskStates()) { Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.COMMITTED); Assert.assertEquals(taskState.getPropAsLong(ConfigurationKeys.WRITER_RECORDS_WRITTEN), TestExtractor.TOTAL_RECORDS); } }
private void testDatasetStates(List<Dataset> datasets, List<LongWatermark> watermarks, String jobName) throws IOException { Preconditions.checkArgument(datasets.size() >= 2); for (int i = 0; i < datasets.size(); i++) { JobState.DatasetState datasetState = this.fsDatasetStateStore.getLatestDatasetState(jobName, datasets.get(i).getUrn()); Assert.assertEquals(datasetState.getDatasetUrn(), datasets.get(i).getUrn()); Assert.assertEquals(datasetState.getJobName(), jobName); Assert.assertEquals(datasetState.getJobId(), TEST_JOB_ID); Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED); Assert.assertEquals(datasetState.getStartTime(), this.startTime); Assert.assertEquals(datasetState.getEndTime(), this.startTime + 1000); Assert.assertEquals(datasetState.getDuration(), 1000); Assert.assertEquals(datasetState.getCompletedTasks(), 1); TaskState taskState = datasetState.getTaskStates().get(0); Assert.assertEquals(taskState.getJobId(), TEST_JOB_ID); Assert.assertEquals(taskState.getTaskId(), TEST_TASK_ID_PREFIX + i); Assert.assertEquals(taskState.getId(), TEST_TASK_ID_PREFIX + i); Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.COMMITTED); if (i < datasets.size() - 1) { Assert.assertEquals(taskState.getActualHighWatermark(LongWatermark.class).getValue(), watermarks.get(i).getValue()); } } }
public void runTestWithPullLimit(Properties jobProps, long limit) throws Exception { String jobName = jobProps.getProperty(ConfigurationKeys.JOB_NAME_KEY); String jobId = JobLauncherUtils.newJobId(jobName).toString(); jobProps.setProperty(ConfigurationKeys.JOB_ID_KEY, jobId); Closer closer = Closer.create(); try { JobLauncher jobLauncher = closer.register(JobLauncherFactory.newJobLauncher(this.launcherProps, jobProps)); jobLauncher.launchJob(null); } finally { closer.close(); } List<JobState.DatasetState> datasetStateList = this.datasetStateStore.getAll(jobName, sanitizeJobNameForDatasetStore(jobId) + ".jst"); DatasetState datasetState = datasetStateList.get(0); Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED); Assert.assertEquals(datasetState.getCompletedTasks(), 4); Assert.assertEquals(datasetState.getJobFailures(), 0); for (TaskState taskState : datasetState.getTaskStates()) { Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.COMMITTED); Assert.assertEquals(taskState.getPropAsLong(ConfigurationKeys.EXTRACTOR_ROWS_EXTRACTED), limit); Assert.assertEquals(taskState.getPropAsLong(ConfigurationKeys.WRITER_ROWS_WRITTEN), limit); } }
@Test(dependsOnMethods = "testPersistDatasetState") public void testGetDatasetState() throws IOException { JobState.DatasetState datasetState = this.fsDatasetStateStore.getLatestDatasetState(TEST_JOB_NAME, TEST_DATASET_URN); Assert.assertEquals(datasetState.getDatasetUrn(), TEST_DATASET_URN); Assert.assertEquals(datasetState.getJobName(), TEST_JOB_NAME); Assert.assertEquals(datasetState.getJobId(), TEST_JOB_ID); Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED); Assert.assertEquals(datasetState.getStartTime(), this.startTime); Assert.assertEquals(datasetState.getEndTime(), this.startTime + 1000); Assert.assertEquals(datasetState.getDuration(), 1000); Assert.assertEquals(datasetState.getCompletedTasks(), 3); for (int i = 0; i < datasetState.getCompletedTasks(); i++) { TaskState taskState = datasetState.getTaskStates().get(i); Assert.assertEquals(taskState.getJobId(), TEST_JOB_ID); Assert.assertEquals(taskState.getTaskId(), TEST_TASK_ID_PREFIX + i); Assert.assertEquals(taskState.getId(), TEST_TASK_ID_PREFIX + i); Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.COMMITTED); } }
@Test(dependsOnMethods = "testPersistDatasetState") public void testGetDatasetState() throws IOException { JobState.DatasetState datasetState = dbDatasetStateStore.getLatestDatasetState(TEST_JOB_NAME, TEST_DATASET_URN); Assert.assertEquals(datasetState.getDatasetUrn(), TEST_DATASET_URN); Assert.assertEquals(datasetState.getJobName(), TEST_JOB_NAME); Assert.assertEquals(datasetState.getJobId(), TEST_JOB_ID); Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED); Assert.assertEquals(datasetState.getStartTime(), this.startTime); Assert.assertEquals(datasetState.getEndTime(), this.startTime + 1000); Assert.assertEquals(datasetState.getDuration(), 1000); Assert.assertEquals(datasetState.getCompletedTasks(), 3); for (int i = 0; i < datasetState.getCompletedTasks(); i++) { TaskState taskState = datasetState.getTaskStates().get(i); Assert.assertEquals(taskState.getJobId(), TEST_JOB_ID); Assert.assertEquals(taskState.getTaskId(), TEST_TASK_ID_PREFIX + i); Assert.assertEquals(taskState.getId(), TEST_TASK_ID_PREFIX + i); Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.COMMITTED); } }
@Override public List<WorkUnit> getWorkunits(SourceState state) { int numTasks = state.getPropAsInt(NUM_TASKS_KEY); String eventBusId = state.getProp(EVENTBUS_ID_KEY); EventBus eventBus = TestingEventBuses.getEventBus(eventBusId); Map<String, SourceState> previousStates = state.getPreviousDatasetStatesByUrns(); for (Map.Entry<String, SourceState> entry : previousStates.entrySet()) { JobState.DatasetState datasetState = (JobState.DatasetState) entry.getValue(); for (TaskState taskState : datasetState.getTaskStates()) { if (taskState.contains(Task.PERSISTENT_STATE) && eventBus != null) { eventBus.post(new Event(PREVIOUS_STATE_EVENT, taskState.getPropAsInt(Task.PERSISTENT_STATE))); } } } List<WorkUnit> workUnits = Lists.newArrayList(); for (int i = 0; i < numTasks; i++) { workUnits.add(createWorkUnit(i, eventBusId)); } return workUnits; }
private void finalizeDatasetState(JobState.DatasetState datasetState, String datasetUrn) { for (TaskState taskState : datasetState.getTaskStates()) { // Backoff the actual high watermark to the low watermark for each task that has not been committed if (taskState.getWorkingState() != WorkUnitState.WorkingState.COMMITTED) { taskState.backoffActualHighWatermark(); if (this.jobContext.getJobCommitPolicy() == JobCommitPolicy.COMMIT_ON_FULL_SUCCESS) { // Determine the final dataset state based on the task states (post commit) and the job commit policy. // 1. If COMMIT_ON_FULL_SUCCESS is used, the processing of the dataset is considered failed if any // task for the dataset failed to be committed. // 2. Otherwise, the processing of the dataset is considered successful even if some tasks for the // dataset failed to be committed. datasetState.setState(JobState.RunningState.FAILED); Optional<String> taskStateException = taskState.getTaskFailureException(); log.warn("At least one task did not committed successfully. Setting dataset state to FAILED.", taskStateException.isPresent() ? taskStateException.get() : "Exception not set."); } } } datasetState.setId(datasetUrn); }
public void testLaunchJob() throws Exception { final ConcurrentHashMap<String, Boolean> runningMap = new ConcurrentHashMap<>(); // Normal job launcher final Properties properties = generateJobProperties(this.baseConfig, "1", "_1504201348470"); final GobblinHelixJobLauncher gobblinHelixJobLauncher = this.closer.register( new GobblinHelixJobLauncher(properties, this.helixManager, this.appWorkDir, ImmutableList.<Tag<?>>of(), runningMap, java.util.Optional.empty())); gobblinHelixJobLauncher.launchJob(null); final File jobOutputFile = getJobOutputFile(properties); Assert.assertTrue(jobOutputFile.exists()); Schema schema = new Schema.Parser().parse(TestHelper.SOURCE_SCHEMA); TestHelper.assertGenericRecords(jobOutputFile, schema); List<JobState.DatasetState> datasetStates = this.datasetStateStore.getAll(properties.getProperty(ConfigurationKeys.JOB_NAME_KEY), FsDatasetStateStore.CURRENT_DATASET_STATE_FILE_SUFFIX + FsDatasetStateStore.DATASET_STATE_STORE_TABLE_SUFFIX); Assert.assertEquals(datasetStates.size(), 1); JobState.DatasetState datasetState = datasetStates.get(0); Assert.assertEquals(datasetState.getCompletedTasks(), 1); Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED); Assert.assertEquals(datasetState.getTaskStates().size(), 1); Assert.assertEquals(datasetState.getTaskStates().get(0).getWorkingState(), WorkUnitState.WorkingState.COMMITTED); }
@Test(dependsOnMethods = "testPersistDatasetState") public void testGetDatasetState() throws IOException { JobState.DatasetState datasetState = zkDatasetStateStore.getLatestDatasetState(TEST_JOB_NAME, TEST_DATASET_URN); Assert.assertEquals(datasetState.getDatasetUrn(), TEST_DATASET_URN); Assert.assertEquals(datasetState.getJobName(), TEST_JOB_NAME); Assert.assertEquals(datasetState.getJobId(), TEST_JOB_ID); Assert.assertEquals(datasetState.getState(), JobState.RunningState.COMMITTED); Assert.assertEquals(datasetState.getStartTime(), this.startTime); Assert.assertEquals(datasetState.getEndTime(), this.startTime + 1000); Assert.assertEquals(datasetState.getDuration(), 1000); Assert.assertEquals(datasetState.getCompletedTasks(), 3); for (int i = 0; i < datasetState.getCompletedTasks(); i++) { TaskState taskState = datasetState.getTaskStates().get(i); Assert.assertEquals(taskState.getJobId(), TEST_JOB_ID); Assert.assertEquals(taskState.getTaskId(), TEST_TASK_ID_PREFIX + i); Assert.assertEquals(taskState.getId(), TEST_TASK_ID_PREFIX + i); Assert.assertEquals(taskState.getWorkingState(), WorkUnitState.WorkingState.COMMITTED); } }
/** * Finalize a given {@link JobState.DatasetState} before committing the dataset. * * This method is thread-safe. */ private void finalizeDatasetStateBeforeCommit(JobState.DatasetState datasetState) { for (TaskState taskState : datasetState.getTaskStates()) { if (taskState.getWorkingState() != WorkUnitState.WorkingState.SUCCESSFUL && this.jobContext.getJobCommitPolicy() == JobCommitPolicy.COMMIT_ON_FULL_SUCCESS) { // The dataset state is set to FAILED if any task failed and COMMIT_ON_FULL_SUCCESS is used datasetState.setState(JobState.RunningState.FAILED); datasetState.incrementJobFailures(); return; } } datasetState.setState(JobState.RunningState.SUCCESSFUL); datasetState.setNoJobFailure(); }
@Override public DataPublisher createDataPublisher(JobState.DatasetState datasetState) { EventBus eventBus = null; if (datasetState.getTaskStates().get(0).contains(EVENTBUS_ID_KEY)) { eventBus = TestingEventBuses.getEventBus(datasetState.getTaskStates().get(0).getProp(EVENTBUS_ID_KEY)); } return new Publisher(datasetState, eventBus); }