/**
 * Converts a map of {@link DatasetState} values into a map of
 * {@link org.apache.gobblin.runtime.JobState.DatasetState} values with the same keys.
 *
 * <p>Each converted value is constructed only from the job name and the ID of the value it replaces.</p>
 *
 * @param previousDatasetStates the dataset states to convert
 * @return a new map with the same keys and the converted values
 */
private static Map<String, org.apache.gobblin.runtime.JobState.DatasetState> adaptDatasetStateMap(
    Map<String, DatasetState> previousDatasetStates) {
  return previousDatasetStates.entrySet().stream().collect(Collectors.toMap(
      entry -> entry.getKey(),
      entry -> {
        DatasetState previous = entry.getValue();
        return new org.apache.gobblin.runtime.JobState.DatasetState(previous.getJobName(), previous.getId());
      }));
}
Object writable = reader.getValueClass() == JobState.class ? new JobState() : new JobState.DatasetState(); if (writable instanceof JobState.DatasetState) { states.add((JobState.DatasetState) writable); writable = new JobState.DatasetState(); } else { states.add(((JobState) writable).newDatasetState(true));
deserializeConf)) { Object writable = reader.getValueClass() == JobState.class ? new JobState() : new JobState.DatasetState();
Object writable = reader.getValueClass() == JobState.class ? new JobState() : new JobState.DatasetState(); if (writable instanceof JobState.DatasetState) { states.add((JobState.DatasetState) writable); writable = new JobState.DatasetState(); } else { states.add(((JobState) writable).newDatasetState(true));
deserializeConf)) { Object writable = reader.getValueClass() == JobState.class ? new JobState() : new JobState.DatasetState();
/**
 * Build a {@link JobState.DatasetState} seeded from this {@link JobState} instance.
 *
 * <p>The job name, job ID, start time, end time and duration are always carried over. The state,
 * task count, task states and skipped task states are carried over only when a full copy is
 * requested.</p>
 *
 * @param fullCopy whether to do a full copy of this {@link JobState} instance
 * @return the newly created {@link JobState.DatasetState} object
 */
public DatasetState newDatasetState(boolean fullCopy) {
  DatasetState copy = new DatasetState(this.jobName, this.jobId);
  copy.setStartTime(this.startTime);
  copy.setEndTime(this.endTime);
  copy.setDuration(this.duration);

  // A shallow copy stops after the timing fields.
  if (!fullCopy) {
    return copy;
  }

  copy.setState(this.state);
  copy.setTaskCount(this.taskCount);
  copy.addTaskStates(this.taskStates.values());
  copy.addSkippedTaskStates(this.skippedTaskStates.values());
  return copy;
}
/**
 * Converts a map of {@link DatasetState} values into a map of
 * {@link org.apache.gobblin.runtime.JobState.DatasetState} values with the same keys.
 *
 * <p>Each converted value is constructed only from the job name and the ID of the value it replaces.</p>
 *
 * @param previousDatasetStates the dataset states to convert
 * @return a new map with the same keys and the converted values
 */
private static Map<String, org.apache.gobblin.runtime.JobState.DatasetState> adaptDatasetStateMap(
    Map<String, DatasetState> previousDatasetStates) {
  return previousDatasetStates.entrySet().stream().collect(Collectors.toMap(
      entry -> entry.getKey(),
      entry -> {
        DatasetState previous = entry.getValue();
        return new org.apache.gobblin.runtime.JobState.DatasetState(previous.getJobName(), previous.getId());
      }));
}
// Build a dataset state from TEST_JOB_NAME1 and the value returned by getJobId(TEST_JOB_ID, 1).
JobState.DatasetState datasetState = new JobState.DatasetState(TEST_JOB_NAME1, getJobId(TEST_JOB_ID, 1));
// Key the entry by the decimal string form of i; the value is a dataset state built with the no-argument constructor.
datasetStateMap.put(Integer.toString(i), new JobState.DatasetState());
// Key the entry by the decimal string form of i; the value is a dataset state built with the no-argument constructor.
datasetStateMap.put(Integer.toString(i), new JobState.DatasetState());
// Key the entry by the decimal string form of i; the value is a dataset state built with the no-argument constructor.
datasetStateMap.put(Integer.toString(i), new JobState.DatasetState());
// Dataset state for job1 / job1_id2 whose dataset URN and ID are both "dataset2" and which carries taskState.
JobState.DatasetState dataset2State = new JobState.DatasetState("job1", "job1_id2");
dataset2State.setDatasetUrn("dataset2");
dataset2State.setId("dataset2");
dataset2State.addTaskState(taskState);
// Persist two job1 states (different job IDs) under "dataset1".
store.persistDatasetState("dataset1", new JobState.DatasetState("job1", "job1_id1"));
store.persistDatasetState("dataset1", new JobState.DatasetState("job1", "job1_id2"));
// Persist the prepared state under "dataset2".
store.persistDatasetState("dataset2", dataset2State);
// Persist a job2 state under "dataset1" and a job3 state under the empty dataset URN.
store.persistDatasetState("dataset1", new JobState.DatasetState("job2", "job2_id1"));
store.persistDatasetState("", new JobState.DatasetState("job3", "job3_id1"));
@Test(dependsOnMethods = "testGetJobState") public void testPersistDatasetState() throws IOException { JobState.DatasetState datasetState = new JobState.DatasetState(TEST_JOB_NAME, TEST_JOB_ID);
@Test(dependsOnMethods = "testGetJobState") public void testPersistDatasetState() throws IOException { JobState.DatasetState datasetState = new JobState.DatasetState(TEST_JOB_NAME, TEST_JOB_ID); datasetState.setDatasetUrn(TEST_DATASET_URN); datasetState.setState(JobState.RunningState.COMMITTED); datasetState.setId(TEST_DATASET_URN); datasetState.setStartTime(this.startTime); datasetState.setEndTime(this.startTime + 1000); datasetState.setDuration(1000); for (int i = 0; i < 3; i++) { TaskState taskState = new TaskState(); taskState.setJobId(TEST_JOB_ID); taskState.setTaskId(TEST_TASK_ID_PREFIX + i); taskState.setId(TEST_TASK_ID_PREFIX + i); taskState.setWorkingState(WorkUnitState.WorkingState.COMMITTED); datasetState.addTaskState(taskState); } zkDatasetStateStore.persistDatasetState(TEST_DATASET_URN, datasetState); // persist a second dataset state to test that retrieval of multiple dataset states works datasetState.setDatasetUrn(TEST_DATASET_URN2); datasetState.setId(TEST_DATASET_URN2); datasetState.setDuration(2000); zkDatasetStateStore.persistDatasetState(TEST_DATASET_URN2, datasetState); // second job name for testing getting store names in a later test case datasetState.setJobName(TEST_JOB_NAME2); zkDatasetStateStore.persistDatasetState(TEST_DATASET_URN2, datasetState); }
/**
 * Resets ROOT_DIR on the local file system, creates two directories with one empty file each, and
 * builds the commit sequence under test: two rename steps that swap the files between the
 * directories, followed by a dataset state commit step.
 */
@BeforeClass
public void setUp() throws IOException {
  this.fs = FileSystem.getLocal(new Configuration());
  this.fs.delete(new Path(ROOT_DIR), true);

  Path stateStoreRoot = new Path(ROOT_DIR, "store");
  Path firstDir = new Path(ROOT_DIR, "dir1");
  Path secondDir = new Path(ROOT_DIR, "dir2");
  // Each file is renamed into the other directory.
  Path firstSource = new Path(firstDir, "file1");
  Path secondSource = new Path(secondDir, "file2");
  Path firstTarget = new Path(secondDir, "file1");
  Path secondTarget = new Path(firstDir, "file2");

  this.fs.mkdirs(firstDir);
  this.fs.mkdirs(secondDir);
  this.fs.createNewFile(firstSource);
  this.fs.createNewFile(secondSource);

  DatasetState datasetState = new DatasetState("job-name", "job-id");
  datasetState.setDatasetUrn("urn");
  datasetState.setNoJobFailure();

  State stepProps = new State();
  stepProps.setProp(ConfigurationKeys.STATE_STORE_ROOT_DIR_KEY, stateStoreRoot.toString());

  this.sequence = new CommitSequence.Builder()
      .withJobName("testjob")
      .withDatasetUrn("testurn")
      .beginStep(FsRenameCommitStep.Builder.class)
      .from(firstSource)
      .to(firstTarget)
      .withProps(stepProps)
      .endStep()
      .beginStep(FsRenameCommitStep.Builder.class)
      .from(secondSource)
      .to(secondTarget)
      .withProps(stepProps)
      .endStep()
      .beginStep(DatasetStateCommitStep.Builder.class)
      .withDatasetUrn("urn")
      .withDatasetState(datasetState)
      .withProps(stepProps)
      .endStep()
      .build();
}
/**
 * Persists one committed dataset state per dataset through {@code fsDatasetStateStore}.
 *
 * <p>Each dataset state holds a single committed task state. For every dataset except the last
 * one, that task state gets the watermark at the same index as its actual high watermark; the
 * last dataset's task state has none set here.</p>
 *
 * @param datasets the datasets to persist states for; at least two are required
 * @param watermarks watermarks looked up by dataset index for all but the last dataset
 * @param jobName the job name given to every created dataset state
 * @throws IOException if one of the underlying calls throws it
 */
public void persistDatasetState(List<Dataset> datasets, List<LongWatermark> watermarks, String jobName)
    throws IOException {
  final int datasetCount = datasets.size();
  Preconditions.checkArgument(datasetCount >= 2);

  final int lastIndex = datasetCount - 1;
  for (int index = 0; index < datasetCount; index++) {
    String urn = datasets.get(index).getUrn();

    JobState.DatasetState state = new JobState.DatasetState(jobName, TEST_JOB_ID);
    state.setDatasetUrn(urn);
    state.setState(JobState.RunningState.COMMITTED);
    state.setId(urn);
    state.setStartTime(this.startTime);
    state.setEndTime(this.startTime + 1000);
    state.setDuration(1000);

    String taskId = TEST_TASK_ID_PREFIX + index;
    TaskState committedTask = new TaskState();
    committedTask.setJobId(TEST_JOB_ID);
    committedTask.setTaskId(taskId);
    committedTask.setId(taskId);
    committedTask.setWorkingState(WorkUnitState.WorkingState.COMMITTED);
    // The final dataset is deliberately left without an actual high watermark.
    if (index != lastIndex) {
      committedTask.setActualHighWatermark(watermarks.get(index));
    }
    state.addTaskState(committedTask);

    this.fsDatasetStateStore.persistDatasetState(urn, state);
  }
}
/**
 * Build a {@link JobState.DatasetState} seeded from this {@link JobState} instance.
 *
 * <p>The job name, job ID, start time, end time and duration are always carried over. The state,
 * task count, task states and skipped task states are carried over only when a full copy is
 * requested.</p>
 *
 * @param fullCopy whether to do a full copy of this {@link JobState} instance
 * @return the newly created {@link JobState.DatasetState} object
 */
public DatasetState newDatasetState(boolean fullCopy) {
  DatasetState copy = new DatasetState(this.jobName, this.jobId);
  copy.setStartTime(this.startTime);
  copy.setEndTime(this.endTime);
  copy.setDuration(this.duration);

  // A shallow copy stops after the timing fields.
  if (!fullCopy) {
    return copy;
  }

  copy.setState(this.state);
  copy.setTaskCount(this.taskCount);
  copy.addTaskStates(this.taskStates.values());
  copy.addSkippedTaskStates(this.skippedTaskStates.values());
  return copy;
}
/**
 * Persists several dataset states into a temporary file-system state store, cleans every dataset
 * found by a time-based finder with a zero-minute lookback, and asserts that each store directory
 * is left with exactly one entry.
 */
@Test
public void testCleanStateStore() throws IOException {
  File tmpDir = Files.createTempDir();
  tmpDir.deleteOnExit();
  String storeRoot = tmpDir.getAbsolutePath();

  FileSystem localFs = FileSystem.getLocal(new Configuration());
  FsDatasetStateStore stateStore = new FsDatasetStateStore(localFs, storeRoot);

  stateStore.persistDatasetState("dataset1", new JobState.DatasetState("job1", "job1_id1"));
  stateStore.persistDatasetState("dataset1", new JobState.DatasetState("job1", "job1_id2"));
  stateStore.persistDatasetState("dataset1", new JobState.DatasetState("job2", "job2_id1"));
  stateStore.persistDatasetState("", new JobState.DatasetState("job3", "job3_id1"));

  Properties finderProps = new Properties();
  finderProps.setProperty(ConfigurationKeys.STATE_STORE_ROOT_DIR_KEY, storeRoot);
  finderProps.setProperty("selection.timeBased.lookbackTime", "0m");

  TimeBasedDatasetStoreDatasetFinder finder = new TimeBasedDatasetStoreDatasetFinder(localFs, finderProps);
  for (DatasetStoreDataset found : finder.findDatasets()) {
    CleanableDataset cleanable = (CleanableDataset) found;
    cleanable.clean();

    // After cleaning, the directory named after the store must contain a single entry.
    File storeDir = new File(storeRoot, found.getKey().getStoreName());
    Assert.assertEquals(storeDir.list().length, 1);
  }
}
}
/**
 * Persists a committed dataset state holding three committed task states to the
 * file-system-backed dataset state store under TEST_DATASET_URN.
 */
@Test(dependsOnMethods = "testGetJobState")
public void testPersistDatasetState() throws IOException {
  JobState.DatasetState state = new JobState.DatasetState(TEST_JOB_NAME, TEST_JOB_ID);
  state.setDatasetUrn(TEST_DATASET_URN);
  state.setState(JobState.RunningState.COMMITTED);
  state.setId(TEST_DATASET_URN);
  state.setStartTime(this.startTime);
  state.setEndTime(this.startTime + 1000);
  state.setDuration(1000);

  for (int taskIndex = 0; taskIndex < 3; taskIndex++) {
    // The task ID and the state ID share the same prefixed value.
    String taskId = TEST_TASK_ID_PREFIX + taskIndex;
    TaskState committedTask = new TaskState();
    committedTask.setJobId(TEST_JOB_ID);
    committedTask.setTaskId(taskId);
    committedTask.setId(taskId);
    committedTask.setWorkingState(WorkUnitState.WorkingState.COMMITTED);
    state.addTaskState(committedTask);
  }

  this.fsDatasetStateStore.persistDatasetState(TEST_DATASET_URN, state);
}
/**
 * Creates the commit sequence store on the local file system and builds the commit sequence
 * under test: one rename step followed by one dataset state commit step, both using the same
 * properties.
 */
@BeforeClass
public void setUp() throws IOException {
  FileSystem localFs = FileSystem.getLocal(new Configuration());
  this.store = new FsCommitSequenceStore(localFs, new Path("commit-sequence-store-test"));

  State stepProps = new State();
  stepProps.setId("propsId");
  stepProps.setProp("prop1", "valueOfProp1");
  stepProps.setProp("prop2", "valueOfProp2");

  // Dataset state with the test URN and its job-failure counter incremented once.
  DatasetState failedOnceState = new DatasetState();
  failedOnceState.setDatasetUrn(this.datasetUrn);
  failedOnceState.incrementJobFailures();

  this.sequence = new CommitSequence.Builder()
      .withJobName("testjob")
      .withDatasetUrn("testurn")
      .beginStep(FsRenameCommitStep.Builder.class)
      .from(new Path("/ab/cd"))
      .to(new Path("/ef/gh"))
      .withProps(stepProps)
      .endStep()
      .beginStep(DatasetStateCommitStep.Builder.class)
      .withDatasetUrn(this.datasetUrn)
      .withDatasetState(failedOnceState)
      .withProps(stepProps)
      .endStep()
      .build();
}