// Signals that verification of a specific dataset failed; retains both the dataset
// and the underlying cause as fields so callers can report per-dataset failures.
// NOTE(review): the cause is stored in a field but not chained to the superclass
// constructor, so Throwable#getCause() presumably returns null for this exception —
// confirm whether chaining via super(message, cause) is intended.
public DatasetVerificationException (Dataset dataset, Throwable cause) {
  // Embed the dataset URN and the cause's toString() in the exception message.
  super ("Dataset:" + dataset.datasetURN() + " Exception:" + cause);
  this.dataset = dataset;
  this.cause = cause;
}
}
/**
 * Builds the stream of {@link WorkUnit}s for this source run.
 *
 * <p>When {@code drilldownIntoPartitions} is set, each {@link PartitionableDataset} is
 * expanded into its partitions (non-partitionable datasets are wrapped so they can flow
 * through the same partition pipeline); otherwise one work unit is created per dataset.
 * Datasets/partitions that map to {@code null} work units are filtered out.
 *
 * @param state the source state used to construct the dataset finder
 * @return a lazy stream of work units; never {@code null}
 * @throws IOException if the dataset finder cannot enumerate datasets
 */
private Stream<WorkUnit> createWorkUnitStream(SourceState state) throws IOException {
  IterableDatasetFinder datasetsFinder = createDatasetsFinder(state);
  Stream<Dataset> datasetStream = datasetsFinder.getDatasetsStream(0, null);

  if (this.drilldownIntoPartitions) {
    return datasetStream.flatMap(dataset -> {
      if (dataset instanceof PartitionableDataset) {
        try {
          return (Stream<PartitionableDataset.DatasetPartition>) ((PartitionableDataset) dataset).getPartitions(0, null);
        } catch (IOException ioe) {
          // Fix: log the exception itself so the stack trace is preserved
          // (previously only the URN was logged and the IOException was lost).
          log.error("Failed to get partitions for dataset " + dataset.getUrn(), ioe);
          return Stream.empty();
        }
      } else {
        // Non-partitionable datasets are adapted so the partition pipeline can handle them.
        return Stream.of(new DatasetWrapper(dataset));
      }
    }).map(this::workUnitForPartitionInternal).filter(Objects::nonNull);
  } else {
    return datasetStream.map(this::workUnitForDataset).filter(Objects::nonNull);
  }
}
DatasetVerificationException dve = (DatasetVerificationException) exc.getCause(); failedDatasets.add(dve.dataset); failedReasonMap.put(dve.dataset.getUrn(), ExceptionUtils.getFullStackTrace(dve.cause)); } else { VerifiedDataset vd = ((Either.Left<VerifiedDataset, ExecutionException>) either).getLeft(); if (!vd.verifiedResult.allVerificationPassed) { if (vd.verifiedResult.shouldRetry) { log.debug ("Dataset {} verification has failure but should retry", vd.dataset.datasetURN()); failedDatasets.add(vd.dataset); failedReasonMap.put(vd.dataset.getUrn(), vd.verifiedResult.failedReason); } else { log.debug ("Dataset {} verification has failure but no need to retry", vd.dataset.datasetURN()); log.info ("{} is timed out and give up the verification, adding a failed task", dataset.datasetURN()); this.workUnitIterator.addWorkUnit (createWorkUnitForFailure(dataset, failedReasonMap.get(dataset.getUrn())));
DatasetVerificationException dve = (DatasetVerificationException) exc.getCause(); failedDatasets.add(dve.dataset); failedReasonMap.put(dve.dataset.getUrn(), ExceptionUtils.getFullStackTrace(dve.cause)); } else { VerifiedDataset vd = ((Either.Left<VerifiedDataset, ExecutionException>) either).getLeft(); if (!vd.verifiedResult.allVerificationPassed) { if (vd.verifiedResult.shouldRetry) { log.debug ("Dataset {} verification has failure but should retry", vd.dataset.datasetURN()); failedDatasets.add(vd.dataset); failedReasonMap.put(vd.dataset.getUrn(), vd.verifiedResult.failedReason); } else { log.debug ("Dataset {} verification has failure but no need to retry", vd.dataset.datasetURN()); log.info ("{} is timed out and give up the verification, adding a failed task", dataset.datasetURN()); this.workUnitIterator.addWorkUnit (createWorkUnitForFailure(dataset, failedReasonMap.get(dataset.getUrn())));
/** Returns the URN of the wrapped dataset; this wrapper adds no URN of its own. */
@Override
public String getUrn() {
  return dataset.datasetURN();
}
}
/**
 * Emits one {@link GobblinTrackingEvent} per file set, carrying the dataset URN,
 * entity count, total byte size, and file-set name as event metadata.
 *
 * @param fileSetList the file sets to report
 * @param eventName   the tracking-event name to tag each event with
 */
private void submitUnfulfilledRequestEventsHelper(List<FileSet<CopyEntity>> fileSetList, String eventName) {
  for (FileSet<CopyEntity> fs : fileSetList) {
    // Build the metadata map separately for readability; contents match the event schema.
    ImmutableMap<String, String> metadata = ImmutableMap.<String, String>builder()
        .put(ConfigurationKeys.DATASET_URN_KEY, fs.getDataset().getUrn())
        .put(FILESET_TOTAL_ENTITIES, Integer.toString(fs.getTotalEntities()))
        .put(FILESET_TOTAL_SIZE_IN_BYTES, Long.toString(fs.getTotalSizeInBytes()))
        .put(FILESET_NAME, fs.getName())
        .build();
    GobblinTrackingEvent event = GobblinTrackingEvent.newBuilder()
        .setName(eventName)
        .setNamespace(CopySource.class.getName())
        .setMetadata(metadata)
        .build();
    this.metricContext.submitEvent(event);
  }
}
/** Renders this object as {@code <dataset URN>@<name>}. */
@Override
public String toString() {
  String urn = this.dataset.datasetURN();
  return urn + "@" + this.name;
}
}
/**
 * Creates a minimal work unit for the given dataset, recording only its URN.
 *
 * @param dataset the dataset to build a work unit for
 * @return a new work unit tagged with the dataset URN
 */
@Override
protected WorkUnit workUnitForDataset(Dataset dataset) {
  WorkUnit unit = new WorkUnit();
  unit.setProp(DATASET_URN, dataset.getUrn());
  return unit;
}
/** Default URN implementation: delegates directly to {@link #datasetURN()}. */
@Override
default String getUrn() {
  return this.datasetURN();
}
}
/**
 * Verifies the persisted dataset states for {@code jobName}: one COMMITTED state per
 * dataset with the expected job identity, timing, and single committed task; every
 * dataset except the last must also carry its expected actual high watermark.
 *
 * @param datasets   datasets whose latest states are checked (at least two required)
 * @param watermarks expected high watermarks, indexed in parallel with {@code datasets}
 * @param jobName    job whose state store entries are inspected
 * @throws IOException if the state store cannot be read
 */
private void testDatasetStates(List<Dataset> datasets, List<LongWatermark> watermarks, String jobName) throws IOException {
  Preconditions.checkArgument(datasets.size() >= 2);
  for (int idx = 0; idx < datasets.size(); idx++) {
    String urn = datasets.get(idx).getUrn();
    JobState.DatasetState state = this.fsDatasetStateStore.getLatestDatasetState(jobName, urn);

    // Dataset-level expectations.
    Assert.assertEquals(state.getDatasetUrn(), urn);
    Assert.assertEquals(state.getJobName(), jobName);
    Assert.assertEquals(state.getJobId(), TEST_JOB_ID);
    Assert.assertEquals(state.getState(), JobState.RunningState.COMMITTED);
    Assert.assertEquals(state.getStartTime(), this.startTime);
    Assert.assertEquals(state.getEndTime(), this.startTime + 1000);
    Assert.assertEquals(state.getDuration(), 1000);
    Assert.assertEquals(state.getCompletedTasks(), 1);

    // Task-level expectations (exactly one task per dataset state).
    TaskState task = state.getTaskStates().get(0);
    Assert.assertEquals(task.getJobId(), TEST_JOB_ID);
    Assert.assertEquals(task.getTaskId(), TEST_TASK_ID_PREFIX + idx);
    Assert.assertEquals(task.getId(), TEST_TASK_ID_PREFIX + idx);
    Assert.assertEquals(task.getWorkingState(), WorkUnitState.WorkingState.COMMITTED);

    // The last dataset is persisted without a watermark, so skip it here.
    if (idx < datasets.size() - 1) {
      Assert.assertEquals(task.getActualHighWatermark(LongWatermark.class).getValue(),
          watermarks.get(idx).getValue());
    }
  }
}
/**
 * Records the compaction failure for this dataset and marks the work as FAILED.
 */
@Override
public void run() {
  // Fix: use fully parameterized SLF4J logging — the original mixed string
  // concatenation with a {} placeholder in the same call. Rendered message unchanged.
  log.error("Compaction job for {} is failed because of {}", dataset.datasetURN(), failedReason);
  this.workingState = WorkUnitState.WorkingState.FAILED;
}
/**
 * Creates a minimal work unit for the given dataset partition, recording the owning
 * dataset's URN and the partition's own URN.
 *
 * @param partition the partition to build a work unit for
 * @return a new work unit tagged with both URNs
 */
@Override
protected WorkUnit workUnitForDatasetPartition(PartitionableDataset.DatasetPartition partition) {
  WorkUnit unit = new WorkUnit();
  unit.setProp(DATASET_URN, partition.getDataset().getUrn());
  unit.setProp(PARTITION_URN, partition.getUrn());
  return unit;
}
/**
 * Callback for a successful clean: releases the completion latch and logs the dataset.
 */
@Override
public void onSuccess(@Nullable Void result) {
  ComplianceRetentionJob.this.finishCleanSignal.get().countDown();
  // Fix: parameterized SLF4J logging instead of string concatenation; rendered message unchanged.
  log.info("Successfully cleaned: {}", dataset.datasetURN());
}
/**
 * Persists one COMMITTED dataset state per dataset for {@code jobName}, each holding a
 * single committed task. All datasets except the last also get an actual high watermark
 * taken from the parallel {@code watermarks} list.
 *
 * @param datasets   datasets to persist state for (at least two required)
 * @param watermarks high watermarks, indexed in parallel with {@code datasets}
 * @param jobName    job name to record on each dataset state
 * @throws IOException if the state store write fails
 */
public void persistDatasetState(List<Dataset> datasets, List<LongWatermark> watermarks, String jobName) throws IOException {
  Preconditions.checkArgument(datasets.size() >= 2);
  for (int idx = 0; idx < datasets.size(); idx++) {
    String urn = datasets.get(idx).getUrn();

    // Build the dataset-level state.
    JobState.DatasetState state = new JobState.DatasetState(jobName, TEST_JOB_ID);
    state.setDatasetUrn(urn);
    state.setState(JobState.RunningState.COMMITTED);
    state.setId(urn);
    state.setStartTime(this.startTime);
    state.setEndTime(this.startTime + 1000);
    state.setDuration(1000);

    // Attach a single committed task, identified by its index.
    TaskState task = new TaskState();
    task.setJobId(TEST_JOB_ID);
    task.setTaskId(TEST_TASK_ID_PREFIX + idx);
    task.setId(TEST_TASK_ID_PREFIX + idx);
    task.setWorkingState(WorkUnitState.WorkingState.COMMITTED);
    // The last dataset is deliberately persisted without a watermark.
    if (idx < datasets.size() - 1) {
      task.setActualHighWatermark(watermarks.get(idx));
    }
    state.addTaskState(task);

    this.fsDatasetStateStore.persistDatasetState(urn, state);
  }
}
/**
 * Callback for a successful restore: releases the completion latch and logs the dataset.
 */
@Override
public void onSuccess(@Nullable Void result) {
  ComplianceRestoreJob.this.finishCleanSignal.get().countDown();
  // Fix: parameterized SLF4J logging instead of string concatenation; rendered message unchanged.
  log.info("Successfully restored: {}", dataset.datasetURN());
}
/**
 * Creates a work unit for the given dataset. When the dataset state store is enabled,
 * the work unit's low watermark is taken from the first task state of the latest
 * persisted dataset state (or 0 when no prior state exists), and the high watermark
 * is fixed at {@code LAST_PROCESSED_TS}.
 *
 * @param dataset the dataset to build a work unit for
 * @return a new work unit, carrying a watermark interval only if state-store lookups are enabled
 */
@Override
protected WorkUnit workUnitForDataset(Dataset dataset) {
  WorkUnit workUnit = new WorkUnit();
  if (isDatasetStateStoreEnabled) {
    JobState.DatasetState priorState;
    try {
      priorState = (JobState.DatasetState) this.fsDatasetStateStore
          .getLatestDatasetState(this.jobName, dataset.getUrn());
    } catch (IOException e) {
      // State-store failures are unrecoverable here; surface them unchecked.
      throw new RuntimeException(e);
    }
    // No prior state means we start from watermark 0.
    LongWatermark lowWatermark = (priorState == null)
        ? new LongWatermark(0)
        : priorState.getTaskStatesAsWorkUnitStates().get(0).getActualHighWatermark(LongWatermark.class);
    workUnit.setWatermarkInterval(new WatermarkInterval(lowWatermark, new LongWatermark(LAST_PROCESSED_TS)));
  }
  return workUnit;
}
/**
 * Callback for a successful validation: releases the completion latch and logs the dataset.
 */
@Override
public void onSuccess(@Nullable Void result) {
  ComplianceValidationJob.this.finishCleanSignal.get().countDown();
  // Fix: parameterized SLF4J logging instead of string concatenation; rendered message unchanged.
  log.info("Successfully validated: {}", dataset.datasetURN());
}
Assert.assertEquals(fileSet.getDataset().getUrn(), "/test"); Assert.assertEquals(fileSet.getTotalEntities(), 5); Assert.assertEquals(fileSet.getTotalSizeInBytes(), 50); Assert.assertEquals(fileSet.getDataset().getUrn(), "/test"); Assert.assertEquals(fileSet.getTotalEntities(), 5); Assert.assertEquals(fileSet.getTotalSizeInBytes(), 50);
/**
 * Maps a requestor's dataset URN to its priority tier: the key of the first entry in
 * {@code tiersMap} whose pattern matches the URN, or {@link Integer#MAX_VALUE} (lowest
 * priority) when nothing matches.
 *
 * @param requestor the requestor (must wrap a {@code SimpleDatasetRequestor})
 * @return the matching tier, or {@code Integer.MAX_VALUE} if no tier pattern matches
 */
private int findTier(Requestor<SimpleDatasetRequest> requestor) {
  Dataset dataset = ((SimpleDatasetRequestor) requestor).getDataset();
  String urn = dataset.datasetURN();
  for (Map.Entry<Integer, Pattern> entry : tiersMap.entrySet()) {
    if (entry.getValue().matcher(urn).find()) {
      return entry.getKey();
    }
  }
  return Integer.MAX_VALUE;
}
}
/**
 * Creates a work unit for the given dataset partition. When the dataset state store is
 * enabled, state is looked up under the composite URN {@code <dataset URN>@<partition URN>};
 * the low watermark comes from the first task state of the latest persisted state (or 0
 * when none exists) and the high watermark is fixed at {@code LAST_PROCESSED_TS}.
 *
 * @param partition the partition to build a work unit for
 * @return a new work unit, carrying a watermark interval only if state-store lookups are enabled
 */
@Override
protected WorkUnit workUnitForDatasetPartition(PartitionableDataset.DatasetPartition partition) {
  WorkUnit workUnit = new WorkUnit();
  if (isDatasetStateStoreEnabled) {
    // Partition states are keyed by "<dataset URN>@<partition URN>".
    String compositeUrn = partition.getDataset().getUrn() + "@" + partition.getUrn();
    JobState.DatasetState priorState;
    try {
      priorState = (JobState.DatasetState) this.fsDatasetStateStore
          .getLatestDatasetState(this.jobName, compositeUrn);
    } catch (IOException e) {
      // State-store failures are unrecoverable here; surface them unchecked.
      throw new RuntimeException(e);
    }
    // No prior state means we start from watermark 0.
    LongWatermark lowWatermark = (priorState == null)
        ? new LongWatermark(0)
        : priorState.getTaskStatesAsWorkUnitStates().get(0).getActualHighWatermark(LongWatermark.class);
    workUnit.setWatermarkInterval(new WatermarkInterval(lowWatermark, new LongWatermark(LAST_PROCESSED_TS)));
  }
  return workUnit;
}