private void maySubmitLineageEvent(JobState.DatasetState datasetState) { Collection<TaskState> allStates = datasetState.getTaskStates(); Collection<TaskState> states = Lists.newArrayList(); // Filter out failed states or states that don't have lineage info for (TaskState state : allStates) { if (state.getWorkingState() == WorkUnitState.WorkingState.COMMITTED && LineageInfo.hasLineageInfo(state)) { states.add(state); } } if (states.size() == 0) { log.info("Will not submit lineage events as no state contains lineage info"); return; } try { if (StringUtils.isEmpty(datasetUrn)) { // This dataset may contain different kinds of LineageEvent for (Map.Entry<String, Collection<TaskState>> entry : aggregateByLineageEvent(states).entrySet()) { submitLineageEvent(entry.getKey(), entry.getValue()); } } else { submitLineageEvent(datasetUrn, states); } } finally { // Purge lineage info from all states for (TaskState taskState : allStates) { LineageInfo.purgeLineageInfo(taskState); } } }
/**
 * Put data {@link Descriptor}s of a destination dataset to a state.
 *
 * <p>If {@code hasLineageInfo(state)} is false, a warning is logged and nothing is written.
 * Otherwise every descriptor is passed through the resolver; descriptors that resolve to
 * {@code null} are dropped, and the remaining ones are serialized with
 * {@code Descriptor.toJson} and stored in the state under the destination key of the branch.
 *
 * @param descriptors It can be a single item list which just has the dataset descriptor or a list
 *                    of dataset partition descriptors
 * @param branchId id of the branch; becomes part of the property key that is written
 * @param state the state that receives the serialized destination descriptors
 */
public void putDestination(List<Descriptor> descriptors, int branchId, State state) {
  if (!hasLineageInfo(state)) {
    // Placeholders only: no mixing of string concatenation and '{}' in one message
    log.warn("State has no lineage info but branch {} puts {} descriptors", branchId, descriptors.size());
    return;
  }

  // Parameterized logging instead of String.format; the rendered text is the same
  log.info("Put destination {} for branch {}", Descriptor.toJson(descriptors), branchId);

  // NOTE(review): the monitor is the String returned by getProp(...); mutual exclusion between
  // callers presumably relies on them observing the same String instance - confirm.
  synchronized (state.getProp(getKey(NAME_KEY))) {
    List<Descriptor> resolvedDescriptors = new ArrayList<>();
    for (Descriptor descriptor : descriptors) {
      Descriptor resolvedDescriptor = resolver.resolve(descriptor, state);
      if (resolvedDescriptor != null) {
        resolvedDescriptors.add(resolvedDescriptor);
      }
    }

    state.setProp(getKey(BRANCH, branchId, LineageEventBuilder.DESTINATION),
        Descriptor.toJson(resolvedDescriptors));
  }
}
private void maySubmitLineageEvent(JobState.DatasetState datasetState) { Collection<TaskState> allStates = datasetState.getTaskStates(); Collection<TaskState> states = Lists.newArrayList(); // Filter out failed states or states that don't have lineage info for (TaskState state : allStates) { if (state.getWorkingState() == WorkUnitState.WorkingState.COMMITTED && LineageInfo.hasLineageInfo(state)) { states.add(state); } } if (states.size() == 0) { log.info("Will not submit lineage events as no state contains lineage info"); return; } try { if (StringUtils.isEmpty(datasetUrn)) { // This dataset may contain different kinds of LineageEvent for (Map.Entry<String, Collection<TaskState>> entry : aggregateByLineageEvent(states).entrySet()) { submitLineageEvent(entry.getKey(), entry.getValue()); } } else { submitLineageEvent(datasetUrn, states); } } finally { // Purge lineage info from all states for (TaskState taskState : allStates) { LineageInfo.purgeLineageInfo(taskState); } } }
/**
 * Put data {@link Descriptor}s of a destination dataset to a state.
 *
 * <p>If {@code hasLineageInfo(state)} is false, a warning is logged and nothing is written.
 * Otherwise every descriptor is passed through the resolver; descriptors that resolve to
 * {@code null} are dropped, and the remaining ones are serialized with
 * {@code Descriptor.toJson} and stored in the state under the destination key of the branch.
 *
 * @param descriptors It can be a single item list which just has the dataset descriptor or a list
 *                    of dataset partition descriptors
 * @param branchId id of the branch; becomes part of the property key that is written
 * @param state the state that receives the serialized destination descriptors
 */
public void putDestination(List<Descriptor> descriptors, int branchId, State state) {
  if (!hasLineageInfo(state)) {
    // Placeholders only: no mixing of string concatenation and '{}' in one message
    log.warn("State has no lineage info but branch {} puts {} descriptors", branchId, descriptors.size());
    return;
  }

  // Parameterized logging instead of String.format; the rendered text is the same
  log.info("Put destination {} for branch {}", Descriptor.toJson(descriptors), branchId);

  // NOTE(review): the monitor is the String returned by getProp(...); mutual exclusion between
  // callers presumably relies on them observing the same String instance - confirm.
  synchronized (state.getProp(getKey(NAME_KEY))) {
    List<Descriptor> resolvedDescriptors = new ArrayList<>();
    for (Descriptor descriptor : descriptors) {
      Descriptor resolvedDescriptor = resolver.resolve(descriptor, state);
      if (resolvedDescriptor != null) {
        resolvedDescriptors.add(resolvedDescriptor);
      }
    }

    state.setProp(getKey(BRANCH, branchId, LineageEventBuilder.DESTINATION),
        Descriptor.toJson(resolvedDescriptors));
  }
}