/**
 * Put data {@link Descriptor}s of a destination dataset to a state.
 *
 * <p>
 * If {@code hasLineageInfo(state)} is false, a warning is logged and the state is left untouched.
 * Otherwise every descriptor is passed through the resolver, descriptors that resolve to
 * {@code null} are dropped, and the remaining ones are stored as JSON under the destination key
 * of the given branch.
 * </p>
 *
 * @param descriptors It can be a single item list which just has the dataset descriptor or a list
 *                    of dataset partition descriptors
 * @param branchId id of the branch; it is part of the property key the destination is stored under
 * @param state state that is checked for lineage info and receives the destination property
 */
public void putDestination(List<Descriptor> descriptors, int branchId, State state) {
  if (!hasLineageInfo(state)) {
    // Fully parameterized message instead of mixing string concatenation with a placeholder.
    log.warn("State has no lineage info but branch {} puts {} descriptors", branchId, descriptors.size());
    return;
  }

  // Let the logger do the formatting rather than pre-building the message with String.format.
  log.info("Put destination {} for branch {}", Descriptor.toJson(descriptors), branchId);

  // The monitor is the String value stored under the name key (the one setSource writes).
  // NOTE(review): this only excludes threads that obtain the very same String instance, and it
  // presumes hasLineageInfo(state) guarantees the value is non-null - confirm both.
  synchronized (state.getProp(getKey(NAME_KEY))) {
    List<Descriptor> resolvedDescriptors = new ArrayList<>(descriptors.size());
    for (Descriptor descriptor : descriptors) {
      Descriptor resolvedDescriptor = resolver.resolve(descriptor, state);
      // A null result means the resolver produced nothing for this descriptor; skip it.
      if (resolvedDescriptor != null) {
        resolvedDescriptors.add(resolvedDescriptor);
      }
    }

    state.setProp(getKey(BRANCH, branchId, LineageEventBuilder.DESTINATION),
        Descriptor.toJson(resolvedDescriptors));
  }
}
/**
 * Record the source {@link Descriptor} of a lineage event in the given state.
 *
 * <p>
 * Only the {@link org.apache.gobblin.source.Source} or its
 * {@link org.apache.gobblin.source.extractor.Extractor} is supposed to set the source for a
 * work unit of a dataset.
 * </p>
 *
 * <p>
 * The descriptor is first passed through the resolver. When the resolver returns {@code null}
 * nothing is written; otherwise the resolved descriptor's name and its JSON form are stored
 * as two properties of the state.
 * </p>
 *
 * @param source descriptor of the source, before resolution
 * @param state state about a {@link org.apache.gobblin.source.workunit.WorkUnit}
 */
public void setSource(Descriptor source, State state) {
  Descriptor resolved = resolver.resolve(source, state);
  if (resolved != null) {
    // Name first, then the serialized descriptor - same write order as before.
    String nameKey = getKey(NAME_KEY);
    state.setProp(nameKey, resolved.getName());

    String sourceKey = getKey(LineageEventBuilder.SOURCE);
    state.setProp(sourceKey, Descriptor.toJson(resolved));
  }
}
/**
 * Put data {@link Descriptor}s of a destination dataset to a state.
 *
 * <p>
 * When {@code hasLineageInfo(state)} returns false the call only logs a warning and returns.
 * Otherwise each descriptor is resolved, {@code null} results are skipped, and the list of
 * resolved descriptors is written to the state as JSON under the branch's destination key.
 * </p>
 *
 * @param descriptors It can be a single item list which just has the dataset descriptor or a list
 *                    of dataset partition descriptors
 * @param branchId id of the branch; used to build the property key for the destination
 * @param state state that is checked for lineage info and that the destination is written to
 */
public void putDestination(List<Descriptor> descriptors, int branchId, State state) {
  if (!hasLineageInfo(state)) {
    // Both values go through placeholders; the original concatenated one and templated the other.
    log.warn("State has no lineage info but branch {} puts {} descriptors", branchId, descriptors.size());
    return;
  }

  // Parameterized logging replaces the String.format wrapper; the message text is the same.
  log.info("Put destination {} for branch {}", Descriptor.toJson(descriptors), branchId);

  // Synchronizes on the String held under the name key.
  // NOTE(review): mutual exclusion depends on callers seeing the same String instance, and on
  // the value being non-null once hasLineageInfo(state) passed - confirm.
  synchronized (state.getProp(getKey(NAME_KEY))) {
    List<Descriptor> resolvedDescriptors = new ArrayList<>(descriptors.size());
    for (Descriptor descriptor : descriptors) {
      Descriptor resolvedDescriptor = resolver.resolve(descriptor, state);
      // Descriptors the resolver maps to null are left out of the stored list.
      if (resolvedDescriptor != null) {
        resolvedDescriptors.add(resolvedDescriptor);
      }
    }

    state.setProp(getKey(BRANCH, branchId, LineageEventBuilder.DESTINATION),
        Descriptor.toJson(resolvedDescriptors));
  }
}
/**
 * Set the source {@link Descriptor} of a lineage event.
 *
 * <p>
 * Only the {@link org.apache.gobblin.source.Source} or its
 * {@link org.apache.gobblin.source.extractor.Extractor} is supposed to set the source for a
 * work unit of a dataset.
 * </p>
 *
 * <p>
 * The given descriptor is resolved against the state; a {@code null} resolution leaves the
 * state unchanged. Otherwise the resolved name and the resolved descriptor's JSON are written
 * to the state, in that order.
 * </p>
 *
 * @param source the source descriptor to resolve and record
 * @param state state about a {@link org.apache.gobblin.source.workunit.WorkUnit}
 */
public void setSource(Descriptor source, State state) {
  final Descriptor resolvedSource = resolver.resolve(source, state);
  if (resolvedSource == null) {
    // Nothing resolved, so there is nothing to record.
    return;
  }

  final String nameProperty = getKey(NAME_KEY);
  final String resolvedName = resolvedSource.getName();
  state.setProp(nameProperty, resolvedName);

  final String sourceProperty = getKey(LineageEventBuilder.SOURCE);
  final String sourceJson = Descriptor.toJson(resolvedSource);
  state.setProp(sourceProperty, sourceJson);
}