/**
 * Put data {@link Descriptor}s of a destination dataset to a state.
 *
 * <p>
 * Every descriptor is passed through the resolver; descriptors that resolve to {@code null} are dropped.
 * The remaining descriptors are serialized with {@code Descriptor.toJson} and stored under the destination
 * key of the given branch. When {@code hasLineageInfo(state)} is false a warning is logged and the state is
 * left untouched.
 * </p>
 *
 * @param descriptors It can be a single item list which just has the dataset descriptor or a list
 *                    of dataset partition descriptors
 * @param branchId id of the branch whose destination is recorded; it becomes part of the property key
 * @param state state that is checked for lineage info and receives the destination property
 */
public void putDestination(List<Descriptor> descriptors, int branchId, State state) {
  if (!hasLineageInfo(state)) {
    // Fully parameterized message; renders the same text as the previous concatenated form.
    log.warn("State has no lineage info but branch {} puts {} descriptors", branchId, descriptors.size());
    return;
  }

  log.info("Put destination {} for branch {}", Descriptor.toJson(descriptors), branchId);

  // NOTE(review): the monitor is the String value of the name property. Two String instances with equal
  // content are different monitors, so confirm this really provides the mutual exclusion that is intended.
  synchronized (state.getProp(getKey(NAME_KEY))) {
    List<Descriptor> resolvedDescriptors = new ArrayList<>();
    for (Descriptor descriptor : descriptors) {
      Descriptor resolvedDescriptor = resolver.resolve(descriptor, state);
      // A null result means the resolver produced nothing for this descriptor; leave it out.
      if (resolvedDescriptor != null) {
        resolvedDescriptors.add(resolvedDescriptor);
      }
    }

    state.setProp(getKey(BRANCH, branchId, LineageEventBuilder.DESTINATION),
        Descriptor.toJson(resolvedDescriptors));
  }
}
@Test public void testAsDescriptorResolver() { DescriptorResolver resolver = new TestDatasetResolver(); State state = new State(); // Test dataset descriptor DatasetDescriptor dataset = new DatasetDescriptor("hdfs", "/data/tracking/PageViewEvent"); Descriptor descriptor = resolver.resolve(dataset, state); Assert.assertTrue(descriptor.getClass().isAssignableFrom(DatasetDescriptor.class)); Assert.assertEquals(descriptor.getName(), TestDatasetResolver.DATASET_NAME); // Test partition descriptor String partitionName = "hourly/2018/08/14/18"; PartitionDescriptor partition = new PartitionDescriptor(partitionName, dataset); descriptor = resolver.resolve(partition, state); Assert.assertTrue(descriptor.getClass().isAssignableFrom(DatasetDescriptor.class)); Assert.assertEquals(descriptor.getName(), TestDatasetResolver.DATASET_NAME); // Test unsupported descriptor Assert.assertEquals(resolver.resolve(new MockDescriptor("test"), state), null); }
/**
 * Record the source {@link DatasetDescriptor} of a lineage event in the given state.
 *
 * <p>
 * The source is first passed through the resolver. If the resolver returns {@code null} the state is left
 * untouched; otherwise the resolved descriptor's name and its JSON form are written as properties.
 * </p>
 *
 * <p>
 * Only the {@link org.apache.gobblin.source.Source} or its {@link org.apache.gobblin.source.extractor.Extractor}
 * is supposed to set the source for a work unit of a dataset
 * </p>
 *
 * @param source descriptor of the source to resolve and record
 * @param state state about a {@link org.apache.gobblin.source.workunit.WorkUnit}
 */
public void setSource(Descriptor source, State state) {
  Descriptor resolved = resolver.resolve(source, state);
  if (resolved != null) {
    state.setProp(getKey(NAME_KEY), resolved.getName());
    state.setProp(getKey(LineageEventBuilder.SOURCE), Descriptor.toJson(resolved));
  }
}
/**
 * Put data {@link Descriptor}s of a destination dataset to a state.
 *
 * <p>
 * Every descriptor is passed through the resolver; descriptors that resolve to {@code null} are dropped.
 * The remaining descriptors are serialized with {@code Descriptor.toJson} and stored under the destination
 * key of the given branch. When {@code hasLineageInfo(state)} is false a warning is logged and the state is
 * left untouched.
 * </p>
 *
 * @param descriptors It can be a single item list which just has the dataset descriptor or a list
 *                    of dataset partition descriptors
 * @param branchId id of the branch whose destination is recorded; it becomes part of the property key
 * @param state state that is checked for lineage info and receives the destination property
 */
public void putDestination(List<Descriptor> descriptors, int branchId, State state) {
  if (!hasLineageInfo(state)) {
    // Fully parameterized message; renders the same text as the previous concatenated form.
    log.warn("State has no lineage info but branch {} puts {} descriptors", branchId, descriptors.size());
    return;
  }

  log.info("Put destination {} for branch {}", Descriptor.toJson(descriptors), branchId);

  // NOTE(review): the monitor is the String value of the name property. Two String instances with equal
  // content are different monitors, so confirm this really provides the mutual exclusion that is intended.
  synchronized (state.getProp(getKey(NAME_KEY))) {
    List<Descriptor> resolvedDescriptors = new ArrayList<>();
    for (Descriptor descriptor : descriptors) {
      Descriptor resolvedDescriptor = resolver.resolve(descriptor, state);
      // A null result means the resolver produced nothing for this descriptor; leave it out.
      if (resolvedDescriptor != null) {
        resolvedDescriptors.add(resolvedDescriptor);
      }
    }

    state.setProp(getKey(BRANCH, branchId, LineageEventBuilder.DESTINATION),
        Descriptor.toJson(resolvedDescriptors));
  }
}
/**
 * Record the source {@link DatasetDescriptor} of a lineage event in the given state.
 *
 * <p>
 * The source is first passed through the resolver. If the resolver returns {@code null} the state is left
 * untouched; otherwise the resolved descriptor's name and its JSON form are written as properties.
 * </p>
 *
 * <p>
 * Only the {@link org.apache.gobblin.source.Source} or its {@link org.apache.gobblin.source.extractor.Extractor}
 * is supposed to set the source for a work unit of a dataset
 * </p>
 *
 * @param source descriptor of the source to resolve and record
 * @param state state about a {@link org.apache.gobblin.source.workunit.WorkUnit}
 */
public void setSource(Descriptor source, State state) {
  Descriptor resolved = resolver.resolve(source, state);
  if (resolved != null) {
    state.setProp(getKey(NAME_KEY), resolved.getName());
    state.setProp(getKey(LineageEventBuilder.SOURCE), Descriptor.toJson(resolved));
  }
}