Descriptor source = Descriptor.fromJson(state.getProp(getKey(LineageEventBuilder.SOURCE))); List<Descriptor> descriptors = Descriptor.fromJsonList(entry.getValue().toString()); for (Descriptor descriptor : descriptors) { LineageEventBuilder event = new LineageEventBuilder(name);
/**
 * Look up the first event whose destination carries the given name.
 *
 * @param events candidate events, examined in iteration order
 * @param partitionName name compared against each event's destination name
 * @return the first matching event, or {@code null} when no event matches
 */
private static LineageEventBuilder find(Collection<LineageEventBuilder> events, String partitionName) {
  return events.stream()
      .filter(candidate -> candidate.getDestination().getName().equals(partitionName))
      .findFirst()
      .orElse(null);
}
/**
 * Create a new {@link Descriptor} built from this descriptor's name.
 *
 * @return a fresh {@link Descriptor} constructed with the same {@code name}
 */
public Descriptor copy() {
  final Descriptor duplicate = new Descriptor(this.name);
  return duplicate;
}
/**
 * Resolve the source {@link Descriptor} of a lineage event and record it on the state
 *
 * <p>
 * Only the {@link org.apache.gobblin.source.Source} or its {@link org.apache.gobblin.source.extractor.Extractor}
 * is supposed to set the source for a work unit of a dataset
 * </p>
 *
 * <p>
 * When the resolver returns {@code null} the state is left untouched. Otherwise the resolved
 * descriptor's name is stored under the {@code NAME_KEY} key and its json form under the
 * {@code LineageEventBuilder.SOURCE} key.
 * </p>
 *
 * @param source the source descriptor handed to the resolver
 * @param state state about a {@link org.apache.gobblin.source.workunit.WorkUnit}
 */
public void setSource(Descriptor source, State state) {
  final Descriptor resolved = resolver.resolve(source, state);
  if (resolved != null) {
    state.setProp(getKey(NAME_KEY), resolved.getName());
    state.setProp(getKey(LineageEventBuilder.SOURCE), Descriptor.toJson(resolved));
  }
}
/**
 * Deserialize a json string into a {@link Descriptor} object.
 *
 * @param json the json string to deserialize
 * @return the result of the two-argument {@code fromJson} called with {@code Descriptor.class}
 */
public static Descriptor fromJson(String json) {
  final Descriptor parsed = fromJson(json, Descriptor.class);
  return parsed;
}
/**
 * Resolve the source {@link Descriptor} of a lineage event and record it on the state
 *
 * <p>
 * Only the {@link org.apache.gobblin.source.Source} or its {@link org.apache.gobblin.source.extractor.Extractor}
 * is supposed to set the source for a work unit of a dataset
 * </p>
 *
 * <p>
 * When the resolver returns {@code null} the state is left untouched. Otherwise the resolved
 * descriptor's name is stored under the {@code NAME_KEY} key and its json form under the
 * {@code LineageEventBuilder.SOURCE} key.
 * </p>
 *
 * @param source the source descriptor handed to the resolver
 * @param state state about a {@link org.apache.gobblin.source.workunit.WorkUnit}
 */
public void setSource(Descriptor source, State state) {
  final Descriptor resolved = resolver.resolve(source, state);
  if (resolved != null) {
    state.setProp(getKey(NAME_KEY), resolved.getName());
    state.setProp(getKey(LineageEventBuilder.SOURCE), Descriptor.toJson(resolved));
  }
}
/**
 * Create a {@link LineageEventBuilder} from a {@link GobblinTrackingEvent}. An inverse function
 * to {@link LineageEventBuilder#build()}
 *
 * <p>
 * The builder is named after the event. Each metadata entry is then routed by key: the
 * {@code SOURCE} and {@code DESTINATION} entries are deserialized with
 * {@link Descriptor#fromJson(String)} and set as source and destination, and every other entry
 * is copied over as plain metadata.
 * </p>
 *
 * @param event the tracking event to convert
 * @return the lineage event builder populated from the event's name and metadata
 */
public static LineageEventBuilder fromEvent(GobblinTrackingEvent event) {
  Map<String, String> eventMetadata = event.getMetadata();
  LineageEventBuilder builder = new LineageEventBuilder(event.getName());
  for (Map.Entry<String, String> entry : eventMetadata.entrySet()) {
    String key = entry.getKey();
    String value = entry.getValue();
    switch (key) {
      case SOURCE:
        builder.setSource(Descriptor.fromJson(value));
        break;
      case DESTINATION:
        builder.setDestination(Descriptor.fromJson(value));
        break;
      default:
        builder.addMetadata(key, value);
        break;
    }
  }
  return builder;
}
/**
 * Put data {@link Descriptor}s of a destination dataset to a state
 *
 * <p>
 * If the state carries no lineage info a warning is logged and nothing is stored. Otherwise each
 * descriptor is passed through the resolver, descriptors that resolve to {@code null} are
 * dropped, and the json form of the remaining list is stored under the destination key of the
 * given branch.
 * </p>
 *
 * @param descriptors It can be a single item list which just has the dataset descriptor or a list
 *                    of dataset partition descriptors
 * @param branchId id of the branch, used to build the property key and in log messages
 * @param state the state that is checked for lineage info and receives the destination property
 */
public void putDestination(List<Descriptor> descriptors, int branchId, State state) {
  if (!hasLineageInfo(state)) {
    log.warn("State has no lineage info but branch {} puts {} descriptors", branchId, descriptors.size());
    return;
  }

  log.info("Put destination {} for branch {}", Descriptor.toJson(descriptors), branchId);
  // NOTE(review): the monitor is the property value returned by getProp; this presumably relies on
  // hasLineageInfo guaranteeing it is non-null and on all callers seeing the same instance - confirm.
  synchronized (state.getProp(getKey(NAME_KEY))) {
    List<Descriptor> resolvedDescriptors = new ArrayList<>(descriptors.size());
    for (Descriptor descriptor : descriptors) {
      Descriptor resolvedDescriptor = resolver.resolve(descriptor, state);
      if (resolvedDescriptor != null) {
        resolvedDescriptors.add(resolvedDescriptor);
      }
    }
    state.setProp(getKey(BRANCH, branchId, LineageEventBuilder.DESTINATION), Descriptor.toJson(resolvedDescriptors));
  }
}
Descriptor source = Descriptor.fromJson(state.getProp(getKey(LineageEventBuilder.SOURCE))); List<Descriptor> descriptors = Descriptor.fromJsonList(entry.getValue().toString()); for (Descriptor descriptor : descriptors) { LineageEventBuilder event = new LineageEventBuilder(name);
/**
 * Deserialize a json string into a {@link Descriptor} object.
 *
 * @param json the json string to deserialize
 * @return the result of the two-argument {@code fromJson} called with {@code Descriptor.class}
 */
public static Descriptor fromJson(String json) {
  final Descriptor parsed = fromJson(json, Descriptor.class);
  return parsed;
}
@Test public void testAsDescriptorResolver() { DescriptorResolver resolver = new TestDatasetResolver(); State state = new State(); // Test dataset descriptor DatasetDescriptor dataset = new DatasetDescriptor("hdfs", "/data/tracking/PageViewEvent"); Descriptor descriptor = resolver.resolve(dataset, state); Assert.assertTrue(descriptor.getClass().isAssignableFrom(DatasetDescriptor.class)); Assert.assertEquals(descriptor.getName(), TestDatasetResolver.DATASET_NAME); // Test partition descriptor String partitionName = "hourly/2018/08/14/18"; PartitionDescriptor partition = new PartitionDescriptor(partitionName, dataset); descriptor = resolver.resolve(partition, state); Assert.assertTrue(descriptor.getClass().isAssignableFrom(DatasetDescriptor.class)); Assert.assertEquals(descriptor.getName(), TestDatasetResolver.DATASET_NAME); // Test unsupported descriptor Assert.assertEquals(resolver.resolve(new MockDescriptor("test"), state), null); }
/**
 * Create a new {@link Descriptor} built from this descriptor's name.
 *
 * @return a fresh {@link Descriptor} constructed with the same {@code name}
 */
public Descriptor copy() {
  final Descriptor duplicate = new Descriptor(this.name);
  return duplicate;
}
/**
 * Create a {@link LineageEventBuilder} from a {@link GobblinTrackingEvent}. An inverse function
 * to {@link LineageEventBuilder#build()}
 *
 * <p>
 * The builder is named after the event. Each metadata entry is then routed by key: the
 * {@code SOURCE} and {@code DESTINATION} entries are deserialized with
 * {@link Descriptor#fromJson(String)} and set as source and destination, and every other entry
 * is copied over as plain metadata.
 * </p>
 *
 * @param event the tracking event to convert
 * @return the lineage event builder populated from the event's name and metadata
 */
public static LineageEventBuilder fromEvent(GobblinTrackingEvent event) {
  Map<String, String> eventMetadata = event.getMetadata();
  LineageEventBuilder builder = new LineageEventBuilder(event.getName());
  for (Map.Entry<String, String> entry : eventMetadata.entrySet()) {
    String key = entry.getKey();
    String value = entry.getValue();
    switch (key) {
      case SOURCE:
        builder.setSource(Descriptor.fromJson(value));
        break;
      case DESTINATION:
        builder.setDestination(Descriptor.fromJson(value));
        break;
      default:
        builder.addMetadata(key, value);
        break;
    }
  }
  return builder;
}
@Test public void testPartitionDescriptor() { DatasetDescriptor dataset = new DatasetDescriptor("hdfs", "/data/tracking/PageViewEvent"); String partitionName = "hourly/2018/08/14/18"; PartitionDescriptor partition = new PartitionDescriptor(partitionName, dataset); // Test copy with new dataset DatasetDescriptor dataset2 = new DatasetDescriptor("hive", "/data/tracking/PageViewEvent"); Descriptor partition2 = partition.copyWithNewDataset(dataset2); Assert.assertEquals(partition2.getName(), partition.getName()); Assert.assertEquals(((PartitionDescriptor)partition2).getDataset(), dataset2); // Test copy PartitionDescriptor partition3 = partition.copy(); Assert.assertEquals(partition3.getDataset(), dataset); Assert.assertEquals(partition3.getName(), partitionName); } }
/**
 * Put data {@link Descriptor}s of a destination dataset to a state
 *
 * <p>
 * If the state carries no lineage info a warning is logged and nothing is stored. Otherwise each
 * descriptor is passed through the resolver, descriptors that resolve to {@code null} are
 * dropped, and the json form of the remaining list is stored under the destination key of the
 * given branch.
 * </p>
 *
 * @param descriptors It can be a single item list which just has the dataset descriptor or a list
 *                    of dataset partition descriptors
 * @param branchId id of the branch, used to build the property key and in log messages
 * @param state the state that is checked for lineage info and receives the destination property
 */
public void putDestination(List<Descriptor> descriptors, int branchId, State state) {
  if (!hasLineageInfo(state)) {
    log.warn("State has no lineage info but branch {} puts {} descriptors", branchId, descriptors.size());
    return;
  }

  log.info("Put destination {} for branch {}", Descriptor.toJson(descriptors), branchId);
  // NOTE(review): the monitor is the property value returned by getProp; this presumably relies on
  // hasLineageInfo guaranteeing it is non-null and on all callers seeing the same instance - confirm.
  synchronized (state.getProp(getKey(NAME_KEY))) {
    List<Descriptor> resolvedDescriptors = new ArrayList<>(descriptors.size());
    for (Descriptor descriptor : descriptors) {
      Descriptor resolvedDescriptor = resolver.resolve(descriptor, state);
      if (resolvedDescriptor != null) {
        resolvedDescriptors.add(resolvedDescriptor);
      }
    }
    state.setProp(getKey(BRANCH, branchId, LineageEventBuilder.DESTINATION), Descriptor.toJson(resolvedDescriptors));
  }
}