/**
 * Records the Salesforce entity being pulled as the lineage source of this work unit.
 *
 * @param sourceState source-level state (unused here; kept for the overridden contract)
 * @param entity      the Salesforce entity whose name identifies the source dataset
 * @param workUnit    the work unit to tag with lineage source information
 */
@Override
protected void addLineageSourceInfo(SourceState sourceState, SourceEntity entity, WorkUnit workUnit) {
  DatasetDescriptor salesforceDataset =
      new DatasetDescriptor(DatasetConstants.PLATFORM_SALESFORCE, entity.getSourceEntityName());
  // Lineage tracking is optional; do nothing when it is not configured.
  if (!lineageInfo.isPresent()) {
    return;
  }
  lineageInfo.get().setSource(salesforceDataset, workUnit);
}
/**
 * Sets the lineage source of the given work unit to the source dataset of the
 * {@link ConvertibleHiveDataset} the work unit was built from.
 *
 * <p>NOTE(review): assumes the work unit always carries a ConvertibleHiveDataset —
 * the cast below throws ClassCastException otherwise; confirm with callers.
 *
 * @param workUnit    the work unit to tag with lineage source information
 * @param lineageInfo optional lineage tracker; no-op when absent
 */
@VisibleForTesting
public void setSourceLineageInfo(WorkUnit workUnit, Optional<LineageInfo> lineageInfo) {
  HiveWorkUnit hiveUnit = new HiveWorkUnit(workUnit);
  ConvertibleHiveDataset dataset = (ConvertibleHiveDataset) hiveUnit.getHiveDataset();
  DatasetDescriptor sourceDescriptor = dataset.getSourceDataset();
  // Lineage tracking is optional; do nothing when it is not configured.
  if (!lineageInfo.isPresent()) {
    return;
  }
  lineageInfo.get().setSource(sourceDescriptor, workUnit);
}
}
/**
 * Attaches lineage source information to the work unit when the copy entity is a
 * {@link CopyableFile} with both source and destination descriptors populated.
 *
 * @param copyEntity the entity being copied; only CopyableFile instances carry lineage data
 * @param workUnit   the work unit to tag with lineage source information
 */
private void addLineageInfo(CopyEntity copyEntity, WorkUnit workUnit) {
  if (!(copyEntity instanceof CopyableFile)) {
    return;
  }
  CopyableFile copyableFile = (CopyableFile) copyEntity;
  /*
   * In Gobblin Distcp, the source and target path info of a CopyableFile are determined by its
   * dataset found by a DatasetFinder. Consequently, the source and destination dataset for the
   * CopyableFile lineage are expected to be set by the same logic.
   */
  // Preserve the original short-circuit order: lineage presence first, then the two getters.
  if (lineageInfo.isPresent()
      && copyableFile.getSourceData() != null
      && copyableFile.getDestinationData() != null) {
    lineageInfo.get().setSource(copyableFile.getSourceData(), workUnit);
  }
}
}
/**
 * Records the MySQL table being pulled as the lineage source of this work unit,
 * including the JDBC connection URL as descriptor metadata.
 *
 * <p>NOTE(review): host and database are assumed non-null here — a missing property
 * would make {@code trim()} throw NPE; confirm these keys are always configured.
 *
 * @param sourceState source-level state providing connection host, port and schema
 * @param entity      the entity (table) whose name is appended to the schema name
 * @param workUnit    the work unit to tag with lineage source information
 */
protected void addLineageSourceInfo(SourceState sourceState, SourceEntity entity, WorkUnit workUnit) {
  String host = sourceState.getProp(ConfigurationKeys.SOURCE_CONN_HOST_NAME);
  String port = sourceState.getProp(ConfigurationKeys.SOURCE_CONN_PORT);
  String database = sourceState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_SCHEMA);
  String connectionUrl = "jdbc:mysql://" + host.trim() + ":" + port + "/" + database.trim();

  DatasetDescriptor source = new DatasetDescriptor(DatasetConstants.PLATFORM_MYSQL,
      database + "." + entity.getSourceEntityName());
  source.addMetadata(DatasetConstants.CONNECTION_URL, connectionUrl);

  // Lineage tracking is optional; do nothing when it is not configured.
  if (!lineageInfo.isPresent()) {
    return;
  }
  lineageInfo.get().setSource(source, workUnit);
}
}
/**
 * Test that lineage info is set on every branch of every state when publishing
 * multiple task states together.
 */
@Test
public void testPublishMultiTasks() throws IOException {
  WorkUnitState firstState = buildTaskState(2);
  WorkUnitState secondState = buildTaskState(2);
  DatasetDescriptor kafkaSource = new DatasetDescriptor("kafka", "testTopic");
  LineageInfo lineageInfo = LineageInfo.getLineageInfo(firstState.getTaskBroker()).get();
  lineageInfo.setSource(kafkaSource, firstState);
  lineageInfo.setSource(kafkaSource, secondState);

  BaseDataPublisher publisher = new BaseDataPublisher(firstState);
  publisher.publishData(ImmutableList.of(firstState, secondState));

  // Both branches of both task states must carry a lineage destination.
  for (WorkUnitState state : new WorkUnitState[] {firstState, secondState}) {
    Assert.assertTrue(state.contains("gobblin.event.lineage.branch.0.destination"));
    Assert.assertTrue(state.contains("gobblin.event.lineage.branch.1.destination"));
  }
}
/**
 * Test that lineage info is set when publishing a single task state, and that
 * no destination is recorded for a branch that does not exist.
 */
@Test
public void testPublishSingleTask() throws IOException {
  WorkUnitState taskState = buildTaskState(1);
  LineageInfo lineageInfo = LineageInfo.getLineageInfo(taskState.getTaskBroker()).get();
  DatasetDescriptor kafkaSource = new DatasetDescriptor("kafka", "testTopic");
  lineageInfo.setSource(kafkaSource, taskState);

  new BaseDataPublisher(taskState).publishData(taskState);

  // Only one branch was built, so only branch 0 should have a destination.
  Assert.assertTrue(taskState.contains("gobblin.event.lineage.branch.0.destination"));
  Assert.assertFalse(taskState.contains("gobblin.event.lineage.branch.1.destination"));
}
LineageInfo lineageInfo = LineageInfo.getLineageInfo(state.getTaskBroker()).get(); DatasetDescriptor source = new DatasetDescriptor("kafka", "testTopic"); lineageInfo.setSource(source, state); BaseDataPublisher publisher = new BaseDataPublisher(state);
@Test public void testEventForPartitionedDataset() { final String topic = "testTopic"; final String kafka = "kafka"; final String hdfs = "hdfs"; final String path = "/data/tracking/PageViewEvent"; final String partitionName = "hourly/2018/08/15/15"; State state = new State(); LineageInfo lineageInfo = getLineageInfo(); DatasetDescriptor source = new DatasetDescriptor(kafka, topic); lineageInfo.setSource(source, state); DatasetDescriptor destinationDataset = new DatasetDescriptor(hdfs, path); PartitionDescriptor destination = new PartitionDescriptor(partitionName, destinationDataset); lineageInfo.putDestination(destination, 0, state); Map<String, Set<LineageEventBuilder>> events = LineageInfo.load(state); LineageEventBuilder event = first(events.get("0")); verify(event, topic, source, destination); // Verify gobblin tracking event GobblinTrackingEvent trackingEvent = event.build(); Assert.assertEquals(LineageEventBuilder.isLineageEvent(trackingEvent), true); Assert.assertEquals(LineageEventBuilder.fromEvent(trackingEvent), event); }
LineageInfo lineageInfo = getLineageInfo(); DatasetDescriptor source = new DatasetDescriptor(kafka, topic); lineageInfo.setSource(source, state0); DatasetDescriptor destination00 = new DatasetDescriptor(hdfs, "/data/tracking"); destination00.addMetadata(branch, "0"); lineageInfo.setSource(source, state1); List<State> states = Lists.newArrayList(); states.add(state0);
source.addMetadata(DatasetConstants.BROKERS, kafkaBrokers); if (this.lineageInfo.isPresent()) { this.lineageInfo.get().setSource(source, workUnit);
/**
 * Sets the lineage source of the given work unit to the source dataset of the
 * {@link ConvertibleHiveDataset} the work unit was built from.
 *
 * <p>NOTE(review): assumes the work unit always carries a ConvertibleHiveDataset —
 * the cast below throws ClassCastException otherwise; confirm with callers.
 *
 * @param workUnit    the work unit to tag with lineage source information
 * @param lineageInfo optional lineage tracker; no-op when absent
 */
@VisibleForTesting
public void setSourceLineageInfo(WorkUnit workUnit, Optional<LineageInfo> lineageInfo) {
  HiveWorkUnit hiveUnit = new HiveWorkUnit(workUnit);
  ConvertibleHiveDataset dataset = (ConvertibleHiveDataset) hiveUnit.getHiveDataset();
  DatasetDescriptor sourceDescriptor = dataset.getSourceDataset();
  // Lineage tracking is optional; do nothing when it is not configured.
  if (!lineageInfo.isPresent()) {
    return;
  }
  lineageInfo.get().setSource(sourceDescriptor, workUnit);
}
}
/**
 * Attaches lineage source information to the work unit when the copy entity is a
 * {@link CopyableFile} with both source and destination descriptors populated.
 *
 * @param copyEntity the entity being copied; only CopyableFile instances carry lineage data
 * @param workUnit   the work unit to tag with lineage source information
 */
private void addLineageInfo(CopyEntity copyEntity, WorkUnit workUnit) {
  if (!(copyEntity instanceof CopyableFile)) {
    return;
  }
  CopyableFile copyableFile = (CopyableFile) copyEntity;
  /*
   * In Gobblin Distcp, the source and target path info of a CopyableFile are determined by its
   * dataset found by a DatasetFinder. Consequently, the source and destination dataset for the
   * CopyableFile lineage are expected to be set by the same logic.
   */
  // Preserve the original short-circuit order: lineage presence first, then the two getters.
  if (lineageInfo.isPresent()
      && copyableFile.getSourceData() != null
      && copyableFile.getDestinationData() != null) {
    lineageInfo.get().setSource(copyableFile.getSourceData(), workUnit);
  }
}
}
/**
 * Records the MySQL table being pulled as the lineage source of this work unit,
 * including the JDBC connection URL as descriptor metadata.
 *
 * <p>NOTE(review): host and database are assumed non-null here — a missing property
 * would make {@code trim()} throw NPE; confirm these keys are always configured.
 *
 * @param sourceState source-level state providing connection host, port and schema
 * @param entity      the entity (table) whose name is appended to the schema name
 * @param workUnit    the work unit to tag with lineage source information
 */
protected void addLineageSourceInfo(SourceState sourceState, SourceEntity entity, WorkUnit workUnit) {
  String host = sourceState.getProp(ConfigurationKeys.SOURCE_CONN_HOST_NAME);
  String port = sourceState.getProp(ConfigurationKeys.SOURCE_CONN_PORT);
  String database = sourceState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_SCHEMA);
  String connectionUrl = "jdbc:mysql://" + host.trim() + ":" + port + "/" + database.trim();

  DatasetDescriptor source = new DatasetDescriptor(DatasetConstants.PLATFORM_MYSQL,
      database + "." + entity.getSourceEntityName());
  source.addMetadata(DatasetConstants.CONNECTION_URL, connectionUrl);

  // Lineage tracking is optional; do nothing when it is not configured.
  if (!lineageInfo.isPresent()) {
    return;
  }
  lineageInfo.get().setSource(source, workUnit);
}
}
source.addMetadata(DatasetConstants.BROKERS, kafkaBrokers); if (this.lineageInfo.isPresent()) { this.lineageInfo.get().setSource(source, workUnit);