@Override public List<WorkUnit> getWorkunits(SourceState state) { initLogger(state); lineageInfo = LineageInfo.getLineageInfo(state.getBroker());
lineageInfo = LineageInfo.getLineageInfo(((SourceState) state).getBroker()); } else if (state instanceof WorkUnitState) { lineageInfo = LineageInfo.getLineageInfo(((WorkUnitState) state).getTaskBrokerNullable()); } else { lineageInfo = Optional.absent();
/** * Build a new {@link CopyDataPublisher} from {@link State}. The constructor expects the following to be set in the * {@link State}, * <ul> * <li>{@link ConfigurationKeys#WRITER_OUTPUT_DIR} * <li>{@link ConfigurationKeys#WRITER_FILE_SYSTEM_URI} * </ul> * */ public CopyDataPublisher(State state) throws IOException { super(state); // Extract LineageInfo from state if (state instanceof SourceState) { lineageInfo = LineageInfo.getLineageInfo(((SourceState) state).getBroker()); } else if (state instanceof WorkUnitState) { lineageInfo = LineageInfo.getLineageInfo(((WorkUnitState) state).getTaskBrokerNullable()); } else { lineageInfo = Optional.absent(); } String uri = this.state.getProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, ConfigurationKeys.LOCAL_FS_URI); this.fs = FileSystem.get(URI.create(uri), WriterUtils.getFsConfiguration(state)); FileAwareInputStreamDataWriterBuilder.setJobSpecificOutputPaths(state); this.writerOutputDir = new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR)); MetricContext metricContext = Instrumented.getMetricContext(state, CopyDataPublisher.class, GobblinMetrics.getCustomTagsFromState(state)); this.eventSubmitter = new EventSubmitter.Builder(metricContext, "org.apache.gobblin.copy.CopyDataPublisher").build(); this.recoveryHelper = new RecoveryHelper(this.fs, state); this.recoveryHelper.purgeOldPersistedFile(); }
/**
 * Builds a top-level broker, a job-scoped broker beneath it and a task-scoped broker beneath the
 * job broker, then looks up {@link LineageInfo} through both the job and the task broker.
 *
 * <p>Asserts that both lookups yield the very same instance before returning it.
 *
 * @return the {@link LineageInfo} obtained through the task-scoped broker
 */
private LineageInfo getLineageInfo() {
  SharedResourcesBroker<GobblinScopeTypes> instanceBroker = SharedResourcesBrokerFactory
      .createDefaultTopLevelBroker(ConfigFactory.empty(), GobblinScopeTypes.GLOBAL.defaultScopeInstance());

  // Timestamps are used as the job id and as the task-name suffix.
  SharedResourcesBroker<GobblinScopeTypes> jobBroker = instanceBroker
      .newSubscopedBuilder(new JobScopeInstance("LineageEventTest", String.valueOf(System.currentTimeMillis())))
      .build();
  SharedResourcesBroker<GobblinScopeTypes> taskBroker = jobBroker
      .newSubscopedBuilder(new TaskScopeInstance("LineageEventTestTask" + System.currentTimeMillis()))
      .build();

  LineageInfo jobLineageInfo = LineageInfo.getLineageInfo(jobBroker).get();
  LineageInfo taskLineageInfo = LineageInfo.getLineageInfo(taskBroker).get();

  // assertSame reports both instances on failure, unlike assertTrue(a == b).
  Assert.assertSame(taskLineageInfo, jobLineageInfo);
  return taskLineageInfo;
}
lineageInfo = LineageInfo.getLineageInfo(((SourceState) state).getBroker()); } else if (state instanceof WorkUnitState) { lineageInfo = LineageInfo.getLineageInfo(((WorkUnitState) state).getTaskBrokerNullable()); } else { lineageInfo = Optional.absent();
/**
 * Applies default dataset-finder settings to the state where they are missing, delegates work unit
 * creation to the superclass, and sets source lineage info on every returned work unit for which
 * {@link LineageUtils#shouldSetLineageInfo} returns true.
 *
 * @param state source state; may be updated with default values for the dataset finder class and
 *        the dataset config prefix
 * @return the work units produced by the superclass
 */
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  // Defaults are only written for keys that are not already present in the state.
  boolean finderClassConfigured = state.contains(HIVE_SOURCE_DATASET_FINDER_CLASS_KEY);
  if (!finderClassConfigured) {
    state.setProp(HIVE_SOURCE_DATASET_FINDER_CLASS_KEY, ConvertibleHiveDatasetFinder.class.getName());
  }
  boolean configPrefixConfigured = state.contains(HiveDatasetFinder.HIVE_DATASET_CONFIG_PREFIX_KEY);
  if (!configPrefixConfigured) {
    state.setProp(HiveDatasetFinder.HIVE_DATASET_CONFIG_PREFIX_KEY, "hive.conversion.avro");
  }

  this.lineageInfo = LineageInfo.getLineageInfo(state.getBroker());

  List<WorkUnit> units = super.getWorkunits(state);
  for (WorkUnit unit : units) {
    if (!LineageUtils.shouldSetLineageInfo(unit)) {
      continue;
    }
    setSourceLineageInfo(unit, this.lineageInfo);
  }
  return units;
}
@Override public List<WorkUnit> getWorkunits(SourceState state) { this.metricContext = Instrumented.getMetricContext(state, KafkaSource.class); this.lineageInfo = LineageInfo.getLineageInfo(state.getBroker());
this.lineageInfo = LineageInfo.getLineageInfo(state.getBroker());
/**
 * Publishes a single task state and verifies that a lineage destination is recorded for
 * branch 0 only.
 */
@Test
public void testPublishSingleTask() throws IOException {
  final String branch0DestinationKey = "gobblin.event.lineage.branch.0.destination";
  final String branch1DestinationKey = "gobblin.event.lineage.branch.1.destination";

  WorkUnitState taskState = buildTaskState(1);

  // Register the source dataset on the task state before publishing.
  LineageInfo taskLineageInfo = LineageInfo.getLineageInfo(taskState.getTaskBroker()).get();
  DatasetDescriptor kafkaSource = new DatasetDescriptor("kafka", "testTopic");
  taskLineageInfo.setSource(kafkaSource, taskState);

  BaseDataPublisher dataPublisher = new BaseDataPublisher(taskState);
  dataPublisher.publishData(taskState);

  Assert.assertTrue(taskState.contains(branch0DestinationKey));
  Assert.assertFalse(taskState.contains(branch1DestinationKey));
}
/**
 * Publishes two task states together and verifies that each of them ends up with a lineage
 * destination for both branch 0 and branch 1.
 */
@Test
public void testPublishMultiTasks() throws IOException {
  final String branch0DestinationKey = "gobblin.event.lineage.branch.0.destination";
  final String branch1DestinationKey = "gobblin.event.lineage.branch.1.destination";

  WorkUnitState state1 = buildTaskState(2);
  WorkUnitState state2 = buildTaskState(2);

  // Both states get the same source; the LineageInfo is looked up through the first state's broker.
  LineageInfo sharedLineageInfo = LineageInfo.getLineageInfo(state1.getTaskBroker()).get();
  DatasetDescriptor kafkaSource = new DatasetDescriptor("kafka", "testTopic");
  sharedLineageInfo.setSource(kafkaSource, state1);
  sharedLineageInfo.setSource(kafkaSource, state2);

  BaseDataPublisher dataPublisher = new BaseDataPublisher(state1);
  ImmutableList<WorkUnitState> publishedStates = ImmutableList.of(state1, state2);
  dataPublisher.publishData(publishedStates);

  for (WorkUnitState published : publishedStates) {
    Assert.assertTrue(published.contains(branch0DestinationKey));
    Assert.assertTrue(published.contains(branch1DestinationKey));
  }
}
LineageInfo lineageInfo = LineageInfo.getLineageInfo(state.getTaskBroker()).get(); DatasetDescriptor source = new DatasetDescriptor("kafka", "testTopic"); lineageInfo.setSource(source, state);
workUnit.setProp(ConfigurationKeys.JOB_ID_KEY, "123456"); Optional<LineageInfo> lineageInfo = LineageInfo.getLineageInfo(getSharedJobBroker(workUnit.getProperties())); HiveAvroToOrcSource src = new HiveAvroToOrcSource(); Assert.assertTrue(LineageUtils.shouldSetLineageInfo(workUnit));
@Override public List<WorkUnit> getWorkunits(SourceState state) { initLogger(state); lineageInfo = LineageInfo.getLineageInfo(state.getBroker());
lineageInfo = LineageInfo.getLineageInfo(((SourceState) state).getBroker()); } else if (state instanceof WorkUnitState) { lineageInfo = LineageInfo.getLineageInfo(((WorkUnitState) state).getTaskBrokerNullable()); } else { lineageInfo = Optional.absent();
/** * Build a new {@link CopyDataPublisher} from {@link State}. The constructor expects the following to be set in the * {@link State}, * <ul> * <li>{@link ConfigurationKeys#WRITER_OUTPUT_DIR} * <li>{@link ConfigurationKeys#WRITER_FILE_SYSTEM_URI} * </ul> * */ public CopyDataPublisher(State state) throws IOException { super(state); // Extract LineageInfo from state if (state instanceof SourceState) { lineageInfo = LineageInfo.getLineageInfo(((SourceState) state).getBroker()); } else if (state instanceof WorkUnitState) { lineageInfo = LineageInfo.getLineageInfo(((WorkUnitState) state).getTaskBrokerNullable()); } else { lineageInfo = Optional.absent(); } String uri = this.state.getProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, ConfigurationKeys.LOCAL_FS_URI); this.fs = FileSystem.get(URI.create(uri), WriterUtils.getFsConfiguration(state)); FileAwareInputStreamDataWriterBuilder.setJobSpecificOutputPaths(state); this.writerOutputDir = new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR)); MetricContext metricContext = Instrumented.getMetricContext(state, CopyDataPublisher.class, GobblinMetrics.getCustomTagsFromState(state)); this.eventSubmitter = new EventSubmitter.Builder(metricContext, "org.apache.gobblin.copy.CopyDataPublisher").build(); this.recoveryHelper = new RecoveryHelper(this.fs, state); this.recoveryHelper.purgeOldPersistedFile(); }
lineageInfo = LineageInfo.getLineageInfo(((SourceState) state).getBroker()); } else if (state instanceof WorkUnitState) { lineageInfo = LineageInfo.getLineageInfo(((WorkUnitState) state).getTaskBrokerNullable()); } else { lineageInfo = Optional.absent();
/**
 * Applies default dataset-finder settings to the state where they are missing, delegates work unit
 * creation to the superclass, and sets source lineage info on every returned work unit for which
 * {@link LineageUtils#shouldSetLineageInfo} returns true.
 *
 * @param state source state; may be updated with default values for the dataset finder class and
 *        the dataset config prefix
 * @return the work units produced by the superclass
 */
@Override
public List<WorkUnit> getWorkunits(SourceState state) {
  // Defaults are only written for keys that are not already present in the state.
  boolean finderClassConfigured = state.contains(HIVE_SOURCE_DATASET_FINDER_CLASS_KEY);
  if (!finderClassConfigured) {
    state.setProp(HIVE_SOURCE_DATASET_FINDER_CLASS_KEY, ConvertibleHiveDatasetFinder.class.getName());
  }
  boolean configPrefixConfigured = state.contains(HiveDatasetFinder.HIVE_DATASET_CONFIG_PREFIX_KEY);
  if (!configPrefixConfigured) {
    state.setProp(HiveDatasetFinder.HIVE_DATASET_CONFIG_PREFIX_KEY, "hive.conversion.avro");
  }

  this.lineageInfo = LineageInfo.getLineageInfo(state.getBroker());

  List<WorkUnit> units = super.getWorkunits(state);
  for (WorkUnit unit : units) {
    if (!LineageUtils.shouldSetLineageInfo(unit)) {
      continue;
    }
    setSourceLineageInfo(unit, this.lineageInfo);
  }
  return units;
}
@Override public List<WorkUnit> getWorkunits(SourceState state) { this.metricContext = Instrumented.getMetricContext(state, KafkaSource.class); this.lineageInfo = LineageInfo.getLineageInfo(state.getBroker());
this.lineageInfo = LineageInfo.getLineageInfo(state.getBroker());