/**
 * Builds the destination {@link DatasetDescriptor} for the given branch: the publisher
 * output directory on that branch's filesystem, tagged with the FS URI and branch id.
 */
protected DatasetDescriptor createDestinationDescriptor(WorkUnitState state, int branchId) {
  FileSystem branchFs = this.publisherFileSystemByBranches.get(branchId);
  Path outputDir = getPublisherOutputDir(state, branchId);
  DatasetDescriptor descriptor = new DatasetDescriptor(branchFs.getScheme(), outputDir.toString());
  descriptor.addMetadata(DatasetConstants.FS_URI, branchFs.getUri().toString());
  descriptor.addMetadata(DatasetConstants.BRANCH, String.valueOf(branchId));
  return descriptor;
}
/**
 * Deserialize a {@link DatasetDescriptor} from a string map: the platform/name entries
 * seed the descriptor, every other entry becomes metadata.
 *
 * @deprecated use {@link Descriptor#deserialize(String)}
 */
@Deprecated
public static DatasetDescriptor fromDataMap(Map<String, String> dataMap) {
  DatasetDescriptor descriptor = new DatasetDescriptor(dataMap.get(PLATFORM_KEY), dataMap.get(NAME_KEY));
  for (Map.Entry<String, String> entry : dataMap.entrySet()) {
    String key = entry.getKey();
    // Skip the two reserved keys already consumed by the constructor.
    if (!key.equals(PLATFORM_KEY) && !key.equals(NAME_KEY)) {
      descriptor.addMetadata(key, entry.getValue());
    }
  }
  return descriptor;
}
}
/**
 * Resolves a Hive {@link DatasetDescriptor} into an HDFS descriptor built from the
 * {@code FS_SCHEME} and {@code FS_LOCATION} recorded in the raw descriptor's metadata;
 * the Hive table name is preserved as {@code HIVE_TABLE} metadata.
 *
 * @throws IllegalArgumentException if either required metadata key is missing
 */
@Override
public DatasetDescriptor resolve(DatasetDescriptor raw, State state) {
  ImmutableMap<String, String> metadata = raw.getMetadata();
  Preconditions.checkArgument(metadata.containsKey(DatasetConstants.FS_SCHEME),
      String.format("Hive Dataset Descriptor must contain metadata %s to create Hdfs Dataset Descriptor",
          DatasetConstants.FS_SCHEME));
  // Bug fix: this precondition previously re-checked FS_SCHEME while its message named
  // FS_LOCATION, so a missing FS_LOCATION slipped through to a null descriptor name.
  Preconditions.checkArgument(metadata.containsKey(DatasetConstants.FS_LOCATION),
      String.format("Hive Dataset Descriptor must contain metadata %s to create Hdfs Dataset Descriptor",
          DatasetConstants.FS_LOCATION));
  DatasetDescriptor datasetDescriptor =
      new DatasetDescriptor(metadata.get(DatasetConstants.FS_SCHEME), metadata.get(DatasetConstants.FS_LOCATION));
  datasetDescriptor.addMetadata(HIVE_TABLE, raw.getName());
  return datasetDescriptor;
}
}
/**
 * Set file system based source and destination dataset for this {@link CopyableFile}
 *
 * @param originFs {@link FileSystem} where this {@link CopyableFile} origins
 * @param targetFs {@link FileSystem} where this {@link CopyableFile} is copied to
 */
public void setFsDatasets(FileSystem originFs, FileSystem targetFs) {
  // The lineage dataset for a plain file is its parent folder; for a directory it is
  // the directory itself.
  boolean isDir = origin.isDirectory();

  Path sourcePath = Path.getPathWithoutSchemeAndAuthority(origin.getPath());
  String sourceName = (isDir ? sourcePath : sourcePath.getParent()).toString();
  DatasetDescriptor src = new DatasetDescriptor(originFs.getScheme(), sourceName);
  src.addMetadata(DatasetConstants.FS_URI, originFs.getUri().toString());
  sourceData = src;

  Path destPath = Path.getPathWithoutSchemeAndAuthority(destination);
  String destName = (isDir ? destPath : destPath.getParent()).toString();
  DatasetDescriptor dst = new DatasetDescriptor(targetFs.getScheme(), destName);
  dst.addMetadata(DatasetConstants.FS_URI, targetFs.getUri().toString());
  destinationData = dst;
}
/** Returns the Hive destination descriptor ("db.table") tagged with the target FS URI. */
DatasetDescriptor getDestinationDataset() {
  String qualifiedTable = this.getTargetDatabase() + "." + this.getTargetTable();
  DatasetDescriptor dest = new DatasetDescriptor(DatasetConstants.PLATFORM_HIVE, qualifiedTable);
  dest.addMetadata(DatasetConstants.FS_URI, this.getTargetFs().getUri().toString());
  return dest;
}
}
/**
 * Builds the Hive source {@link DatasetDescriptor} ("db.table") for lineage, recording the
 * backing filesystem scheme and the data location (scheme/authority stripped).
 *
 * @throws RuntimeException wrapping any {@link IOException} raised while resolving the
 *         table's filesystem
 */
private DatasetDescriptor createSourceDataset() {
  try {
    String sourceTable = getTable().getDbName() + "." + getTable().getTableName();
    DatasetDescriptor source = new DatasetDescriptor(DatasetConstants.PLATFORM_HIVE, sourceTable);
    Path sourcePath = getTable().getDataLocation();
    // Parameterized logging: same message, but skips the eager String.format when the
    // log level is disabled.
    log.info("[{}]Source path {} being used in conversion", this.getClass().getName(), sourcePath);
    String sourceLocation = Path.getPathWithoutSchemeAndAuthority(sourcePath).toString();
    FileSystem sourceFs = sourcePath.getFileSystem(new Configuration());
    source.addMetadata(DatasetConstants.FS_SCHEME, sourceFs.getScheme());
    source.addMetadata(DatasetConstants.FS_LOCATION, sourceLocation);
    return source;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
/**
 * Builds one destination {@link DatasetDescriptor} per configured destination format,
 * skipping formats with no conversion config. Each destination inherits the source
 * dataset's FS scheme and points at "{destinationDataPath}/final".
 */
private List<DatasetDescriptor> createDestDatasets() {
  List<DatasetDescriptor> destDatasets = new ArrayList<>();
  for (String format : getDestFormats()) {
    Optional<ConversionConfig> maybeConfig = getConversionConfigForFormat(format);
    if (!maybeConfig.isPresent()) {
      continue;
    }
    ConversionConfig config = maybeConfig.get();
    String destTable = config.getDestinationDbName() + "." + config.getDestinationTableName();
    DatasetDescriptor dest = new DatasetDescriptor(DatasetConstants.PLATFORM_HIVE, destTable);
    String destLocation = config.getDestinationDataPath() + Path.SEPARATOR + "final";
    dest.addMetadata(DatasetConstants.FS_SCHEME, getSourceDataset().getMetadata().get(DatasetConstants.FS_SCHEME));
    dest.addMetadata(DatasetConstants.FS_LOCATION, destLocation);
    destDatasets.add(dest);
  }
  return destDatasets;
}
/**
 * Records the MySQL source table ("schema.entity") as the lineage source for the given
 * work unit, attaching the JDBC connection URL as metadata.
 */
protected void addLineageSourceInfo(SourceState sourceState, SourceEntity entity, WorkUnit workUnit) {
  String host = sourceState.getProp(ConfigurationKeys.SOURCE_CONN_HOST_NAME);
  String port = sourceState.getProp(ConfigurationKeys.SOURCE_CONN_PORT);
  String database = sourceState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_SCHEMA);
  // NOTE(review): host/database are trimmed without null checks — this assumes the
  // host, port, and schema props are always set; an absent prop would NPE here. Confirm
  // against the source configuration contract.
  String connectionUrl = "jdbc:mysql://" + host.trim() + ":" + port + "/" + database.trim();
  DatasetDescriptor source =
      new DatasetDescriptor(DatasetConstants.PLATFORM_MYSQL, database + "." + entity.getSourceEntityName());
  source.addMetadata(DatasetConstants.CONNECTION_URL, connectionUrl);
  // Lineage reporting is optional; only emit when a LineageInfo instance is available.
  if (lineageInfo.isPresent()) {
    lineageInfo.get().setSource(source, workUnit);
  }
}
}
/** Returns the Hive source descriptor ("db.table") tagged with the dataset's FS URI. */
DatasetDescriptor getSourceDataset() {
  String qualifiedTable = dataset.getTable().getDbName() + "." + dataset.getTable().getTableName();
  DatasetDescriptor descriptor = new DatasetDescriptor(DatasetConstants.PLATFORM_HIVE, qualifiedTable);
  descriptor.addMetadata(DatasetConstants.FS_URI, dataset.getFs().getUri().toString());
  return descriptor;
}
/** Verifies that {@code DatasetDescriptor.copy()} yields an equal descriptor (name, platform, metadata, equals/hashCode). */
@Test
public void testDatasetDescriptor() {
  DatasetDescriptor dataset = new DatasetDescriptor("hdfs", "/data/tracking/PageViewEvent");
  dataset.addMetadata("fsUri", "hdfs://test.com:2018");

  DatasetDescriptor copy = dataset.copy();

  Assert.assertEquals(copy.getName(), dataset.getName());
  Assert.assertEquals(copy.getPlatform(), dataset.getPlatform());
  Assert.assertEquals(copy.getMetadata(), dataset.getMetadata());
  // equals/hashCode must agree between the original and its copy.
  Assert.assertEquals(dataset, copy);
  Assert.assertEquals(dataset.hashCode(), copy.hashCode());
}
// Per-branch expected destination; `i` is the branch id supplied by the surrounding loop.
String outputPath = String.format("/data/output/branch%d/namespace/table", i);
DatasetDescriptor destinationDataset = new DatasetDescriptor("file", outputPath);
destinationDataset.addMetadata("fsUri", "file:///");
destinationDataset.addMetadata("branch", "" + i);
lineageInfo.setSource(source, state0);
// state0: branch 0 lands on HDFS, branch 1 on MySQL.
DatasetDescriptor destination00 = new DatasetDescriptor(hdfs, "/data/tracking");
destination00.addMetadata(branch, "0");
lineageInfo.putDestination(destination00, 0, state0);
DatasetDescriptor destination01 = new DatasetDescriptor(mysql, "kafka.testTopic");
destination01.addMetadata(branch, "1");
lineageInfo.putDestination(destination01, 1, state0);
// destination12/destination10 are declared elsewhere in this test.
destination12.addMetadata(branch, "2");
lineageInfo.putDestination(destination12, 2, state1);
eventsList = LineageInfo.load(states);
lineageInfo.putDestination(destination10, 0, state1);
DatasetDescriptor destination11 = new DatasetDescriptor("hive", "kafka.testTopic1");
destination11.addMetadata(branch, "1");
lineageInfo.putDestination(destination11, 1, state1);
// Reload events after registering the state1 destinations.
eventsList = LineageInfo.load(states);
// Attach the Kafka broker list, then report the source if lineage is enabled.
// (Fragment: the enclosing method and the closing braces are outside this view.)
source.addMetadata(DatasetConstants.BROKERS, kafkaBrokers);
if (this.lineageInfo.isPresent()) {
  this.lineageInfo.get().setSource(source, workUnit);
/**
 * Create destination dataset descriptor: the publisher output directory for this branch
 * on the branch's filesystem, tagged with the FS URI and the branch id.
 */
protected DatasetDescriptor createDestinationDescriptor(WorkUnitState state, int branchId) {
  Path publisherOutputDir = getPublisherOutputDir(state, branchId);
  FileSystem fs = this.publisherFileSystemByBranches.get(branchId);
  DatasetDescriptor destination = new DatasetDescriptor(fs.getScheme(), publisherOutputDir.toString());
  destination.addMetadata(DatasetConstants.FS_URI, fs.getUri().toString());
  destination.addMetadata(DatasetConstants.BRANCH, String.valueOf(branchId));
  return destination;
}
/**
 * Deserialize a {@link DatasetDescriptor} from a string map: the platform/name entries
 * seed the descriptor, every other entry becomes metadata.
 *
 * @deprecated use {@link Descriptor#deserialize(String)}
 */
@Deprecated
public static DatasetDescriptor fromDataMap(Map<String, String> dataMap) {
  DatasetDescriptor descriptor = new DatasetDescriptor(dataMap.get(PLATFORM_KEY), dataMap.get(NAME_KEY));
  dataMap.forEach((key, value) -> {
    // Skip the two reserved keys already consumed by the constructor.
    if (!key.equals(PLATFORM_KEY) && !key.equals(NAME_KEY)) {
      descriptor.addMetadata(key, value);
    }
  });
  return descriptor;
}
}
/**
 * Resolves a Hive {@link DatasetDescriptor} into an HDFS descriptor built from the
 * {@code FS_SCHEME} and {@code FS_LOCATION} recorded in the raw descriptor's metadata;
 * the Hive table name is preserved as {@code HIVE_TABLE} metadata.
 *
 * @throws IllegalArgumentException if either required metadata key is missing
 */
@Override
public DatasetDescriptor resolve(DatasetDescriptor raw, State state) {
  ImmutableMap<String, String> metadata = raw.getMetadata();
  Preconditions.checkArgument(metadata.containsKey(DatasetConstants.FS_SCHEME),
      String.format("Hive Dataset Descriptor must contain metadata %s to create Hdfs Dataset Descriptor",
          DatasetConstants.FS_SCHEME));
  // Bug fix: this precondition previously re-checked FS_SCHEME while its message named
  // FS_LOCATION, so a missing FS_LOCATION slipped through to a null descriptor name.
  Preconditions.checkArgument(metadata.containsKey(DatasetConstants.FS_LOCATION),
      String.format("Hive Dataset Descriptor must contain metadata %s to create Hdfs Dataset Descriptor",
          DatasetConstants.FS_LOCATION));
  DatasetDescriptor datasetDescriptor =
      new DatasetDescriptor(metadata.get(DatasetConstants.FS_SCHEME), metadata.get(DatasetConstants.FS_LOCATION));
  datasetDescriptor.addMetadata(HIVE_TABLE, raw.getName());
  return datasetDescriptor;
}
}
/** Returns the Hive destination descriptor ("db.table") tagged with the target FS URI. */
DatasetDescriptor getDestinationDataset() {
  String destinationTable = this.getTargetDatabase() + "." + this.getTargetTable();
  DatasetDescriptor destinationDataset = new DatasetDescriptor(DatasetConstants.PLATFORM_HIVE, destinationTable);
  destinationDataset.addMetadata(DatasetConstants.FS_URI, this.getTargetFs().getUri().toString());
  return destinationDataset;
}
}
/**
 * Records the MySQL source table ("schema.entity") as the lineage source for the given
 * work unit, attaching the JDBC connection URL as metadata.
 */
protected void addLineageSourceInfo(SourceState sourceState, SourceEntity entity, WorkUnit workUnit) {
  String host = sourceState.getProp(ConfigurationKeys.SOURCE_CONN_HOST_NAME);
  String port = sourceState.getProp(ConfigurationKeys.SOURCE_CONN_PORT);
  String database = sourceState.getProp(ConfigurationKeys.SOURCE_QUERYBASED_SCHEMA);
  // NOTE(review): host/database are trimmed without null checks — this assumes the
  // host, port, and schema props are always set; an absent prop would NPE here. Confirm
  // against the source configuration contract.
  String connectionUrl = "jdbc:mysql://" + host.trim() + ":" + port + "/" + database.trim();
  DatasetDescriptor source =
      new DatasetDescriptor(DatasetConstants.PLATFORM_MYSQL, database + "." + entity.getSourceEntityName());
  source.addMetadata(DatasetConstants.CONNECTION_URL, connectionUrl);
  // Lineage reporting is optional; only emit when a LineageInfo instance is available.
  if (lineageInfo.isPresent()) {
    lineageInfo.get().setSource(source, workUnit);
  }
}
}
/**
 * Builds the Hive source {@link DatasetDescriptor} ("db.table") for lineage, recording
 * the backing filesystem scheme and the data location (scheme/authority stripped).
 *
 * @throws RuntimeException wrapping any {@link IOException} raised while resolving the
 *         table's filesystem
 */
private DatasetDescriptor createSourceDataset() {
  try {
    String qualifiedName = getTable().getDbName() + "." + getTable().getTableName();
    DatasetDescriptor descriptor = new DatasetDescriptor(DatasetConstants.PLATFORM_HIVE, qualifiedName);
    Path dataLocation = getTable().getDataLocation();
    FileSystem fs = dataLocation.getFileSystem(new Configuration());
    descriptor.addMetadata(DatasetConstants.FS_SCHEME, fs.getScheme());
    descriptor.addMetadata(DatasetConstants.FS_LOCATION,
        Path.getPathWithoutSchemeAndAuthority(dataLocation).toString());
    return descriptor;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
/** Returns the Hive source descriptor ("db.table") tagged with the dataset's FS URI. */
DatasetDescriptor getSourceDataset() {
  String sourceTable = dataset.getTable().getDbName() + "." + dataset.getTable().getTableName();
  DatasetDescriptor sourceDataset = new DatasetDescriptor(DatasetConstants.PLATFORM_HIVE, sourceTable);
  sourceDataset.addMetadata(DatasetConstants.FS_URI, dataset.getFs().getUri().toString());
  return sourceDataset;
}