/**
 * Deserialize a {@link DatasetDescriptor} from a string map
 *
 * @deprecated use {@link Descriptor#deserialize(String)}
 */
@Deprecated
public static DatasetDescriptor fromDataMap(Map<String, String> dataMap) {
  DatasetDescriptor descriptor =
      new DatasetDescriptor(dataMap.get(PLATFORM_KEY), dataMap.get(NAME_KEY));
  // PLATFORM_KEY and NAME_KEY were consumed by the constructor above;
  // every other entry is free-form metadata.
  for (Map.Entry<String, String> entry : dataMap.entrySet()) {
    String key = entry.getKey();
    if (!key.equals(PLATFORM_KEY) && !key.equals(NAME_KEY)) {
      descriptor.addMetadata(key, entry.getValue());
    }
  }
  return descriptor;
}
}
/** Verifies that {@link DatasetDescriptor#copy()} yields an equal descriptor with identical fields. */
@Test
public void testDatasetDescriptor() {
  DatasetDescriptor original = new DatasetDescriptor("hdfs", "/data/tracking/PageViewEvent");
  original.addMetadata("fsUri", "hdfs://test.com:2018");

  DatasetDescriptor duplicate = original.copy();

  // Field-by-field equality of the copy.
  Assert.assertEquals(duplicate.getName(), original.getName());
  Assert.assertEquals(duplicate.getPlatform(), original.getPlatform());
  Assert.assertEquals(duplicate.getMetadata(), original.getMetadata());
  // equals/hashCode contract between the original and its copy.
  Assert.assertEquals(original, duplicate);
  Assert.assertEquals(original.hashCode(), duplicate.hashCode());
}
/**
 * Copy constructor: duplicates name, platform and all metadata entries.
 *
 * @deprecated use {@link #copy()}
 */
@Deprecated
public DatasetDescriptor(DatasetDescriptor other) {
  super(other.getName());
  this.platform = other.getPlatform();
  this.metadata.putAll(other.getMetadata());
}
/**
 * Resolves a Hive {@link DatasetDescriptor} into an HDFS one using the
 * filesystem scheme and location carried in the raw descriptor's metadata.
 *
 * @param raw Hive dataset descriptor; must carry {@code FS_SCHEME} and {@code FS_LOCATION} metadata
 * @param state job state (not consulted here)
 * @return an HDFS descriptor named after the FS location, tagged with the originating Hive table
 * @throws IllegalArgumentException if either required metadata key is missing
 */
@Override
public DatasetDescriptor resolve(DatasetDescriptor raw, State state) {
  ImmutableMap<String, String> metadata = raw.getMetadata();
  Preconditions.checkArgument(metadata.containsKey(DatasetConstants.FS_SCHEME),
      String.format("Hive Dataset Descriptor must contain metadata %s to create Hdfs Dataset Descriptor",
          DatasetConstants.FS_SCHEME));
  // Bug fix: this check previously re-tested FS_SCHEME while its message reported FS_LOCATION,
  // letting a descriptor without FS_LOCATION slip through to the constructor below.
  Preconditions.checkArgument(metadata.containsKey(DatasetConstants.FS_LOCATION),
      String.format("Hive Dataset Descriptor must contain metadata %s to create Hdfs Dataset Descriptor",
          DatasetConstants.FS_LOCATION));
  DatasetDescriptor datasetDescriptor =
      new DatasetDescriptor(metadata.get(DatasetConstants.FS_SCHEME), metadata.get(DatasetConstants.FS_LOCATION));
  datasetDescriptor.addMetadata(HIVE_TABLE, raw.getName());
  return datasetDescriptor;
}
}
/**
 * Returns a copy of this descriptor (same name, platform and metadata).
 */
@Override
public DatasetDescriptor copy() {
  // Delegates to the copy constructor, which duplicates all state.
  DatasetDescriptor duplicate = new DatasetDescriptor(this);
  return duplicate;
}
/**
 * Builds one destination {@link DatasetDescriptor} per configured destination format,
 * skipping formats that have no conversion config. Each descriptor is named
 * "db.table" and tagged with the source FS scheme and the final output location.
 */
private List<DatasetDescriptor> createDestDatasets() {
  List<DatasetDescriptor> descriptors = new ArrayList<>();
  for (String format : getDestFormats()) {
    Optional<ConversionConfig> maybeConfig = getConversionConfigForFormat(format);
    if (!maybeConfig.isPresent()) {
      continue;
    }
    ConversionConfig config = maybeConfig.get();
    String qualifiedTable = config.getDestinationDbName() + "." + config.getDestinationTableName();
    DatasetDescriptor dest = new DatasetDescriptor(DatasetConstants.PLATFORM_HIVE, qualifiedTable);
    // Converted data is published under "<destinationDataPath>/final".
    String finalLocation = config.getDestinationDataPath() + Path.SEPARATOR + "final";
    dest.addMetadata(DatasetConstants.FS_SCHEME, getSourceDataset().getMetadata().get(DatasetConstants.FS_SCHEME));
    dest.addMetadata(DatasetConstants.FS_LOCATION, finalLocation);
    descriptors.add(dest);
  }
  return descriptors;
}
/**
 * Resolves the raw descriptor to one with the fixed {@code DATASET_NAME},
 * preserving the platform and copying every metadata entry unchanged.
 */
@Override
public DatasetDescriptor resolve(DatasetDescriptor raw, State state) {
  DatasetDescriptor resolved = new DatasetDescriptor(raw.getPlatform(), DATASET_NAME);
  raw.getMetadata().forEach((key, value) -> resolved.addMetadata(key, value));
  return resolved;
}
}
/**
 * Finds the lineage event whose destination matches the given platform and branch id.
 *
 * @param events candidate lineage events
 * @param branchId branch to match against the destination's BRANCH metadata
 * @param destinationPlatform platform the destination descriptor must report
 * @return the first matching event, or {@code null} if none matches
 */
private LineageEventBuilder getLineageEvent(Collection<LineageEventBuilder> events, int branchId,
    String destinationPlatform) {
  // Hoisted out of the loop; also used as the left operand of equals below.
  String branch = String.valueOf(branchId);
  for (LineageEventBuilder event : events) {
    DatasetDescriptor descriptor = (DatasetDescriptor) event.getDestination();
    // Bug fix: comparing with the known-non-null branch string on the left avoids an
    // NPE when a destination has no BRANCH metadata (get() returns null) — such
    // destinations are now simply skipped.
    if (descriptor.getPlatform().equals(destinationPlatform)
        && branch.equals(descriptor.getMetadata().get(DatasetConstants.BRANCH))) {
      return event;
    }
  }
  return null;
}
/** Hash combining the parent dataset's hash with this name via the 31-multiplier scheme. */
@Override
public int hashCode() {
  // Single-expression form of: result = dataset.hashCode(); result = 31 * result + name.
  return 31 * dataset.hashCode() + getName().hashCode();
}
/**
 * Resolves a Hive {@link DatasetDescriptor} into an HDFS one using the
 * filesystem scheme and location carried in the raw descriptor's metadata.
 *
 * @param raw Hive dataset descriptor; must carry {@code FS_SCHEME} and {@code FS_LOCATION} metadata
 * @param state job state (not consulted here)
 * @return an HDFS descriptor named after the FS location, tagged with the originating Hive table
 * @throws IllegalArgumentException if either required metadata key is missing
 */
@Override
public DatasetDescriptor resolve(DatasetDescriptor raw, State state) {
  ImmutableMap<String, String> metadata = raw.getMetadata();
  Preconditions.checkArgument(metadata.containsKey(DatasetConstants.FS_SCHEME),
      String.format("Hive Dataset Descriptor must contain metadata %s to create Hdfs Dataset Descriptor",
          DatasetConstants.FS_SCHEME));
  // Bug fix: this check previously re-tested FS_SCHEME while its message reported FS_LOCATION,
  // letting a descriptor without FS_LOCATION slip through to the constructor below.
  Preconditions.checkArgument(metadata.containsKey(DatasetConstants.FS_LOCATION),
      String.format("Hive Dataset Descriptor must contain metadata %s to create Hdfs Dataset Descriptor",
          DatasetConstants.FS_LOCATION));
  DatasetDescriptor datasetDescriptor =
      new DatasetDescriptor(metadata.get(DatasetConstants.FS_SCHEME), metadata.get(DatasetConstants.FS_LOCATION));
  datasetDescriptor.addMetadata(HIVE_TABLE, raw.getName());
  return datasetDescriptor;
}
}
/**
 * Records the Salesforce source entity as the lineage source of the given work unit.
 * No-op when lineage tracking is not configured.
 */
@Override
protected void addLineageSourceInfo(SourceState sourceState, SourceEntity entity, WorkUnit workUnit) {
  if (!lineageInfo.isPresent()) {
    return;  // lineage tracking disabled — nothing to record
  }
  DatasetDescriptor source =
      new DatasetDescriptor(DatasetConstants.PLATFORM_SALESFORCE, entity.getSourceEntityName());
  lineageInfo.get().setSource(source, workUnit);
}
/**
 * Constructs the destination {@link DatasetDescriptor} list, one entry for each
 * destination format that has a conversion config. Descriptors are named
 * "db.table" and carry the source FS scheme plus the final publish location.
 */
private List<DatasetDescriptor> createDestDatasets() {
  List<DatasetDescriptor> results = new ArrayList<>();
  for (String format : getDestFormats()) {
    Optional<ConversionConfig> configOpt = getConversionConfigForFormat(format);
    if (!configOpt.isPresent()) {
      continue;  // no conversion configured for this format
    }
    ConversionConfig conversionConfig = configOpt.get();
    String tableName =
        conversionConfig.getDestinationDbName() + "." + conversionConfig.getDestinationTableName();
    DatasetDescriptor destination = new DatasetDescriptor(DatasetConstants.PLATFORM_HIVE, tableName);
    // Output is written beneath "<destinationDataPath>/final".
    String location = conversionConfig.getDestinationDataPath() + Path.SEPARATOR + "final";
    destination.addMetadata(DatasetConstants.FS_SCHEME,
        getSourceDataset().getMetadata().get(DatasetConstants.FS_SCHEME));
    destination.addMetadata(DatasetConstants.FS_LOCATION, location);
    results.add(destination);
  }
  return results;
}
// First pass: resolve source/destination descriptors from the origin and target filesystems.
copyableFile.setFsDatasets(originFs, targetFs);
DatasetDescriptor source = (DatasetDescriptor) copyableFile.getSourceData();
Assert.assertEquals(source.getName(), "/data/databases/source");
Assert.assertEquals(source.getPlatform(), "hdfs");
Assert.assertEquals(source.getMetadata().get("fsUri"), originFsUri);
DatasetDescriptor destination = (DatasetDescriptor) copyableFile.getDestinationData();
Assert.assertEquals(destination.getName(), "/data/databases/destination");
Assert.assertEquals(destination.getPlatform(), "file");
Assert.assertEquals(destination.getMetadata().get("fsUri"), targetFsUri);
// Second pass: re-resolve (presumably after the copyable file's paths were changed to the
// "/profile" sub-paths by earlier test setup — TODO confirm against the surrounding method)
// and verify the descriptors reflect the new paths while platform/fsUri stay the same.
copyableFile.setFsDatasets(originFs, targetFs);
source = (DatasetDescriptor) copyableFile.getSourceData();
Assert.assertEquals(source.getName(), "/data/databases/source/profile");
Assert.assertEquals(source.getPlatform(), "hdfs");
Assert.assertEquals(source.getMetadata().get("fsUri"), originFsUri);
destination = (DatasetDescriptor) copyableFile.getDestinationData();
Assert.assertEquals(destination.getName(), "/data/databases/destination/profile");
Assert.assertEquals(destination.getPlatform(), "file");
Assert.assertEquals(destination.getMetadata().get("fsUri"), targetFsUri);
/** Hash over platform, name and metadata using the conventional 31-multiplier scheme. */
@Override
public int hashCode() {
  // Single-expression form of: result = platform; result = 31*result + name;
  // result = 31*result + metadata. Produces the identical value.
  return 31 * (31 * platform.hashCode() + getName().hashCode()) + metadata.hashCode();
}
/** Combines the enclosing dataset's hash with this name (standard 31-based mixing). */
@Override
public int hashCode() {
  // Collapsed form of the usual two-step accumulator; same resulting value.
  return 31 * dataset.hashCode() + getName().hashCode();
}
/**
 * Create destination dataset descriptor for the given branch: named after the
 * publisher output directory, tagged with the branch filesystem URI and branch id.
 */
protected DatasetDescriptor createDestinationDescriptor(WorkUnitState state, int branchId) {
  FileSystem branchFs = this.publisherFileSystemByBranches.get(branchId);
  Path outputDir = getPublisherOutputDir(state, branchId);
  DatasetDescriptor descriptor = new DatasetDescriptor(branchFs.getScheme(), outputDir.toString());
  descriptor.addMetadata(DatasetConstants.FS_URI, branchFs.getUri().toString());
  descriptor.addMetadata(DatasetConstants.BRANCH, String.valueOf(branchId));
  return descriptor;
}
@Override public Descriptor getDataDescriptor() { // Dataset is resulted from WriterUtils.getWriterOutputDir(properties, this.numBranches, this.branchId) // The writer dataset might not be same as the published dataset DatasetDescriptor datasetDescriptor = new DatasetDescriptor(fs.getScheme(), outputFile.getParent().toString()); if (partitionKey == null) { return datasetDescriptor; } return new PartitionDescriptor(partitionKey, datasetDescriptor); }
// Source lineage: deserialized straight from the serialized workunit property.
DatasetDescriptor sourceDD =
    GSON.fromJson(props.getProperty("gobblin.event.lineage.source"), DatasetDescriptor.class);
Assert.assertEquals(sourceDD.getPlatform(), "file");
Assert.assertEquals(sourceDD.getName(), "/tmp/test");
Assert.assertEquals(sourceDD.getMetadata().get(HiveToHdfsDatasetResolver.HIVE_TABLE), "db1.tb1");
// Branch 1 destination: nested-ORC output location tagged with its Hive table.
DatasetDescriptor destDD1 =
    (DatasetDescriptor) firstDescriptor(props, "gobblin.event.lineage.branch.1.destination");
Assert.assertEquals(destDD1.getPlatform(), "file");
Assert.assertEquals(destDD1.getName(), "/tmp/data_nestedOrc/db1/tb1/final");
Assert.assertEquals(destDD1.getMetadata().get(HiveToHdfsDatasetResolver.HIVE_TABLE),
    "db1_nestedOrcDb.tb1_nestedOrc");
// Branch 2 destination: flattened-ORC output location tagged with its Hive table.
DatasetDescriptor destDD2 =
    (DatasetDescriptor) firstDescriptor(props, "gobblin.event.lineage.branch.2.destination");
Assert.assertEquals(destDD2.getPlatform(), "file");
Assert.assertEquals(destDD2.getName(), "/tmp/data_flattenedOrc/db1/tb1/final");
Assert.assertEquals(destDD2.getMetadata().get(HiveToHdfsDatasetResolver.HIVE_TABLE),
    "db1_flattenedOrcDb.tb1_flattenedOrc");
/**
 * Serialize to a string map
 *
 * @deprecated use {@link Descriptor#serialize(Descriptor)}
 */
@Deprecated
public Map<String, String> toDataMap() {
  Map<String, String> dataMap = Maps.newHashMap();
  // Reserved keys first, then metadata — preserving the original insertion order
  // (a metadata entry sharing a reserved key would shadow it, as before).
  dataMap.put(PLATFORM_KEY, this.platform);
  dataMap.put(NAME_KEY, getName());
  dataMap.putAll(this.metadata);
  return dataMap;
}