/**
 * Creates an empty lineage state.
 *
 * <p>Starts with no directory-to-operator mappings, a fresh
 * {@code LineageInfo} and a fresh {@code Index}.
 */
public LineageState() {
  this.dirToFop = new HashMap<>();
  this.linfo = new LineageInfo();
  this.index = new Index();
}
/** * Set the lineage information for the associated directory. * * @param dir The directory containing the query results. * @param dc The associated data container. * @param cols The list of columns. */ public synchronized void setLineage(Path dir, DataContainer dc, List<FieldSchema> cols) { // First lookup the file sink operator from the load work. Operator<?> op = dirToFop.get(dir.toUri().toString()); // Go over the associated fields and look up the dependencies // by position in the row schema of the filesink operator. if (op == null) { return; } List<ColumnInfo> signature = op.getSchema().getSignature(); int i = 0; for (FieldSchema fs : cols) { linfo.putDependency(dc, fs, index.getDependency(op, signature.get(i++))); } }
/**
 * Resets this lineage state by clearing the directory-to-operator map,
 * the lineage info and the index, in that order.
 *
 * <p>The method is synchronized on this instance.
 */
public synchronized void clear() {
  this.dirToFop.clear();
  this.linfo.clear();
  this.index.clear();
}
}
new LinkedList<Map.Entry<DependencyKey, Dependency>>(linfo.entrySet()); Collections.sort(entry_list, new DependencyKeyComp()); Iterator<Map.Entry<DependencyKey, Dependency>> iter = entry_list.iterator();
new LinkedList<Map.Entry<DependencyKey, Dependency>>(linfo.entrySet()); Collections.sort(entry_list, new DependencyKeyComp()); Iterator<Map.Entry<DependencyKey, Dependency>> iter = entry_list.iterator();
/**
 * Builds a map from each output column's qualified name to the list of
 * input columns it depends on.
 *
 * <p>For every entry of {@code lInfo} the key's qualified name is computed
 * and the base columns of the dependency are fetched. Entries whose base
 * columns are {@code null} are left out of the result. Each base column is
 * rendered as {@code db.table.column}, or as {@code db.table} when it has
 * no column.
 *
 * @param lInfo the lineage information to convert
 * @return a new map keyed by output column qualified name
 */
public static Map<String, List<HiveColumnLineageInfo>> buildLineageMap(LineageInfo lInfo) {
  Map<String, List<HiveColumnLineageInfo>> lineageByOutput = new HashMap<>();

  for (Map.Entry<LineageInfo.DependencyKey, LineageInfo.Dependency> entry : lInfo.entrySet()) {
    String outputName = getQualifiedName(entry.getKey());

    if (LOG.isDebugEnabled()) {
      LOG.debug("buildLineageMap(): key={}; value={}", entry.getKey(), entry.getValue());
    }

    Collection<LineageInfo.BaseColumnInfo> baseColumns = getBaseCols(entry.getValue());
    if (baseColumns == null) {
      // Nothing to record for this output column.
      continue;
    }

    List<HiveColumnLineageInfo> inputs = new ArrayList<>();
    for (LineageInfo.BaseColumnInfo baseColumn : baseColumns) {
      String db = baseColumn.getTabAlias().getTable().getDbName();
      String table = baseColumn.getTabAlias().getTable().getTableName();

      String inputName;
      if (baseColumn.getColumn() == null) {
        inputName = db + "." + table;
      } else {
        inputName = db + "." + table + "." + baseColumn.getColumn().getName();
      }

      inputs.add(new HiveColumnLineageInfo(entry.getValue(), inputName));
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("Setting lineage --> Input: {} ==> Output : {}", inputs, outputName);
    }

    lineageByOutput.put(outputName, inputs);
  }

  return lineageByOutput;
}
/**
 * Creates an empty lineage state.
 *
 * <p>Starts with an empty directory-to-operator map (keyed by {@code Path}),
 * a fresh {@code LineageInfo} and a fresh {@code Index}.
 */
public LineageState() {
  this.dirToFop = new HashMap<Path, Operator>();
  this.linfo = new LineageInfo();
  this.index = new Index();
}
/** * Set the lineage information for the associated directory. * * @param dir The directory containing the query results. * @param dc The associated data container. * @param cols The list of columns. */ public void setLineage(Path dir, DataContainer dc, List<FieldSchema> cols) { // First lookup the file sink operator from the load work. Operator<?> op = dirToFop.get(dir); // Go over the associated fields and look up the dependencies // by position in the row schema of the filesink operator. if (op == null) { return; } List<ColumnInfo> signature = op.getSchema().getSignature(); int i = 0; for (FieldSchema fs : cols) { linfo.putDependency(dc, fs, index.getDependency(op, signature.get(i++))); } }
/**
 * Resets this lineage state by clearing the directory-to-operator map,
 * the lineage info and the index, in that order.
 */
public void clear() {
  this.dirToFop.clear();
  this.linfo.clear();
  this.index.clear();
}
}
private void processColumnLineage(AtlasEntity hiveProcess, AtlasEntitiesWithExtInfo entities) { LineageInfo lineageInfo = getHiveContext().getLinfo(); if (lineageInfo == null || CollectionUtils.isEmpty(lineageInfo.entrySet())) { return; final Set<String> processedOutputCols = new HashSet<>(); for (Map.Entry<DependencyKey, Dependency> entry : lineageInfo.entrySet()) { String outputColName = getQualifiedName(entry.getKey()); AtlasEntity outputColumn = context.getEntity(outputColName);
/**
 * Creates an empty lineage state.
 *
 * <p>Starts with an empty directory-to-operator map (keyed by
 * {@code String}) and a fresh {@code LineageInfo}.
 */
public LineageState() {
  this.dirToFop = new HashMap<String, FileSinkOperator>();
  this.linfo = new LineageInfo();
}
/** * Set the lineage information for the associated directory. * * @param dir The directory containing the query results. * @param dc The associated data container. * @param cols The list of columns. */ public void setLineage(Path dir, DataContainer dc, List<FieldSchema> cols) { // First lookup the file sink operator from the load work. FileSinkOperator fop = dirToFop.get(dir); // Go over the associated fields and look up the dependencies // by position in the row schema of the filesink operator. if (fop == null) { return; } List<ColumnInfo> signature = fop.getSchema().getSignature(); int i = 0; for (FieldSchema fs : cols) { linfo.putDependency(dc, fs, index.getDependency(fop, signature.get(i++))); } }
/**
 * Resets this lineage state by clearing the directory-to-operator map,
 * the lineage info and the index, in that order.
 */
public void clear() {
  this.dirToFop.clear();
  this.linfo.clear();
  this.index.clear();
}
}
new LinkedList<Map.Entry<DependencyKey, Dependency>>(linfo.entrySet()); Collections.sort(entry_list, new DependencyKeyComp()); Iterator<Map.Entry<DependencyKey, Dependency>> iter = entry_list.iterator();
/**
 * Creates an empty lineage state.
 *
 * <p>Starts with an empty directory-to-operator map (keyed by {@code Path})
 * and a fresh {@code LineageInfo}.
 */
public LineageState() {
  this.dirToFop = new HashMap<Path, FileSinkOperator>();
  this.linfo = new LineageInfo();
}
/** * Set the lineage information for the associated directory. * * @param dir The directory containing the query results. * @param dc The associated data container. * @param cols The list of columns. */ public void setLineage(String dir, DataContainer dc, List<FieldSchema> cols) { // First lookup the file sink operator from the load work. FileSinkOperator fop = dirToFop.get(dir); // Go over the associated fields and look up the dependencies // by position in the row schema of the filesink operator. if (fop == null) { return; } List<ColumnInfo> signature = fop.getSchema().getSignature(); int i = 0; for (FieldSchema fs : cols) { linfo.putDependency(dc, fs, index.getDependency(fop, signature.get(i++))); } }