public String toCompactString(boolean showTags) { StringBuilder sb = new StringBuilder(); Set<IndexedWord> used = wordMapFactory.newSet(); Collection<IndexedWord> roots = getRoots(); if (roots.isEmpty()) { if (size() == 0) { return "[EMPTY_SEMANTIC_GRAPH]"; } else { return "[UNROOTED_SEMANTIC_GRAPH]"; } // return toString("readable"); } for (IndexedWord root : roots) { toCompactStringHelper(root, sb, used, showTags); } return sb.toString(); }
/** * Returns a pretty-printed string representation of the given semantic graph, * on one or more lines. */ public String formatSemanticGraph(SemanticGraph sg) { if (sg.vertexSet().isEmpty()) { return "[]"; } out = new StringBuilder(); // not thread-safe!!! used = Generics.newHashSet(); if (sg.getRoots().size() == 1) { formatSGNode(sg, sg.getFirstRoot(), 1); } else { int index = 0; for (IndexedWord root: sg.getRoots()) { index+=1; out.append("root_").append(index).append("> "); formatSGNode(sg, root, 9); out.append("\n"); } } String result = out.toString(); if (!result.startsWith("[")) { result = "[" + result + "]"; } return result; }
/**
 * Renders the graph as a three-column table (dependent, relation, governor),
 * with a header, one row per root, and one row per edge in sorted order.
 */
private String toReadableString() {
  final String row = "%-20s%-20s%-20s%n";
  StringBuilder buf = new StringBuilder();
  buf.append(String.format(row, "dep", "reln", "gov"));
  buf.append(String.format(row, "---", "----", "---"));
  for (IndexedWord root : getRoots()) {
    buf.append(String.format(row,
        root.toString(CoreLabel.OutputFormat.VALUE_TAG_INDEX), "root", "root"));
  }
  for (SemanticGraphEdge edge : this.edgeListSorted()) {
    buf.append(String.format(row,
        edge.getTarget().toString(CoreLabel.OutputFormat.VALUE_TAG_INDEX),
        edge.getRelation().toString(),
        edge.getSource().toString(CoreLabel.OutputFormat.VALUE_TAG_INDEX)));
  }
  return buf.toString();
}
/** * Deletes all nodes that are not rooted (such as dangling vertices after a series of * edges have been chopped). */ public static void killNonRooted(SemanticGraph sg) { List<IndexedWord> nodes = new ArrayList<>(sg.vertexSet()); // Hack: store all of the nodes we know are in the rootset Set<IndexedWord> guaranteed = Generics.newHashSet(); for (IndexedWord root : sg.getRoots()) { guaranteed.add(root); guaranteed.addAll(sg.descendants(root)); } for (IndexedWord node : nodes) { if (!guaranteed.contains(node)) { sg.removeVertex(node); } } }
/**
 * Returns a String representation of this graph as a list of typed
 * dependencies, as exemplified by the following:
 *
 * <pre>
 * nsubj(died-6, Sam-3)
 * tmod(died-6, today-9)
 * </pre>
 *
 * @return a {@code String} representation of this set of typed dependencies
 */
public String toList() {
  StringBuilder buf = new StringBuilder();
  // Roots are rendered as dependents of the virtual ROOT-0 token.
  for (IndexedWord root : getRoots()) {
    buf.append("root(ROOT-0, ")
        .append(root.toString(CoreLabel.OutputFormat.VALUE_INDEX))
        .append(")\n");
  }
  for (SemanticGraphEdge edge : this.edgeListSorted()) {
    buf.append(edge.getRelation())
        .append("(")
        .append(edge.getSource().toString(CoreLabel.OutputFormat.VALUE_INDEX))
        .append(", ")
        .append(edge.getTarget().toString(CoreLabel.OutputFormat.VALUE_INDEX))
        .append(")\n");
  }
  return buf.toString();
}
public String toString(CoreLabel.OutputFormat wordFormat) { Collection<IndexedWord> rootNodes = getRoots(); if (rootNodes.isEmpty()) { // Shouldn't happen, but return something! return toString(OutputFormat.READABLE); } StringBuilder sb = new StringBuilder(); Set<IndexedWord> used = wordMapFactory.newSet(); for (IndexedWord root : rootNodes) { sb.append("-> ").append(root.toString(wordFormat)).append(" (root)\n"); recToString(root, wordFormat, sb, 1, used); } Set<IndexedWord> nodes = wordMapFactory.newSet(); nodes.addAll(vertexSet()); nodes.removeAll(used); while (!nodes.isEmpty()) { IndexedWord node = nodes.iterator().next(); sb.append(node.toString(wordFormat)).append("\n"); recToString(node, wordFormat, sb, 1, used); nodes.removeAll(used); } return sb.toString(); }
/**
 * Determine if a tree is cyclic.
 * @param tree The tree to check.
 * @return True if the tree has at least one cycle in it.
 */
public static boolean isCyclic(SemanticGraph tree) {
  for (IndexedWord vertex : tree.vertexSet()) {
    if (tree.getRoots().contains(vertex)) {
      continue;
    }
    // Fix: a non-root vertex with no incoming edges (dangling) previously made
    // the unguarded next() throw NoSuchElementException; skip it instead.
    if (!tree.incomingEdgeIterator(vertex).hasNext()) {
      continue;
    }
    IndexedWord node = tree.incomingEdgeIterator(vertex).next().getGovernor();
    Set<IndexedWord> seen = new HashSet<>();
    seen.add(vertex);
    // Walk the governor chain upward; revisiting any node means we looped.
    while (node != null) {
      if (seen.contains(node)) {
        return true;
      }
      seen.add(node);
      if (tree.incomingEdgeIterator(node).hasNext()) {
        node = tree.incomingEdgeIterator(node).next().getGovernor();
      } else {
        node = null;
      }
    }
  }
  return false;
}
/**
 * Process multi-word prepositions.
 */
private static void processMultiwordPreps(SemanticGraph sg) {
  /* Semgrexes require a graph with a root. */
  if (sg.getRoots().isEmpty()) {
    return;
  }

  HashMap<String, HashSet<Integer>> bigrams = new HashMap<>();
  HashMap<String, HashSet<Integer>> trigrams = new HashMap<>();

  List<IndexedWord> vertexList = sg.vertexListSorted();
  int numWords = vertexList.size();

  // Index every lowercase bigram (and trigram) by the index of its first word.
  for (int i = 1; i < numWords; i++) {
    String bigram = vertexList.get(i - 1).value().toLowerCase() + '_'
        + vertexList.get(i).value().toLowerCase();
    bigrams.computeIfAbsent(bigram, k -> new HashSet<>())
        .add(vertexList.get(i - 1).index());
    if (i > 1) {
      String trigram = vertexList.get(i - 2).value().toLowerCase() + '_' + bigram;
      trigrams.computeIfAbsent(trigram, k -> new HashSet<>())
          .add(vertexList.get(i - 2).index());
    }
  }

  /* Simple two-word prepositions. */
  processSimple2WP(sg, bigrams);

  /* More complex two-word prepositions in which the first
   * preposition is the head of the prepositional phrase. */
  processComplex2WP(sg, bigrams);

  /* Process three-word prepositions. */
  process3WP(sg, trigrams);
}
/**
 * Given a graph, returns a new graph with the new sentence index enforced.
 * NOTE: new vertices are inserted.
 * TODO: is this ok? rewrite this?
 *
 * @param sg the source graph (copied, not modified)
 * @param newSentIndex the sentence index to stamp onto every vertex
 * @return a copy of {@code sg} in which every vertex carries {@code newSentIndex},
 *         with the root set remapped to the replacement vertices
 */
public static SemanticGraph setSentIndex(SemanticGraph sg, int newSentIndex) {
  SemanticGraph newGraph = new SemanticGraph(sg);
  List<IndexedWord> prevRoots = new ArrayList<>(newGraph.getRoots());
  List<IndexedWord> newRoots = new ArrayList<>();
  // TODO: we are using vertexListSorted here because we're changing
  // vertices while iterating. Perhaps there is a better way to do it.
  for (IndexedWord node : newGraph.vertexListSorted()) {
    // Replace each vertex with a copy carrying the new sentence index.
    IndexedWord newWord = new IndexedWord(node);
    newWord.setSentIndex(newSentIndex);
    SemanticGraphUtils.replaceNode(newWord, node, newGraph);
    // Remember replacements for old roots so the root set can be rebuilt below.
    if (prevRoots.contains(node)) newRoots.add(newWord);
  }
  newGraph.setRoots(newRoots);
  return newGraph;
}
/**
 * Mostly just an alias, but make sure our featurizer is serializable!
 * Maps a (fromState, action, toState) transition triple to a counter of
 * string-valued features.
 */
public interface Featurizer extends Function<Triple<ClauseSplitterSearchProblem.State, ClauseSplitterSearchProblem.Action, ClauseSplitterSearchProblem.State>, Counter<String>>, Serializable {
  // NOTE(review): what constitutes a "simple split" is defined by implementations —
  // confirm semantics against the implementing classes.
  boolean isSimpleSplit(Counter<String> feats);
}
/**
 * Swaps {@code gov} into {@code oldHead}'s position in the graph: {@code gov}
 * takes over {@code oldHead}'s incoming edge (or its root status), and
 * {@code oldHead} is kept attached beneath {@code gov} via a temporary
 * DEPENDENT edge so the graph stays connected.
 */
private static void demoteQmodParentHelper(SemanticGraph sg, IndexedWord gov, IndexedWord oldHead) {
  if (!sg.getRoots().contains(oldHead)) {
    IndexedWord parent = sg.getParent(oldHead);
    if (parent == null) {
      // oldHead is neither a root nor attached; nothing to rewire.
      return;
    }
    // Redirect oldHead's incoming edge so it points at gov instead,
    // preserving the relation, weight, and extra flag.
    SemanticGraphEdge edge = sg.getEdge(parent, oldHead);
    sg.addEdge(parent, gov, edge.getRelation(), edge.getWeight(), edge.isExtra());
    sg.removeEdge(edge);
  } else {
    // oldHead was a root: transfer root status to gov.
    sg.getRoots().remove(oldHead);
    sg.addRoot(gov);
  }
  //temporary relation to keep the graph connected
  sg.addEdge(gov, oldHead, DEPENDENT, Double.NEGATIVE_INFINITY, false);
  // Drop any pre-existing oldHead -> gov edge to avoid a two-node cycle.
  // NOTE(review): getEdge may return null if no such edge exists — assumes
  // removeEdge tolerates null; confirm against SemanticGraph.removeEdge.
  sg.removeEdge(sg.getEdge(oldHead, gov));
}
/**
 * Attaches the given words beneath {@code gov} as one multi-word expression:
 * the first word becomes the MWE head (linked to {@code gov} by {@code reln}),
 * and every subsequent word hangs off that head via MULTI_WORD_EXPRESSION.
 * Each word is first detached from its current governor, if it has one.
 */
private static void createMultiWordExpression(SemanticGraph sg, IndexedWord gov, GrammaticalRelation reln, IndexedWord... words) {
  if (sg.getRoots().isEmpty() || gov == null || words.length < 1) {
    return;
  }

  IndexedWord mweHead = null;
  for (IndexedWord word : words) {
    // Detach the word from wherever it currently hangs.
    IndexedWord currentGov = sg.getParent(word);
    if (currentGov != null) {
      SemanticGraphEdge oldEdge = sg.getEdge(currentGov, word);
      if (oldEdge != null) {
        sg.removeEdge(oldEdge);
      }
    }
    if (mweHead == null) {
      // The first word heads the expression.
      sg.addEdge(gov, word, reln, Double.NEGATIVE_INFINITY, false);
      mweHead = word;
    } else {
      sg.addEdge(mweHead, word, MULTI_WORD_EXPRESSION, Double.NEGATIVE_INFINITY, false);
    }
  }
}
private static Pair<IndexedWord, String> findDependentVerb(Mention m) { if (m.dependency.getRoots().size() == 0) { return new Pair<>(); } // would be nice to condense this pattern, but sadly =reln // always uses the last relation in the sequence, not the first SemgrexPattern pattern = SemgrexPattern.compile("{idx:" + (m.headIndex+1) + "} [ <=reln {tag:/^V.*/}=verb | <=reln ({} << {tag:/^V.*/}=verb) ]"); SemgrexMatcher matcher = pattern.matcher(m.dependency); while (matcher.find()) { return Pair.makePair(matcher.getNode("verb"), matcher.getRelnString("reln")); } return new Pair<>(); }
private static Pair<IndexedWord, String> findDependentVerb(Mention m) { if (m.enhancedDependency.getRoots().size() == 0) { return new Pair<>(); } // would be nice to condense this pattern, but sadly =reln // always uses the last relation in the sequence, not the first SemgrexPattern pattern = SemgrexPattern.compile("{idx:" + (m.headIndex+1) + "} [ <=reln {tag:/^V.*/}=verb | <=reln ({} << {tag:/^V.*/}=verb) ]"); SemgrexMatcher matcher = pattern.matcher(m.enhancedDependency); while (matcher.find()) { return Pair.makePair(matcher.getNode("verb"), matcher.getRelnString("reln")); } return new Pair<>(); }
/**
 * Copies the given graph, and also fills in the mapping from the old
 * vertices to their newly created counterparts.
 *
 * @param g the graph to copy
 * @param prevToNewMap receives the old-vertex to new-vertex mapping; may be null,
 *                     in which case a throwaway map is used internally
 */
public SemanticGraph(SemanticGraph g, Map<IndexedWord, IndexedWord> prevToNewMap) {
  graph = new DirectedMultiGraph<>(outerMapFactory, innerMapFactory);
  if (prevToNewMap == null) {
    prevToNewMap = wordMapFactory.newMap();
  }
  // Clone every vertex, remembering which copy corresponds to which original.
  for (IndexedWord oldVertex : g.vertexSet()) {
    IndexedWord copy = new IndexedWord(oldVertex);
    copy.setCopyCount(oldVertex.copyCount());
    addVertex(copy);
    prevToNewMap.put(oldVertex, copy);
  }
  // Root references must point at the new copies, so look them up via the map.
  roots = wordMapFactory.newSet();
  for (IndexedWord oldRoot : g.getRoots()) {
    roots.add(prevToNewMap.get(oldRoot));
  }
  // Re-create each edge between the corresponding new vertices.
  for (SemanticGraphEdge oldEdge : g.edgeIterable()) {
    addEdge(prevToNewMap.get(oldEdge.getGovernor()),
        prevToNewMap.get(oldEdge.getDependent()),
        oldEdge.getRelation(), oldEdge.getWeight(), oldEdge.isExtra());
  }
}
// NOTE(review): fragment of a larger method whose remainder is outside this view.
for (IndexedWord root : graph.getRoots()) {
  // Use the ROOT relation's long name with all whitespace stripped.
  String rel = GrammaticalRelation.ROOT.getLongName();
  rel = rel.replaceAll("\\s+", ""); // future proofing
/** * Convert a dependency graph to a format expected as input to {@link Writer#set(String, Object)}. */ @SuppressWarnings("RedundantCast") // It's lying; we need the "redundant" casts (as of 2014-09-08) private static Object buildDependencyTree(SemanticGraph graph) { if(graph != null) { return Stream.concat( // Roots graph.getRoots().stream().map( (IndexedWord root) -> (Consumer<Writer>) dep -> { dep.set("dep", "ROOT"); dep.set("governor", 0); dep.set("governorGloss", "ROOT"); dep.set("dependent", root.index()); dep.set("dependentGloss", root.word()); }), // Regular edges graph.edgeListSorted().stream().map( (SemanticGraphEdge edge) -> (Consumer<Writer>) (Writer dep) -> { dep.set("dep", edge.getRelation().toString()); dep.set("governor", edge.getGovernor().index()); dep.set("governorGloss", edge.getGovernor().word()); dep.set("dependent", edge.getDependent().index()); dep.set("dependentGloss", edge.getDependent().word()); }) ); } else { return null; } }
/**
 * Given a list of graphs, constructs a new graph combined from the
 * collection of graphs. Original vertices are used, edges are
 * copied. Graphs are ordered by the sentence index and index of
 * the original vertices. Intent is to create a "mega graph"
 * similar to the graphs used in the RTE problem.
 * <br>
 * This method only works if the indexed words have different
 * sentence ids, as otherwise the maps used will confuse several of
 * the IndexedWords.
 */
public static SemanticGraph makeFromGraphs(Collection<SemanticGraph> sgList) {
  SemanticGraph combined = new SemanticGraph();
  Collection<IndexedWord> allRoots = Generics.newHashSet();
  for (SemanticGraph part : sgList) {
    allRoots.addAll(part.getRoots());
    // Reuse the original vertex objects; only the edges are re-created.
    for (IndexedWord vertex : part.vertexSet()) {
      combined.addVertex(vertex);
    }
    for (SemanticGraphEdge edge : part.edgeIterable()) {
      combined.addEdge(edge.getGovernor(), edge.getDependent(),
          edge.getRelation(), edge.getWeight(), edge.isExtra());
    }
  }
  combined.setRoots(allRoots);
  return combined;
}
/**
 * Serializes the graph's vertices, producing one {@code Consumer<Writer>}
 * callback per node (in sorted order), including each node's outgoing edges.
 *
 * @param graph the graph to serialize; may be null
 * @return a Stream of node callbacks, or null when {@code graph} is null
 */
private static Object getNodes(SemanticGraph graph) {
  if (graph != null) {
    List<IndexedWord> vertexList = graph.vertexListSorted();
    // Fix: vertexList.get(vertexList.size() - 1) previously threw
    // IndexOutOfBoundsException for a non-null graph with no vertices.
    int maxIndex = vertexList.isEmpty() ? 0 : vertexList.get(vertexList.size() - 1).index();
    return vertexList.stream().map( (IndexedWord token) -> (Consumer<Writer>) node -> {
      if (token.copyCount() == 0) {
        node.set("id", getNodeIndex(token, maxIndex));
        node.set("start", token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
        node.set("end", token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
      } else {
        // Copy nodes record which original token they were copied from.
        node.set("id", getNodeIndex(token, maxIndex));
        node.set("source", token.index());
      }
      node.set("form", token.word());
      if (graph.getRoots().contains(token)) node.set("top", true);
      node.set("properties", (Consumer<Writer>) propertiesWriter -> {
        propertiesWriter.set("xpos", token.tag());
        propertiesWriter.set("upos", token.get(CoreAnnotations.CoarseTagAnnotation.class));
        propertiesWriter.set("lemma", token.lemma());
      });
      node.set("edges", graph.getOutEdgesSorted(token).stream().map(
          (SemanticGraphEdge dep) -> (Consumer<Writer>) edge -> {
            edge.set("target", getNodeIndex(dep.getDependent(), maxIndex));
            edge.set("label", dep.getRelation().toString());
          }));
    });
  } else {
    return null;
  }
}
/**
 * This creates a new graph based off the given, but uses the existing nodes objects.
 */
public static SemanticGraph duplicateKeepNodes(SemanticGraph sg) {
  SemanticGraph copy = new SemanticGraph();
  // Share the original vertex objects instead of cloning them.
  for (IndexedWord vertex : sg.vertexSet()) {
    copy.addVertex(vertex);
  }
  copy.setRoots(sg.getRoots());
  // Edges are re-created but connect the shared vertex objects.
  for (SemanticGraphEdge e : sg.edgeIterable()) {
    copy.addEdge(e.getGovernor(), e.getDependent(), e.getRelation(), e.getWeight(), e.isExtra());
  }
  return copy;
}