Refine search
parse = new SemanticGraph(parse); for (IndexedWord node : new HashSet<>(parse.vertexSet())) { // copy the vertex set to prevent ConcurrentModificationExceptions if (node.tag() != null && node.tag().startsWith("PRP")) { List<CoreLabel> canonicalMention = canonicalMentionMap.get(node.backingLabel()); if (canonicalMention != null) { List<SemanticGraphEdge> incomingEdges = parse.incomingEdgeList(node); List<SemanticGraphEdge> outgoingEdges = parse.outgoingEdgeList(node); parse.removeVertex(node); IndexedWord headWord = new IndexedWord(canonicalMention.get(canonicalMention.size() - 1)); headWord.setPseudoPosition(node.pseudoPosition()); parse.addVertex(headWord); for (SemanticGraphEdge edge : incomingEdges) { parse.addEdge(edge.getGovernor(), headWord, edge.getRelation(), edge.getWeight(), edge.isExtra()); parse.addEdge(headWord, edge.getDependent(), edge.getRelation(), edge.getWeight(), edge.isExtra()); pseudoPosition -= 1e-3; parse.addVertex(dependent); parse.addEdge(headWord, dependent, UniversalEnglishGrammaticalRelations.COMPOUND_MODIFIER, 1.0, false);
Set<IndexedWord> subgraphNodeSet = sg.getSubgraphVertices(rootNode); if ( ! sg.isDag(rootNode)) { for (IndexedWord child : sg.getChildren(rootNode)) { Set<IndexedWord> reachableSet = sg.getSubgraphVertices(child); if (reachableSet.contains(rootNode)) { throw new IllegalArgumentException("Subtree cannot contain cycle leading back to root node!"); Collections.sort(sortedSubgraphNodes); IndexedWord newNode = new IndexedWord(rootNode.docID(), rootNode.sentIndex(), rootNode.index()); for (Class key : newNode.backingLabel().keySet()) { newNode.set(key, rootNode.get(key)); newNode.setValue(StringUtils.join(sortedSubgraphNodes.stream().map(IndexedWord::value), " ")); newNode.setWord(StringUtils.join(sortedSubgraphNodes.stream().map(IndexedWord::word), " ")); newNode.setLemma(StringUtils.join(sortedSubgraphNodes.stream().map(x -> x.lemma() == null ? x.word() : x.lemma()), " ")); if (sg.getRoots().contains(rootNode)) { sg.getRoots().remove(rootNode); sg.addRoot(rootNode); for (SemanticGraphEdge edge : sg.incomingEdgeIterable(rootNode)) { sg.addEdge(edge.getGovernor(), newNode, edge.getRelation(), edge.getWeight(), edge.isExtra()); sg.removeVertex(node);
/**
 * Renders this graph's edges as a simple XML {@code <dependencies>} document,
 * one {@code <dep>} element per edge (in sorted edge order), carrying the
 * relation name plus governor and dependent words with their 1-based indices.
 *
 * Fix: relation names and words are now XML-escaped; previously a token
 * containing {@code &}, {@code <}, {@code >} or a quote produced malformed XML
 * (the relation string is also used inside the {@code type="..."} attribute).
 *
 * @return the XML string; never null
 */
private String toXMLString() {
  StringBuilder buf = new StringBuilder("<dependencies style=\"typed\">\n");
  for (SemanticGraphEdge edge : this.edgeListSorted()) {
    String reln = xmlEscapeText(edge.getRelation().toString());
    String gov = xmlEscapeText((edge.getSource()).word());
    int govIdx = (edge.getSource()).index();
    String dep = xmlEscapeText((edge.getTarget()).word());
    int depIdx = (edge.getTarget()).index();
    buf.append("  <dep type=\"").append(reln).append("\">\n");
    buf.append("    <governor idx=\"").append(govIdx).append("\">").append(gov).append("</governor>\n");
    buf.append("    <dependent idx=\"").append(depIdx).append("\">").append(dep).append("</dependent>\n");
    buf.append("  </dep>\n");
  }
  buf.append("</dependencies>\n");
  return buf.toString();
}

/**
 * Escapes the five XML special characters so the string is safe both as
 * element content and inside a double-quoted attribute value.
 * {@code &} is replaced first so the other entities are not double-escaped.
 */
private static String xmlEscapeText(String s) {
  if (s == null) {
    return "";
  }
  return s.replace("&", "&amp;")
          .replace("<", "&lt;")
          .replace(">", "&gt;")
          .replace("\"", "&quot;")
          .replace("'", "&apos;");
}
/**
 * Depth-first accumulation of all nodes reachable from {@code curr} into
 * {@code descendantSet} (including {@code curr} itself).
 *
 * @param curr          the node whose descendants are collected
 * @param descendantSet the accumulator, also serving as the visited set
 */
private void descendantsHelper(IndexedWord curr, Set<IndexedWord> descendantSet) {
  // Set.add returns false when the node was already collected; this both
  // records the node and cuts off repeat visits (guarding against cycles).
  if (!descendantSet.add(curr)) {
    return;
  }
  getChildren(curr).forEach(child -> descendantsHelper(child, descendantSet));
}
/**
 * Returns the children of the given vertex, sorted into their natural
 * (sentence-position) order.
 *
 * Fix: the {@code IllegalArgumentException} now carries a message naming the
 * offending vertex; previously it was thrown with no context at all.
 *
 * @param vertex the vertex whose children are requested; must be in this graph
 * @return a new, mutable, sorted list of the vertex's children
 * @throws IllegalArgumentException if {@code vertex} is not in this graph
 */
public List<IndexedWord> getChildList(IndexedWord vertex) {
  if (!containsVertex(vertex)) {
    throw new IllegalArgumentException("Vertex " + vertex + " is not in this graph");
  }
  List<IndexedWord> result = new ArrayList<>(getChildren(vertex));
  Collections.sort(result);
  return result;
}
/**
 * Creates a sentence fragment over the given dependency parse.
 *
 * @param tree         the dependency parse backing this fragment
 * @param assumedTruth whether the fragment is assumed true in its context
 * @param copy         if true, work on a defensive copy of {@code tree};
 *                     if false, the fragment aliases (and may mutate) it
 */
public SentenceFragment(SemanticGraph tree, boolean assumedTruth, boolean copy) {
  this.parseTree = copy ? new SemanticGraph(tree) : tree;
  this.assumedTruth = assumedTruth;
  // Cache the tokens of this fragment, in sentence order.
  for (IndexedWord vertex : this.parseTree.vertexListSorted()) {
    words.add(vertex.backingLabel());
  }
}
SemanticGraph graph = new SemanticGraph(); CoreLabel token; if (document.isPresent()) { token = document.get().get(SentencesAnnotation.class).get(in.getSentenceIndex()).get(TokensAnnotation.class).get(in.getIndex() - 1); // token index starts at 1! } else { token = sentence.get(in.getIndex() - 1); // index starts at 1! if (in.hasCopyAnnotation() && in.getCopyAnnotation() > 0) { word = new IndexedWord(new CoreLabel(token)); word.setCopyCount(in.getCopyAnnotation()); } else { word = new IndexedWord(token); assert in.getIndex() == word.index(); nodes.put(in.getIndex(), in.getCopyAnnotation(), word); graph.addVertex(word); graph.addEdge(source, target, rel, 1.0, ie.hasIsExtra() && ie.getIsExtra()); Collection<IndexedWord> roots = proto.getRootList().stream().map(rootI -> nodes.get(rootI, 0)).collect(Collectors.toList()); graph.setRoots(roots); } else { if(!graph.isEmpty()){ graph.resetRoots();
if(coreLabel.get(positiveClass).equals(label) || constVars.functionWords.contains(coreLabel.word())) return false; else List<String> textTokens = sent.getTokens().stream().map(x -> x.word()).collect(Collectors.toList()); if(!acceptWord.test(l)) continue; IndexedWord w = g.getNodeByIndex(l.index()); List<String> outputPhrases = new ArrayList<>(); List<ExtractedPhrase> extractedPhrases = new ArrayList<>();
/**
 * Convert a {@link SentenceFragment} to its Protobuf representation.
 * Token indices and the root index are converted from CoreNLP's 1-based
 * indexing to the proto's 0-based indexing (hence the {@code - 1}).
 */
public static CoreNLPProtos.SentenceFragment toProto(SentenceFragment fragment) { return CoreNLPProtos.SentenceFragment.newBuilder() .setAssumedTruth(fragment.assumedTruth) .setScore(fragment.score) .addAllTokenIndex(fragment.words.stream().map(x -> x.index() - 1).collect(Collectors.toList())) .setRoot(fragment.parseTree.getFirstRoot().index() - 1) .build(); }
if (sentence.get(CoreAnnotations.TokensAnnotation.class) != null) { List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class); SemanticGraph depTree = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class); for (int i = 0; i < tokens.size(); ++i) { String deprel = null; if (depTree != null) { Set<Integer> rootSet = depTree.getRoots().stream().map(IndexedWord::index).collect(Collectors.toSet()); IndexedWord node = depTree.getNodeByIndexSafe(i + 1); if (node != null) { List<SemanticGraphEdge> edgeList = depTree.getIncomingEdgesSorted(node); if (!edgeList.isEmpty()) { assert edgeList.size() == 1; head = edgeList.get(0).getGovernor().index(); deprel = edgeList.get(0).getRelation().toString(); } else if (rootSet.contains(i + 1)) {
CoreNLPProtos.DependencyGraph.Builder builder = CoreNLPProtos.DependencyGraph.newBuilder(); Set<Integer> rootSet = graph.getRoots().stream().map(IndexedWord::index).collect(Collectors.toCollection(IdentityHashSet::new)); for (IndexedWord node : graph.vertexSet()) { .setSentenceIndex(node.get(SentenceIndexAnnotation.class)) .setIndex(node.index()); if (node.copyCount() > 0) { nodeBuilder.setCopyAnnotation(node.copyCount()); for (SemanticGraphEdge edge : graph.edgeIterable()) {
throw new RuntimeInterruptedException(); SemanticGraph fragmentTree = new SemanticGraph(tree); fragmentTree.resetRoots(); fragmentTree.vertexSet().stream() .filter(vertex -> vertex.index() - 1 == fragment.getRoot()) .forEach(fragmentTree::setRoot); tree.vertexSet().stream() .filter(vertex -> !keptIndices.contains(vertex.index() - 1)) .forEach(fragmentTree::removeVertex); fragmentTree.vertexSet().stream() .filter(vertex -> fragmentTree.getFirstRoot() != vertex && tree.getFirstRoot() != vertex && !fragmentTree.incomingEdgeIterable(vertex).iterator().hasNext()) .forEach(vertex -> { SemanticGraphEdge edge = tree.incomingEdgeIterable(vertex).iterator().next(); fragmentTree.addEdge(fragmentTree.getFirstRoot(), edge.getDependent(), edge.getRelation(), edge.getWeight(), edge.isExtra()); });
/** * Convert a dependency graph to a format expected as input to {@link Writer#set(String, Object)}. */ @SuppressWarnings("RedundantCast") // It's lying; we need the "redundant" casts (as of 2014-09-08) private static Object buildDependencyTree(SemanticGraph graph) { if(graph != null) { return Stream.concat( // Roots graph.getRoots().stream().map( (IndexedWord root) -> (Consumer<Writer>) dep -> { dep.set("dep", "ROOT"); dep.set("governor", 0); dep.set("governorGloss", "ROOT"); dep.set("dependent", root.index()); dep.set("dependentGloss", root.word()); }), // Regular edges graph.edgeListSorted().stream().map( (SemanticGraphEdge edge) -> (Consumer<Writer>) (Writer dep) -> { dep.set("dep", edge.getRelation().toString()); dep.set("governor", edge.getGovernor().index()); dep.set("governorGloss", edge.getGovernor().word()); dep.set("dependent", edge.getDependent().index()); dep.set("dependentGloss", edge.getDependent().word()); }) ); } else { return null; } }
/**
 * Convert a dependency graph's vertices to writer callbacks for
 * {@link Writer#set(String, Object)}: one node entry per vertex, each
 * carrying offsets (or, for copy nodes, the source index), surface form,
 * POS/lemma properties, and its outgoing edges.
 *
 * @param graph the graph to serialize; may be null
 * @return a {@code Stream<Consumer<Writer>>}, or null if {@code graph} is null
 */
private static Object getNodes(SemanticGraph graph) {
  if (graph == null) {
    return null;
  }
  List<IndexedWord> sortedVertices = graph.vertexListSorted();
  // Highest token index, used to derive ids for copy nodes.
  int maxIndex = sortedVertices.get(sortedVertices.size() - 1).index();
  return sortedVertices.stream().map(
      (IndexedWord vertex) -> (Consumer<Writer>) nodeWriter -> {
        if (vertex.copyCount() == 0) {
          // An original token: report its character offsets.
          nodeWriter.set("id", getNodeIndex(vertex, maxIndex));
          nodeWriter.set("start", vertex.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
          nodeWriter.set("end", vertex.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
        } else {
          // A copy node: point back at the token it was copied from.
          nodeWriter.set("id", getNodeIndex(vertex, maxIndex));
          nodeWriter.set("source", vertex.index());
        }
        nodeWriter.set("form", vertex.word());
        if (graph.getRoots().contains(vertex)) nodeWriter.set("top", true);
        nodeWriter.set("properties", (Consumer<Writer>) props -> {
          props.set("xpos", vertex.tag());
          props.set("upos", vertex.get(CoreAnnotations.CoarseTagAnnotation.class));
          props.set("lemma", vertex.lemma());
        });
        nodeWriter.set("edges", graph.getOutEdgesSorted(vertex).stream().map(
            (SemanticGraphEdge outEdge) -> (Consumer<Writer>) edgeWriter -> {
              edgeWriter.set("target", getNodeIndex(outEdge.getDependent(), maxIndex));
              edgeWriter.set("label", outEdge.getRelation().toString());
            }));
      }
  );
}
public String getRelation() { if(headIndexedWord == null) return null; if(enhancedDependency.getRoots().isEmpty()) return null; if(enhancedDependency.getFirstRoot().equals(headIndexedWord)) return "root"; if(!enhancedDependency.containsVertex(getHeadParent())) return null; GrammaticalRelation relation = enhancedDependency.reln(getHeadParent(), headIndexedWord); && getHeadChildren().stream().anyMatch(c -> c.tag().equals("IN"))) || relation == UniversalEnglishGrammaticalRelations.TEMPORAL_MODIFIER || relation == UniversalEnglishGrammaticalRelations.ADV_CLAUSE_MODIFIER
/**
 * Returns the yield span for the word rooted at the given node, but only
 * traversing a fixed set of relations.
 *
 * @param tree The dependency graph to get the span from.
 * @param root The root word of the span.
 * @return A one indexed span rooted at the given word.
 */
private static Pair<Integer, Integer> getModifierSubtreeSpan(SemanticGraph tree, IndexedWord root) {
  // A negated root restricts traversal to "nmod" arcs only; otherwise the
  // full modifier-arc set is followed.
  boolean negated = tree.outgoingEdgeList(root).stream()
      .anyMatch(edge -> "neg".equals(edge.getRelation().getShortName()));
  if (negated) {
    return getGeneralizedSubtreeSpan(tree, root, Collections.singleton("nmod"));
  }
  return getGeneralizedSubtreeSpan(tree, root, MODIFIER_ARCS);
}
Set<IndexedWord> children = new HashSet<>(sg.getChildren(oldHead)); for (IndexedWord child : children) { SemanticGraphEdge oldEdge = sg.getEdge(oldHead, child); if (oldEdge.getRelation() == UniversalEnglishGrammaticalRelations.COMPOUND_MODIFIER) { sg.addEdge(oldHead, child, UniversalEnglishGrammaticalRelations.NOMINAL_MODIFIER, oldEdge.getWeight(), oldEdge.isExtra()); sg.removeEdge(oldEdge); for (int i = nameParts.get(0).index(), end = oldHead.index(); i < end; i++) { IndexedWord node = sg.getNodeByIndexSafe(i); if (node == null) { return; IndexedWord gov = sg.getParent(oldHead); if (gov == null && ! sg.getRoots().contains(oldHead)) { return; Set<IndexedWord> children = new HashSet<>(sg.getChildren(oldHead)); sg.getRoots().add(newHead); sg.getRoots().remove(oldHead); } else { SemanticGraphEdge oldEdge = sg.getEdge(gov, oldHead); sg.addEdge(gov, newHead, oldEdge.getRelation(), oldEdge.getWeight(), oldEdge.isExtra()); sg.removeEdge(oldEdge); SemanticGraphEdge oldEdge = sg.getEdge(oldHead, newHead); sg.addEdge(newHead, oldHead, UniversalEnglishGrammaticalRelations.NAME_MODIFIER, oldEdge.getWeight(), oldEdge.isExtra()); sg.removeEdge(oldEdge);
if (descendantSet.contains(curr) || (doNotAddThese!=null && doNotAddThese.contains(curr)) || !acceptWord.test(curr.backingLabel())) { return; if (!ignoreCommonTags || !ignoreTags.contains(curr.tag().trim())) { descendantSet.add(curr); for (IndexedWord child : g.getChildren(curr)) { boolean dontuse = false; if (doNotAddThese!=null &&doNotAddThese.contains(child)) rel = g.reln(curr, child); dontuse = checkIfSatisfiesRelConstrains(g, curr, child, allCutOffRels, rel); if (child.tag().matches(cutOffTagRegex)) { if (DEBUG >= 5) System.out.println("ignored tag " + child
/**
 * Mostly just an alias: a feature extractor mapping a clause-splitting
 * transition (source state, action taken, resulting state) to a feature
 * counter — but declared here to make sure our featurizer is serializable,
 * so it can be saved alongside a trained model.
 */
public interface Featurizer extends Function<Triple<ClauseSplitterSearchProblem.State, ClauseSplitterSearchProblem.Action, ClauseSplitterSearchProblem.State>, Counter<String>>, Serializable { boolean isSimpleSplit(Counter<String> feats); }
List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class); for (CoreLabel token : tokens) { OperatorSpec specOrNull = token.get(OperatorAnnotation.class); if (specOrNull != null) { operators.add(specOrNull); if (sentence.containsKey(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class)) { for (IndexedWord token : sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class).vertexSet()) { token.set(PolarityAnnotation.class, Polarity.DEFAULT); if (sentence.containsKey(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class)) { for (IndexedWord token : sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class).vertexSet()) { token.set(PolarityAnnotation.class, Polarity.DEFAULT); if (sentence.containsKey(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class)) { for (IndexedWord token : sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class).vertexSet()) { token.set(PolarityAnnotation.class, Polarity.DEFAULT); token.set(PolarityAnnotation.class, polarity); Polarity polarity = token.get(PolarityAnnotation.class); if (polarity != null) { if (polarity.isUpwards()) {