/**
 * Builds the string value used for a coordinating conjunction.
 *
 * <p>When the yield of {@code cc} in {@code sg} has fewer than two words, the
 * lemma of {@code cc} is returned as is. Otherwise the lemmas of all words in
 * the yield are joined with underscores and the result is lower-cased.
 *
 * @param cc the conjunction node
 * @param sg the graph that contains {@code cc}
 * @return the lemma of {@code cc}, or the lower-cased, underscore-joined lemmas of its yield
 */
private static String conjValue(IndexedWord cc, SemanticGraph sg) {
  List<IndexedWord> span = sg.yield(cc);
  if (span.size() < 2) {
    return cc.lemma();
  }

  List<String> lemmas = new LinkedList<>();
  for (IndexedWord word : span) {
    lemmas.add(word.lemma());
  }
  String joined = StringUtils.join(lemmas, "_");
  return joined.toLowerCase();
}
public int getModal(Dictionaries dict) { if(headIndexedWord == null) return 0; // direct modal in a child Collection<IndexedWord> children = enhancedDependency.getChildren(headIndexedWord); for(IndexedWord child : children) { if(dict.modals.contains(child.lemma())) return 1; } // check the parent IndexedWord parent = getHeadParent(); if (parent != null) { if(dict.modals.contains(parent.lemma())) return 1; // check the children of the parent (that is needed for modal auxiliaries) IndexedWord child = enhancedDependency.getChildWithReln(parent,UniversalEnglishGrammaticalRelations.AUX_MODIFIER); if(!enhancedDependency.hasParentWithReln(headIndexedWord, UniversalEnglishGrammaticalRelations.NOMINAL_SUBJECT) && child != null && dict.modals.contains(child.lemma())) return 1; } // look at the path to root List<IndexedWord> path = getHeadPathToRoot(); if(path == null) return 0; for(IndexedWord word : path) { if(dict.modals.contains(word.lemma())) return 1; } return 0; }
public int getModal(Dictionaries dict) { if(headIndexedWord == null) return 0; // direct modal in a child Collection<IndexedWord> children = dependency.getChildren(headIndexedWord); for(IndexedWord child : children) { if(dict.modals.contains(child.lemma())) return 1; } // check the parent IndexedWord parent = dependency.getParent(headIndexedWord); if (parent != null) { if(dict.modals.contains(parent.lemma())) return 1; // check the children of the parent (that is needed for modal auxiliaries) IndexedWord child = dependency.getChildWithReln(parent, UniversalEnglishGrammaticalRelations.AUX_MODIFIER); if(!dependency.hasParentWithReln(headIndexedWord, UniversalEnglishGrammaticalRelations.NOMINAL_SUBJECT) && child != null && dict.modals.contains(child.lemma())) return 1; } // look at the path to root List<IndexedWord> path = dependency.getPathToRoot(headIndexedWord); if(path == null) return 0; for(IndexedWord word : path) { if(dict.modals.contains(word.lemma())) return 1; } return 0; }
private static void addCaseMarkersToReln(SemanticGraph sg, IndexedWord gov, IndexedWord mod, IndexedWord caseMarker) { SemanticGraphEdge edge = sg.getEdge(gov, mod); List<IndexedWord> caseMarkers = new ArrayList<>(); caseMarkers.add(caseMarker); sg.getChildrenWithReln(caseMarker, FIXED).stream().forEach(iw -> caseMarkers.add(iw)); Collections.sort(caseMarkers); String relnName = StringUtils.join(caseMarkers.stream().map(iw->iw.lemma()), "_"); if (relnName.matches("[^a-zA-Z_]")) { return; } //for Joakim //GrammaticalRelation reln = getCaseMarkedRelation(edge.getRelation(), relnName.toLowerCase() + ":ENH_CASE"); GrammaticalRelation reln = getCaseMarkedRelation(edge.getRelation(), relnName.toLowerCase()); edge.setRelation(reln); }
public int getModifiers(Dictionaries dict){ if(headIndexedWord == null) return 0; int count = 0; List<Pair<GrammaticalRelation, IndexedWord>> childPairs = enhancedDependency.childPairs(headIndexedWord); for(Pair<GrammaticalRelation, IndexedWord> childPair : childPairs) { GrammaticalRelation gr = childPair.first; IndexedWord word = childPair.second; if(gr == UniversalEnglishGrammaticalRelations.ADJECTIVAL_MODIFIER || gr == UniversalEnglishGrammaticalRelations.RELATIVE_CLAUSE_MODIFIER || gr.toString().startsWith("prep_")) { count++; } // add possessive if not a personal determiner if(gr == UniversalEnglishGrammaticalRelations.POSSESSION_MODIFIER && !dict.determiners.contains(word.lemma())) { count++; } } return count; }
public int getReportEmbedding(Dictionaries dict) { if(headIndexedWord == null) return 0; // check adverbial clause with marker "as" for(IndexedWord sibling : dependency.getSiblings(headIndexedWord)) { if(dict.reportVerb.contains(sibling.lemma()) && dependency.hasParentWithReln(sibling,UniversalEnglishGrammaticalRelations.ADV_CLAUSE_MODIFIER)) { IndexedWord marker = dependency.getChildWithReln(sibling,UniversalEnglishGrammaticalRelations.MARKER); if (marker != null && marker.lemma().equals("as")) { return 1; } } } // look at the path to root List<IndexedWord> path = dependency.getPathToRoot(headIndexedWord); if(path == null) return 0; boolean isSubject = false; // if the node itself is a subject, we will not take into account its parent in the path if(dependency.hasParentWithReln(headIndexedWord, UniversalEnglishGrammaticalRelations.NOMINAL_SUBJECT)) isSubject = true; for (IndexedWord word : path) { if(!isSubject && (dict.reportVerb.contains(word.lemma()) || dict.reportNoun.contains(word.lemma()))) { return 1; } // check how to put isSubject isSubject = dependency.hasParentWithReln(word, UniversalEnglishGrammaticalRelations.NOMINAL_SUBJECT); } return 0; }
public int getReportEmbedding(Dictionaries dict) { if(headIndexedWord == null) return 0; // check adverbial clause with marker "as" for(IndexedWord sibling : getHeadSiblings()) { if(dict.reportVerb.contains(sibling.lemma()) && enhancedDependency.hasParentWithReln(sibling,UniversalEnglishGrammaticalRelations.ADV_CLAUSE_MODIFIER)) { IndexedWord marker = enhancedDependency.getChildWithReln(sibling,UniversalEnglishGrammaticalRelations.MARKER); if (marker != null && marker.lemma().equals("as")) { return 1; } } } // look at the path to root List<IndexedWord> path = getHeadPathToRoot(); if(path == null) return 0; boolean isSubject = false; // if the node itself is a subject, we will not take into account its parent in the path if(enhancedDependency.hasParentWithReln(headIndexedWord, UniversalEnglishGrammaticalRelations.NOMINAL_SUBJECT)) isSubject = true; for (IndexedWord word : path) { if(!isSubject && (dict.reportVerb.contains(word.lemma()) || dict.reportNoun.contains(word.lemma()))) { return 1; } // check how to put isSubject isSubject = enhancedDependency.hasParentWithReln(word, UniversalEnglishGrammaticalRelations.NOMINAL_SUBJECT); } return 0; }
/**
 * Classifies the mention as {@code "definite"}, {@code "quantified"} or
 * {@code "indefinite"} based on the head's children in {@code enhancedDependency}.
 *
 * <ul>
 *   <li>Any NER label other than {@code "O"} yields {@code "definite"}.</li>
 *   <li>If the head has determiner children, the first one (in iteration order)
 *       whose lemma is in {@code dict.determiners} yields {@code "definite"} and
 *       the first whose lemma is in {@code dict.quantifiers2} yields
 *       {@code "quantified"}; if none matches the result is {@code "indefinite"}.</li>
 *   <li>Otherwise a possession modifier yields {@code "definite"} and, failing
 *       that, a numeric modifier yields {@code "quantified"}.</li>
 * </ul>
 *
 * @param dict dictionaries providing the {@code determiners} and {@code quantifiers2} lemma sets
 * @return the classification, or {@code null} when there is no head word
 */
public String getQuantification(Dictionaries dict) {
  if (headIndexedWord == null) {
    return null;
  }
  if (!nerString.equals("O")) {
    return "definite";
  }

  Set<IndexedWord> detChildren =
      enhancedDependency.getChildrenWithReln(headIndexedWord, UniversalEnglishGrammaticalRelations.DETERMINER);
  Set<IndexedWord> possessors =
      enhancedDependency.getChildrenWithReln(headIndexedWord, UniversalEnglishGrammaticalRelations.POSSESSION_MODIFIER);

  if (!detChildren.isEmpty()) {
    for (IndexedWord detChild : detChildren) {
      String lemma = detChild.lemma();
      if (dict.determiners.contains(lemma)) {
        return "definite";
      }
      if (dict.quantifiers2.contains(lemma)) {
        return "quantified";
      }
    }
    // Determiners present but none recognised: possessives and numerals are not consulted.
    return "indefinite";
  }

  if (!possessors.isEmpty()) {
    return "definite";
  }

  Set<IndexedWord> numerals =
      enhancedDependency.getChildrenWithReln(headIndexedWord, UniversalEnglishGrammaticalRelations.NUMERIC_MODIFIER);
  return numerals.isEmpty() ? "indefinite" : "quantified";
}
/**
 * Classifies the mention as {@code "definite"}, {@code "quantified"} or
 * {@code "indefinite"} based on the head's children in {@code dependency}.
 *
 * <ul>
 *   <li>Any NER label other than {@code "O"} yields {@code "definite"}.</li>
 *   <li>If the head has determiner children, the first one (in iteration order)
 *       whose lemma is in {@code dict.determiners} yields {@code "definite"} and
 *       the first whose lemma is in {@code dict.quantifiers2} yields
 *       {@code "quantified"}; if none matches the result is {@code "indefinite"}.</li>
 *   <li>Otherwise a possession modifier yields {@code "definite"} and, failing
 *       that, a numeric modifier yields {@code "quantified"}.</li>
 * </ul>
 *
 * @param dict dictionaries providing the {@code determiners} and {@code quantifiers2} lemma sets
 * @return the classification, or {@code null} when there is no head word
 */
public String getQuantification(Dictionaries dict) {
  if (headIndexedWord == null) {
    return null;
  }
  if (!nerString.equals("O")) {
    return "definite";
  }

  Set<IndexedWord> detChildren =
      dependency.getChildrenWithReln(headIndexedWord, UniversalEnglishGrammaticalRelations.DETERMINER);
  Set<IndexedWord> possessors =
      dependency.getChildrenWithReln(headIndexedWord, UniversalEnglishGrammaticalRelations.POSSESSION_MODIFIER);

  if (!detChildren.isEmpty()) {
    for (IndexedWord detChild : detChildren) {
      String lemma = detChild.lemma();
      if (dict.determiners.contains(lemma)) {
        return "definite";
      }
      if (dict.quantifiers2.contains(lemma)) {
        return "quantified";
      }
    }
    // Determiners present but none recognised: possessives and numerals are not consulted.
    return "indefinite";
  }

  if (!possessors.isEmpty()) {
    return "definite";
  }

  Set<IndexedWord> numerals =
      dependency.getChildrenWithReln(headIndexedWord, UniversalEnglishGrammaticalRelations.NUMERIC_MODIFIER);
  return numerals.isEmpty() ? "indefinite" : "quantified";
}
public int getNegation(Dictionaries dict) { if(headIndexedWord == null) return 0; // direct negation in a child Collection<IndexedWord> children = enhancedDependency.getChildren(headIndexedWord); for(IndexedWord child : children) { if(dict.negations.contains(child.lemma())) return 1; } // or has a sibling for(IndexedWord sibling : getHeadSiblings()) { if(dict.negations.contains(sibling.lemma()) && !enhancedDependency.hasParentWithReln(headIndexedWord, UniversalEnglishGrammaticalRelations.NOMINAL_SUBJECT)) return 1; } // check the parent List<Pair<GrammaticalRelation,IndexedWord>> parentPairs = enhancedDependency.parentPairs(headIndexedWord); if (!parentPairs.isEmpty()) { Pair<GrammaticalRelation,IndexedWord> parentPair = parentPairs.get(0); GrammaticalRelation gr = parentPair.first; // check negative prepositions if(dict.neg_relations.contains(gr.toString())) return 1; } return 0; }
public int getNegation(Dictionaries dict) { if(headIndexedWord == null) return 0; // direct negation in a child Collection<IndexedWord> children = dependency.getChildren(headIndexedWord); for(IndexedWord child : children) { if(dict.negations.contains(child.lemma())) return 1; } // or has a sibling for(IndexedWord sibling : dependency.getSiblings(headIndexedWord)) { if(dict.negations.contains(sibling.lemma()) && !dependency.hasParentWithReln(headIndexedWord, UniversalEnglishGrammaticalRelations.NOMINAL_SUBJECT)) return 1; } // check the parent List<Pair<GrammaticalRelation,IndexedWord>> parentPairs = dependency.parentPairs(headIndexedWord); if (!parentPairs.isEmpty()) { Pair<GrammaticalRelation,IndexedWord> parentPair = parentPairs.get(0); GrammaticalRelation gr = parentPair.first; // check negative prepositions if(dict.neg_relations.contains(gr.toString())) return 1; } return 0; }
public int getModifiers(Dictionaries dict){ if(headIndexedWord == null) return 0; int count = 0; List<Pair<GrammaticalRelation, IndexedWord>> childPairs = dependency.childPairs(headIndexedWord); for(Pair<GrammaticalRelation, IndexedWord> childPair : childPairs) { GrammaticalRelation gr = childPair.first; IndexedWord word = childPair.second; //adjectival modifiers, prepositional modifiers, relative clauses, and possessives if they are not a determiner if((gr == UniversalEnglishGrammaticalRelations.ADJECTIVAL_MODIFIER || gr == UniversalEnglishGrammaticalRelations.RELATIVE_CLAUSE_MODIFIER || gr.toString().startsWith("nmod") || gr.toString().startsWith("acl") || gr.toString().startsWith("advcl")) && !dict.determiners.contains(word.lemma())) { count++; } // add noun modifier when the mention isn't a NER if(nerString.equals("O") && gr == UniversalEnglishGrammaticalRelations.COMPOUND_MODIFIER) { count++; } } return count; }
/**
 * Renders a node as a brace-enclosed list of key/value pairs.
 *
 * <p>The pairs are, in order: word, lemma, tag, value and original text. Each
 * key is separated from its value by {@code TUPLE_DELIMITER}, pairs are
 * separated by {@code ATOM_DELIMITER}, and every value is passed through
 * {@code nullShield} first.
 *
 * <p>NOTE: this is extremely brittle, and presumes values do not contain delimiters.
 *
 * @param node the node to render
 * @return the string representation of {@code node}
 */
public static String cheapWordToString(IndexedWord node) {
  StringBuilder out = new StringBuilder();
  out.append("{");
  out.append(WORD_KEY).append(TUPLE_DELIMITER).append(nullShield(node.word()));
  out.append(ATOM_DELIMITER);
  out.append(LEMMA_KEY).append(TUPLE_DELIMITER).append(nullShield(node.lemma()));
  out.append(ATOM_DELIMITER);
  out.append(POS_KEY).append(TUPLE_DELIMITER).append(nullShield(node.tag()));
  out.append(ATOM_DELIMITER);
  out.append(VALUE_KEY).append(TUPLE_DELIMITER).append(nullShield(node.value()));
  out.append(ATOM_DELIMITER);
  out.append(CURRENT_KEY).append(TUPLE_DELIMITER).append(nullShield(node.originalText()));
  out.append("}");
  return out.toString();
}
/**
 * Computes the deletion probability for an edge, dispatching on the name of its relation.
 *
 * <p>Relation names containing {@code "prep"}, {@code "obj"} or {@code "subj"}
 * (checked in that order) are delegated to the matching specialised method.
 * For {@code "amod"} the result is 0.0 when the lower-cased lemma of the
 * dependent (or its word, when it has no lemma) is in
 * {@code Util.PRIVATIVE_ADJECTIVES}, and 1.0 otherwise. Every other relation is
 * delegated to the overload that takes the relation name only.
 *
 * @param edge the edge under consideration
 * @param neighbors edges passed through to the specialised methods
 * @return the deletion probability for {@code edge}
 */
public double deletionProbability(SemanticGraphEdge edge, Iterable<SemanticGraphEdge> neighbors) {
  String relation = edge.getRelation().toString();

  if (relation.contains("prep")) {
    return ppDeletionProbability(edge, neighbors);
  }
  if (relation.contains("obj")) {
    return objDeletionProbability(edge, neighbors);
  }
  if (relation.contains("subj")) {
    return subjDeletionProbability(edge, neighbors);
  }
  if (!relation.equals("amod")) {
    return deletionProbability(relation);
  }

  // Adjectival modifier: privative adjectives get probability 0.0.
  String lemma = edge.getDependent().lemma();
  String adjective = (lemma != null ? lemma : edge.getDependent().word()).toLowerCase();
  return Util.PRIVATIVE_ADJECTIVES.contains(adjective) ? 0.0 : 1.0;
}
/**
 * Fills in the lemma of every vertex in {@code sg} that does not have one yet.
 *
 * <p>The lemma is obtained from {@code MORPH} using the vertex's word and tag;
 * vertices that already carry a lemma are left untouched.
 *
 * @param sg the graph whose vertices are updated in place
 */
private static void addLemmata(SemanticGraph sg) {
  sg.vertexListSorted().stream()
      .filter(word -> word.lemma() == null)
      .forEach(word -> word.setLemma(MORPH.lemma(word.word(), word.tag())));
}
/**
 * Mostly just an alias, but make sure our featurizer is serializable!
 *
 * <p>A featurizer is a {@link Function} from a triple of
 * ({@code ClauseSplitterSearchProblem.State}, {@code ClauseSplitterSearchProblem.Action},
 * {@code ClauseSplitterSearchProblem.State}) to a {@code Counter<String>} of features,
 * and it must also be {@link Serializable}.
 * NOTE(review): the triple is presumably (state before, action taken, state after) -- confirm
 * against the callers.
 */
public interface Featurizer extends Function<Triple<ClauseSplitterSearchProblem.State, ClauseSplitterSearchProblem.Action, ClauseSplitterSearchProblem.State>, Counter<String>>, Serializable {
  /**
   * Inspects a feature counter and answers whether it describes a "simple split".
   * NOTE(review): the exact criterion is defined by implementations and is not visible
   * here -- confirm before relying on it.
   *
   * @param feats a feature counter, of the same type this featurizer produces
   * @return the implementation's verdict for {@code feats}
   */
  boolean isSimpleSplit(Counter<String> feats);
}
/**
 * Converts the vertices of a graph into a stream of node writers.
 *
 * <p>Each element of the returned stream is a {@code Consumer<Writer>} that
 * emits one node, in this key order: {@code "id"}; then either
 * {@code "start"}/{@code "end"} character offsets (when the token's copy count
 * is 0) or {@code "source"} (the token's index, for copies); {@code "form"};
 * {@code "top"} (only for roots); {@code "properties"} (xpos, upos, lemma); and
 * {@code "edges"} (target and label for every sorted outgoing edge).
 *
 * <p>The index of the last vertex in sorted order is passed to
 * {@code getNodeIndex} for every node and edge target. An empty graph yields an
 * empty stream instead of failing on the lookup of that last vertex.
 *
 * @param graph the graph to convert; may be {@code null}
 * @return a stream of node writers, or {@code null} when {@code graph} is {@code null}
 */
private static Object getNodes(SemanticGraph graph) {
  if (graph == null) {
    return null;
  }

  List<IndexedWord> vertexList = graph.vertexListSorted();
  // With no vertices there is no last element to read; the value is only used
  // per node, so the placeholder is never observed for an empty graph.
  final int maxIndex = vertexList.isEmpty() ? 0 : vertexList.get(vertexList.size() - 1).index();

  return vertexList.stream().map((IndexedWord token) -> (Consumer<Writer>) node -> {
    node.set("id", getNodeIndex(token, maxIndex));
    if (token.copyCount() == 0) {
      node.set("start", token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class));
      node.set("end", token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
    } else {
      // Copy nodes point back at the index of the token they were copied from.
      node.set("source", token.index());
    }
    node.set("form", token.word());
    if (graph.getRoots().contains(token)) {
      node.set("top", true);
    }
    node.set("properties", (Consumer<Writer>) propertiesWriter -> {
      propertiesWriter.set("xpos", token.tag());
      propertiesWriter.set("upos", token.get(CoreAnnotations.CoarseTagAnnotation.class));
      propertiesWriter.set("lemma", token.lemma());
    });
    node.set("edges", graph.getOutEdgesSorted(token).stream().map(
        (SemanticGraphEdge dep) -> (Consumer<Writer>) edge -> {
          edge.set("target", getNodeIndex(dep.getDependent(), maxIndex));
          edge.set("label", dep.getRelation().toString());
        }));
  });
}