/**
 * Advances to the next match of the pattern whose matching node (the tree
 * node matched by the root node of the pattern) is not the node that
 * {@code getMatch()} returned before this call. Further matches that land on
 * that same node are consumed and skipped.
 *
 * @return true iff another matching node is found.
 */
public boolean findNextMatchingNode() {
  final Tree previousNode = getMatch();
  boolean found = find();
  // Reference comparison: only a match rooted at a different node object counts.
  while (found && getMatch() == previousNode) {
    found = find();
  }
  return found;
}
@Override public Tree getMatch() { // in general, only DescriptionNodes can match // exception: if we are a positive disjunction, we care about // exactly one of the children, so we return its match if (!myNode.isConj && !myNode.isNegated()) { if (currChild >= children.length || currChild < 0 || children[currChild] == null) { return null; } else { return children[currChild].getMatch(); } } else { throw new UnsupportedOperationException(); } } } // end private class CoordinationMatcher
/**
 * Builds a prefix annotation from the value of the matched node's first child.
 *
 * @param m matcher positioned on a match
 * @return {@code "[p," + prefix + "]"}, where the prefix is at most
 *         {@code prefixLength} leading characters of that value
 */
public String apply(TregexMatcher m) {
  final String childValue = m.getMatch().firstChild().value();
  // Clamp so that values shorter than prefixLength are used whole.
  final int cut = Math.min(childValue.length(), prefixLength);
  final String prefix = childValue.substring(0, cut);
  return "[p," + prefix + ']';
}
public void visitTree(Tree t) { int numMatches = 0; TregexMatcher match = p.matcher(t); List<Tree> matchedPartList = null; // initialize lazily, since usually most trees don't match! while (match.find()) { Tree curMatch = match.getMatch(); //System.out.println("Found match is: " + curMatch); if (matchedPartList == null) matchedPartList = new ArrayList<>(); matchedPartList.add(curMatch); numMatches++; } // end while match.find() if(numMatches > 0) { TreeFromFile tff = new TreeFromFile(t, filename); matchedTrees.add(tff); matchedParts.put(tff,matchedPartList); totalMatches += numMatches; } } // end visitTree
/**
 * Collects the indices of all imperative verbs in the tree {@code t}.
 * For each match of {@code IMPERATIVE_PATTERN}, the index of the first
 * label in the match's yield is recorded.
 *
 * @param t the tree to search
 * @return the set of recorded indices; empty when nothing matches
 */
private static Set<Integer> getImperatives(Tree t) {
  final Set<Integer> indices = new HashSet<>();
  for (TregexMatcher matcher = IMPERATIVE_PATTERN.matcher(t); matcher.find(); ) {
    final List<Label> yield = matcher.getMatch().yield();
    final CoreLabel first = (CoreLabel) yield.get(0);
    indices.add(first.index());
  }
  return indices;
}
/**
 * Find the next point (preterminal) at which the given tree should
 * be split, using the first match of {@code pSplitPoint}.
 *
 * @param t the tree to search
 * @return The endpoint of a subtree which should be extracted, or
 *         {@code null} if there are no subtrees which need to be
 *         extracted.
 */
static Tree findSplitPoint(Tree t) {
  final TregexMatcher matcher = pSplitPoint.matcher(t);
  return matcher.find() ? matcher.getMatch() : null;
}
/**
 * Produces an annotation from the tag sequence of the matched node's children.
 * The tags (reduced with {@code tlp.basicCategory} when {@code doBasicCat}
 * is set) are joined with spaces and looked up in {@code mwCounter} under the
 * matched node's value. Only when that count exceeds {@code cutoff} is the
 * annotation emitted.
 *
 * @param m matcher positioned on a match
 * @return {@code annotationMark} followed by the lower-cased tag sequence
 *         with whitespace removed, or the empty string when the count does
 *         not exceed the cutoff
 * @throws RuntimeException if {@code mwCounter} is null, or if some child of
 *         the matched node is not a preterminal
 */
public String apply(TregexMatcher m) {
  if (mwCounter == null) {
    throw new RuntimeException("Cannot enable POSSequence features without POS sequence map. Use option -frenchMWMap.");
  }

  final Tree matched = m.getMatch();
  final StringBuilder tagSeq = new StringBuilder();
  for (Tree child : matched.children()) {
    if (!child.isPreTerminal()) {
      throw new RuntimeException("Not POS sequence for tree: " + matched.toString());
    }
    String tag;
    if (doBasicCat) {
      tag = tlp.basicCategory(child.value());
    } else {
      tag = child.value();
    }
    tagSeq.append(tag).append(" ");
  }

  final String spacedTags = tagSeq.toString();
  if (mwCounter.getCount(matched.value(), spacedTags.trim()) > cutoff) {
    return annotationMark + spacedTags.replaceAll("\\s+", "").toLowerCase();
  }
  return "";
}
/**
 * Produces a bracketed annotation from the head of the matched tree.
 * The head is chosen by {@code headFinder}; the annotation uses the value
 * found two levels below it.
 *
 * @param matcher matcher positioned on a match
 * @return {@code "[" + value + "]"} (lower-cased when {@code lowerCase} is
 *         set), or the empty string when the head is not a pre-preterminal
 *         or the value is null
 */
public String apply(TregexMatcher matcher) {
  final Tree head = headFinder.determineHead(matcher.getMatch());
  if (!head.isPrePreTerminal()) {
    return "";
  }

  final String headValue = head.firstChild().firstChild().value();
  if (headValue == null) {
    return "";
  }
  final String shown = lowerCase ? headValue.toLowerCase() : headValue;
  return '[' + shown + ']';
}
protected static void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) { List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class); Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class); tree.indexLeaves(); SemanticGraph dependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class); TregexPattern tgrepPattern = npOrPrpMentionPattern; TregexMatcher matcher = tgrepPattern.matcher(tree); while (matcher.find()) { Tree t = matcher.getMatch(); List<Tree> mLeaves = t.getLeaves(); int beginIdx = ((CoreLabel)mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class)-1; int endIdx = ((CoreLabel)mLeaves.get(mLeaves.size()-1).label()).get(CoreAnnotations.IndexAnnotation.class); if (",".equals(sent.get(endIdx-1).word())) { endIdx--; } // try not to have span that ends with , IntPair mSpan = new IntPair(beginIdx, endIdx); if(!mentionSpanSet.contains(mSpan) && !insideNE(mSpan, namedEntitySpanSet)) { int dummyMentionId = -1; Mention m = new Mention(dummyMentionId, beginIdx, endIdx, dependency, new ArrayList<>(sent.subList(beginIdx, endIdx)), t); mentions.add(m); mentionSpanSet.add(mSpan); } } } /** Extract enumerations (A, B, and C) */
Tree match = m.getMatch(); String label = match.value(); List<CoreLabel> yield = match.taggedLabeledYield();
TregexMatcher m = rootMatch.matcher(t); while(m.findNextMatchingNode()) { Tree match = m.getMatch(); StringBuilder sb = new StringBuilder(); for(Tree kid : match.children())
TregexMatcher matcher = tgrepPattern.matcher(tree); while (matcher.find()) { Tree t = matcher.getMatch(); List<Tree> mLeaves = t.getLeaves(); int beginIdx = ((CoreLabel)mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class)-1;
private void findTreePattern(Tree tree, TregexPattern tgrepPattern, Set<Pair<Integer, Integer>> foundPairs) { try { TregexMatcher m = tgrepPattern.matcher(tree); while (m.find()) { Tree t = m.getMatch(); Tree np1 = m.getNode("m1"); Tree np2 = m.getNode("m2"); Tree np3 = null; if(tgrepPattern.pattern().contains("m3")) np3 = m.getNode("m3"); addFoundPair(np1, np2, t, foundPairs); if(np3!=null) addFoundPair(np2, np3, t, foundPairs); } } catch (Exception e) { // shouldn't happen.... throw new RuntimeException(e); } }
/**
 * Adds mentions for the nodes named m1 and m2 in every match of
 * {@code enumerationsMentionPattern}. Candidate spans are first gathered in
 * a map from span to subtree, so a span seen more than once keeps the
 * subtree stored last. Each gathered span then becomes a mention unless it
 * is already in {@code mentionSpanSet} or {@code insideNE} reports it
 * against {@code namedEntitySpanSet}.
 *
 * @param s sentence providing tokens, tree and enhanced dependencies
 * @param mentions receives the newly created mentions
 * @param mentionSpanSet spans already taken; each new mention's span is added
 * @param namedEntitySpanSet spans passed to {@code insideNE} for filtering
 */
protected static void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  final List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
  final Tree root = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  final SemanticGraph deps = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);

  final TregexMatcher matcher = enumerationsMentionPattern.matcher(root);
  final Map<IntPair, Tree> spanToMentionSubTree = Generics.newHashMap();

  while (matcher.find()) {
    // Return value is unused; the call is kept exactly as in the original code.
    matcher.getMatch();
    final Tree[] namedNodes = { matcher.getNode("m1"), matcher.getNode("m2") };
    for (Tree namedNode : namedNodes) {
      final List<Tree> leaves = namedNode.getLeaves();
      // Span is [first leaf index - 1, last leaf index).
      final int beginIdx = ((CoreLabel) leaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
      final int endIdx = ((CoreLabel) leaves.get(leaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
      spanToMentionSubTree.put(new IntPair(beginIdx, endIdx), namedNode);
    }
  }

  for (Map.Entry<IntPair, Tree> candidate : spanToMentionSubTree.entrySet()) {
    final IntPair span = candidate.getKey();
    if (mentionSpanSet.contains(span) || insideNE(span, namedEntitySpanSet)) {
      continue;
    }
    final int dummyMentionId = -1;
    final int begin = span.get(0);
    final int end = span.get(1);
    mentions.add(new Mention(dummyMentionId, begin, end, deps, new ArrayList<>(tokens.subList(begin, end)), candidate.getValue()));
    mentionSpanSet.add(span);
  }
}
TregexMatcher matcher = tgrepPattern.matcher(tree); while (matcher.find()) { Tree t = matcher.getMatch(); List<Tree> mLeaves = t.getLeaves(); int beginIdx = ((CoreLabel)mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class)-1;
while (matcher.find()) { sentWriter.set(Integer.toString(i++), (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer matchWriter) -> { matchWriter.set("match", matcher.getMatch().pennString()); matchWriter.set("namedNodes", matcher.getNodeNames().stream().map(nodeName -> (Consumer<JSONOutputter.Writer>) (JSONOutputter.Writer namedNodeWriter) -> namedNodeWriter.set(nodeName, matcher.getNode(nodeName).pennString())
Map<IntPair, Tree> spanToMentionSubTree = Generics.newHashMap(); while (matcher.find()) { matcher.getMatch(); Tree m1 = matcher.getNode("m1"); Tree m2 = matcher.getNode("m2");
static public void countMWEStatistics(Tree t, TwoDimensionalCounter<String, String> unigramTagger, TwoDimensionalCounter<String, String> labelPreterm, TwoDimensionalCounter<String, String> pretermLabel, TwoDimensionalCounter<String, String> labelTerm, TwoDimensionalCounter<String, String> termLabel) { updateTagger(unigramTagger,t); //Count MWE statistics TregexMatcher m = pMWE.matcher(t); while (m.findNextMatchingNode()) { Tree match = m.getMatch(); String label = match.value(); if(RESOLVE_DUMMY_TAGS && label.equals(FrenchXMLTreeReader.MISSING_PHRASAL)) continue; String preterm = SentenceUtils.listToString(match.preTerminalYield()); String term = SentenceUtils.listToString(match.yield()); labelPreterm.incrementCount(label,preterm); pretermLabel.incrementCount(preterm,label); labelTerm.incrementCount(label,term); termLabel.incrementCount(term, label); } }