/**
 * Accepts a tree only when none of the patterns in {@code patterns} matches it.
 *
 * @param tree the tree each pattern is matched against
 * @return {@code false} as soon as one pattern finds a match in {@code tree};
 *         {@code true} when no pattern matches
 */
public boolean test(Tree tree) {
  boolean rejected = false;
  for (TregexPattern candidate : patterns) {
    if (candidate.matcher(tree).find()) {
      // One hit is enough to reject; the remaining patterns are not consulted.
      rejected = true;
      break;
    }
  }
  return !rejected;
}
public void visitTree(Tree t) { int numMatches = 0; TregexMatcher match = p.matcher(t); List<Tree> matchedPartList = null; // initialize lazily, since usually most trees don't match! while (match.find()) { Tree curMatch = match.getMatch(); //System.out.println("Found match is: " + curMatch); if (matchedPartList == null) matchedPartList = new ArrayList<>(); matchedPartList.add(curMatch); numMatches++; } // end while match.find() if(numMatches > 0) { TreeFromFile tff = new TreeFromFile(t, filename); matchedTrees.add(tff); matchedParts.put(tff,matchedPartList); totalMatches += numMatches; } } // end visitTree
/**
 * Advances to the next match whose matching node (the tree node bound to the
 * root of the pattern) is a different object from the node of the match that
 * was current when this method was called.
 *
 * <p>Matches that land on the same node again are stepped over. Nodes are
 * compared by reference, not by {@code equals}.
 *
 * @return {@code true} iff a match on a different node was found; {@code false}
 *         once {@code find()} is exhausted
 */
public boolean findNextMatchingNode() {
  final Tree previousNode = getMatch();
  boolean advanced = find();
  while (advanced && getMatch() == previousNode) {
    advanced = find();
  }
  return advanced;
}
/**
 * Returns the indices of all imperative verbs in the tree.
 *
 * <p>For every node matched by {@code IMPERATIVE_PATTERN}, the first label of
 * that node's yield is cast to {@link CoreLabel} and its {@code index()} is
 * collected.
 *
 * @param t the tree to search
 * @return the set of collected indices; empty when the pattern never matches
 */
private static Set<Integer> getImperatives(Tree t) {
  Set<Integer> imperativeIndices = new HashSet<>();
  for (TregexMatcher finder = IMPERATIVE_PATTERN.matcher(t); finder.find(); ) {
    List<Label> matchedYield = finder.getMatch().yield();
    CoreLabel firstToken = (CoreLabel) matchedYield.get(0);
    imperativeIndices.add(firstToken.index());
  }
  return imperativeIndices;
}
/**
 * Finds the next point (preterminal) at which the given tree should be split.
 *
 * <p>Only the first match of {@code pSplitPoint} is considered.
 *
 * @param t the tree to search for a split point
 * @return the endpoint of a subtree which should be extracted, or {@code null}
 *         if there are no subtrees which need to be extracted
 */
static Tree findSplitPoint(Tree t) {
  TregexMatcher splitFinder = pSplitPoint.matcher(t);
  return splitFinder.find() ? splitFinder.getMatch() : null;
}
while (m.find()) {
/**
 * Rewrites every match of {@code conmigoPattern} in the tree by relabeling the
 * node named {@code conmigo} with a bare pronoun and applying an
 * {@code adjoinF} operation that introduces {@code con} at the node named
 * {@code sn}.
 *
 * <p>Pronoun choice (case-insensitive on the node's value): "conmigo" gives
 * "mí", "contigo" gives "ti", "consigo" gives "sí". If the value starts with an
 * upper-case 'C', the whole pronoun is upper-cased.
 *
 * <p>NOTE(review): a value outside those three words leaves the pronoun
 * {@code null}; presumably {@code conmigoPattern} rules that out — confirm.
 *
 * @param t the tree to transform
 * @return the tree returned by the last Tsurgeon evaluation, or {@code t}
 *         itself when the pattern never matches
 */
private static Tree expandConmigo(Tree t) {
  TregexMatcher finder = conmigoPattern.matcher(t);
  while (finder.find()) {
    String contracted = finder.getNode("conmigo").value();

    String pronoun;
    if (contracted.equalsIgnoreCase("conmigo")) {
      pronoun = "mí";
    } else if (contracted.equalsIgnoreCase("contigo")) {
      pronoun = "ti";
    } else if (contracted.equalsIgnoreCase("consigo")) {
      pronoun = "sí";
    } else {
      pronoun = null;
    }

    // A capitalized source word yields a fully upper-cased pronoun.
    if (contracted.charAt(0) == 'C') {
      pronoun = pronoun.toUpperCase();
    }

    String operation = String.format(
        "[relabel conmigo /%s/]" + "[adjoinF (sp (prep (sp000 con)) foot@) sn]",
        pronoun);
    TsurgeonPattern surgery = Tsurgeon.parseOperation(operation);
    // The same matcher keeps being used after the tree has been operated on.
    t = surgery.matcher().evaluate(t, finder);
  }
  return t;
}
/**
 * Tests whether any match of the pattern binds a node {@code m1} whose begin
 * index, plus one, equals the index of the mention's head word.
 *
 * <p>Any exception raised while matching or reading annotations is printed via
 * {@code printStackTrace()} and treated as "no match".
 *
 * @param m the mention whose {@code headWord} index is compared
 * @param tree the tree to search
 * @param tgrepPattern pattern that names the node of interest {@code m1}
 * @return {@code true} if such a match exists, {@code false} otherwise or on error
 */
private static boolean checkPleonastic(Mention m, Tree tree, TregexPattern tgrepPattern) {
  boolean found = false;
  try {
    TregexMatcher finder = tgrepPattern.matcher(tree);
    while (!found && finder.find()) {
      CoreLabel nodeLabel = (CoreLabel) finder.getNode("m1").label();
      int beginPlusOne = nodeLabel.get(CoreAnnotations.BeginIndexAnnotation.class) + 1;
      found = beginPlusOne == m.headWord.get(CoreAnnotations.IndexAnnotation.class);
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
  return found;
}
/**
 * Reports whether the pattern matches somewhere in the tree such that the node
 * bound to {@code m1} has a begin index exactly one less than the index of the
 * mention's head word.
 *
 * <p>Exceptions thrown during matching or annotation lookup are printed with
 * {@code printStackTrace()} and the method then returns {@code false}.
 *
 * @param m the mention whose {@code headWord} supplies the index to compare
 * @param tree the tree to search
 * @param tgrepPattern pattern that binds a node named {@code m1}
 * @return {@code true} on the first qualifying match; {@code false} if there is
 *         none or an exception occurred
 */
private static boolean checkPleonastic(Mention m, Tree tree, TregexPattern tgrepPattern) {
  try {
    for (TregexMatcher finder = tgrepPattern.matcher(tree); finder.find(); ) {
      CoreLabel m1Label = (CoreLabel) finder.getNode("m1").label();
      Integer begin = m1Label.get(CoreAnnotations.BeginIndexAnnotation.class);
      if (begin + 1 == m.headWord.get(CoreAnnotations.IndexAnnotation.class)) {
        return true;
      }
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
  return false;
}
}
protected static void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) { List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class); Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class); tree.indexLeaves(); SemanticGraph dependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class); TregexPattern tgrepPattern = npOrPrpMentionPattern; TregexMatcher matcher = tgrepPattern.matcher(tree); while (matcher.find()) { Tree t = matcher.getMatch(); List<Tree> mLeaves = t.getLeaves(); int beginIdx = ((CoreLabel)mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class)-1; int endIdx = ((CoreLabel)mLeaves.get(mLeaves.size()-1).label()).get(CoreAnnotations.IndexAnnotation.class); if (",".equals(sent.get(endIdx-1).word())) { endIdx--; } // try not to have span that ends with , IntPair mSpan = new IntPair(beginIdx, endIdx); if(!mentionSpanSet.contains(mSpan) && !insideNE(mSpan, namedEntitySpanSet)) { int dummyMentionId = -1; Mention m = new Mention(dummyMentionId, beginIdx, endIdx, dependency, new ArrayList<>(sent.subList(beginIdx, endIdx)), t); mentions.add(m); mentionSpanSet.add(mSpan); } } } /** Extract enumerations (A, B, and C) */
while (matcher.find()) { Tree t = matcher.getMatch(); List<Tree> mLeaves = t.getLeaves();
/**
 * Repeatedly matches a pattern against a tree and, for each match, applies the
 * surgical operations contained in a {@link TsurgeonPattern}.
 *
 * <p>After every operation a fresh matcher is created on the resulting tree, so
 * matching always restarts from the modified tree. Processing stops when the
 * pattern no longer matches or when the operation yields {@code null}.
 *
 * @param matchPattern A {@link TregexPattern} to be matched against a {@link Tree}.
 * @param p A {@link TsurgeonPattern} to apply.
 * @param t the {@link Tree} to match against and perform surgery on.
 * @return the surgically modified tree; {@code null} if an operation returned {@code null}.
 */
public static Tree processPattern(TregexPattern matchPattern, TsurgeonPattern p, Tree t) {
  TregexMatcher treeMatcher = matchPattern.matcher(t);
  final TsurgeonMatcher surgeon = p.matcher();
  Tree current = t;
  while (treeMatcher.find()) {
    current = surgeon.evaluate(current, treeMatcher);
    if (current == null) {
      return null;
    }
    // Restart matching on the tree as it looks after surgery.
    treeMatcher = matchPattern.matcher(current);
  }
  return current;
}
private void findTreePattern(Tree tree, TregexPattern tgrepPattern, Set<Pair<Integer, Integer>> foundPairs) { try { TregexMatcher m = tgrepPattern.matcher(tree); while (m.find()) { Tree t = m.getMatch(); Tree np1 = m.getNode("m1"); Tree np2 = m.getNode("m2"); Tree np3 = null; if(tgrepPattern.pattern().contains("m3")) np3 = m.getNode("m3"); addFoundPair(np1, np2, t, foundPairs); if(np3!=null) addFoundPair(np2, np3, t, foundPairs); } } catch (Exception e) { // shouldn't happen.... throw new RuntimeException(e); } }
/**
 * Adds a {@link Mention} for each subtree bound to {@code m1} or {@code m2} in
 * a match of {@code enumerationsMentionPattern}, unless its span is already
 * known or lies inside a named entity.
 *
 * <p>Spans are derived from the {@code IndexAnnotation} of a subtree's first and
 * last leaves: begin is the first leaf's index minus one, end is the last
 * leaf's index. Subtrees are first collected per span, so a span found by
 * several matches yields one mention, using the subtree stored last.
 *
 * @param s the sentence, providing tokens, parse tree and enhanced dependencies
 * @param mentions receives the newly created mentions
 * @param mentionSpanSet spans already covered; each new span is added to it
 * @param namedEntitySpanSet spans passed to {@code insideNE} for filtering
 */
protected static void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
  Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  SemanticGraph dependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);

  TregexMatcher matcher = enumerationsMentionPattern.matcher(tree);
  Map<IntPair, Tree> spanToMentionSubTree = Generics.newHashMap();
  while (matcher.find()) {
    // Both named nodes get the same span computation; m1 is stored before m2.
    Tree[] conjuncts = {matcher.getNode("m1"), matcher.getNode("m2")};
    for (Tree conjunct : conjuncts) {
      List<Tree> mLeaves = conjunct.getLeaves();
      int beginIdx = ((CoreLabel) mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
      int endIdx = ((CoreLabel) mLeaves.get(mLeaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
      spanToMentionSubTree.put(new IntPair(beginIdx, endIdx), conjunct);
    }
  }

  // Iterate entries directly rather than looking each key up again.
  for (Map.Entry<IntPair, Tree> entry : spanToMentionSubTree.entrySet()) {
    IntPair mSpan = entry.getKey();
    if (mentionSpanSet.contains(mSpan) || insideNE(mSpan, namedEntitySpanSet)) {
      continue;
    }
    int dummyMentionId = -1;
    Mention m = new Mention(dummyMentionId, mSpan.get(0), mSpan.get(1), dependency,
        new ArrayList<>(sent.subList(mSpan.get(0), mSpan.get(1))), entry.getValue());
    mentions.add(m);
    mentionSpanSet.add(mSpan);
  }
}
while (matcher.find()) { Tree t = matcher.getMatch(); List<Tree> mLeaves = t.getLeaves();
TregexMatcher matcher = tgrepPattern.matcher(tree); Map<IntPair, Tree> spanToMentionSubTree = Generics.newHashMap(); while (matcher.find()) { matcher.getMatch(); Tree m1 = matcher.getNode("m1");
while (matcher.find()) { Tree verbNode = matcher.getNode("vb"); String verb = verbNode.value();
/**
 * Restructures date constituents in the tree in two passes.
 *
 * <p>First, for every match of {@code tregexMonthYear}, the children of the
 * node named {@code root} are replaced by the nodes named {@code month} and
 * {@code year}. Then, for every match of {@code tregexMonthDayYear}, the
 * children of {@code root} are replaced by {@code month}, {@code day},
 * {@code comma} and {@code year}. After each replacement the pattern is matched
 * afresh against the whole tree.
 *
 * <p>NOTE(review): each loop ends only when its pattern stops matching, so this
 * presumably relies on the rewritten node no longer matching — confirm against
 * the pattern definitions.
 *
 * @param t the tree to transform; it is modified in place
 * @return the same tree instance {@code t}
 */
public Tree transformTree(Tree t) {
  for (TregexMatcher finder = tregexMonthYear.matcher(t);
       finder.find();
       finder = tregexMonthYear.matcher(t)) {
    Tree dateRoot = finder.getNode("root");
    Tree[] flattened = {finder.getNode("month"), finder.getNode("year")};
    dateRoot.setChildren(flattened);
  }

  for (TregexMatcher finder = tregexMonthDayYear.matcher(t);
       finder.find();
       finder = tregexMonthDayYear.matcher(t)) {
    Tree dateRoot = finder.getNode("root");
    Tree[] flattened = {
        finder.getNode("month"),
        finder.getNode("day"),
        finder.getNode("comma"),
        finder.getNode("year")
    };
    dateRoot.setChildren(flattened);
  }

  return t;
}
}
/**
 * Applies a list of (match pattern, surgery) pairs to a tree, in list order.
 *
 * <p>For each pair, the surgery is evaluated for as long as the match pattern
 * keeps matching; matching restarts on the resulting tree after every
 * evaluation. The static flag {@code matchedOnTree} is cleared on entry and set
 * once any pattern matches.
 *
 * @param ops the operations to run; each pair holds the pattern to find and the
 *            surgery to apply
 * @param t the tree to operate on
 * @return the resulting tree, or {@code null} as soon as a surgery evaluates to
 *         {@code null} (remaining operations are skipped)
 * @throws RuntimeException wrapping a {@link NullPointerException} raised while
 *         running an operation, with the offending pair in the message
 */
@SuppressWarnings("StringContatenationInLoop")
public static Tree processPatternsOnTree(List<Pair<TregexPattern, TsurgeonPattern>> ops, Tree t) {
  matchedOnTree = false;
  Tree current = t;
  for (Pair<TregexPattern,TsurgeonPattern> op : ops) {
    try {
      final TregexPattern search = op.first();
      if (DEBUG) {
        log.info("Running pattern " + search);
      }
      TregexMatcher finder = search.matcher(current);
      final TsurgeonMatcher surgeon = op.second().matcher();
      while (finder.find()) {
        matchedOnTree = true;
        current = surgeon.evaluate(current, finder);
        if (current == null) {
          return null;
        }
        // Restart matching on the tree as it looks after surgery.
        finder = search.matcher(current);
      }
    } catch (NullPointerException npe) {
      throw new RuntimeException("Tsurgeon.processPatternsOnTree failed to match label for pattern: " + op.first() + ", " + op.second(), npe);
    }
  }
  return current;
}