/**
 * Builds a matcher that evaluates several TokensRegex patterns together.
 * The patterns are wrapped in a {@code CoreMapNodePatternTrigger}, which in turn
 * backs the {@code BasicSequencePatternTrigger} handed to the new matcher.
 *
 * @param patterns the TokensRegex patterns the matcher should cover
 * @return a MultiPatternMatcher over the given patterns
 */
public static MultiPatternMatcher<CoreMap> getMultiPatternMatcher(TokenSequencePattern... patterns) {
  MultiPatternMatcher.BasicSequencePatternTrigger<CoreMap> sequenceTrigger =
      new MultiPatternMatcher.BasicSequencePatternTrigger<>(new CoreMapNodePatternTrigger(patterns));
  return new MultiPatternMatcher<>(sequenceTrigger, patterns);
}
/**
 * Matches all of our patterns against the sequence and keeps only matches
 * that do not overlap one another, using the default comparator
 * ({@code SequenceMatchResult.DEFAULT_COMPARATOR}) to decide between
 * overlapping candidates. Candidates are preferred by:
 * <ol>
 *   <li>highest priority/score,</li>
 *   <li>then longest match,</li>
 *   <li>then starting offset,</li>
 *   <li>then original pattern order.</li>
 * </ol>
 *
 * @param elements input sequence to match against
 * @return list of non-overlapping match results
 */
public List<SequenceMatchResult<T>> findNonOverlapping(List<? extends T> elements) {
  return this.findNonOverlapping(elements, SequenceMatchResult.DEFAULT_COMPARATOR);
}
/**
 * Runs every triggered pattern over the sequence independently and returns,
 * for each pattern, its own non-overlapping matches.
 * Unlike {@link #findNonOverlapping(List)}, matches coming from different
 * patterns may overlap each other and are all kept.
 *
 * @param elements input sequence to match against
 * @return a single iterable chaining together the per-pattern match iterables,
 *         in the iteration order of the triggered patterns
 */
public Iterable<SequenceMatchResult<T>> findAllNonOverlappingMatchesPerPattern(List<? extends T> elements) {
  Collection<SequencePattern<T>> triggered = getTriggeredPatterns(elements);
  // Exactly one entry is added per triggered pattern, so the capacity hint is
  // the number of patterns, not the length of the input sequence.
  List<Iterable<SequenceMatchResult<T>>> allMatches = new ArrayList<>(triggered.size());
  for (SequencePattern<T> p : triggered) {
    SequenceMatcher<T> m = p.getMatcher(elements);
    m.setMatchWithResult(matchWithResult);
    allMatches.add(m.findAllNonOverlapping());
  }
  return Iterables.chain(allMatches);
}
/**
 * Matches all of our patterns against the sequence and returns a set of
 * non-overlapping matches. Where matches from several patterns overlap, the
 * selection is made so that the overall score is maximal, with each match
 * scored by {@code SequenceMatchResult.SCORER}.
 *
 * @param elements input sequence to match against
 * @return list of non-overlapping match results
 */
public List<SequenceMatchResult<T>> findNonOverlappingMaxScore(List<? extends T> elements) {
  return this.findNonOverlappingMaxScore(elements, SequenceMatchResult.SCORER);
}
protected static AnnotationExtractRule create(Env env, SequenceMatchRules.AnnotationExtractRule aerTemplate, List<TokenSequencePattern> patterns) { AnnotationExtractRule r = new AnnotationExtractRule(); r.stage = aerTemplate.stage; r.active = aerTemplate.active; r.priority = Double.NaN; // Priority from patterns? r.weight = Double.NaN; // weight from patterns? r.annotationField = aerTemplate.annotationField; r.tokensAnnotationField = aerTemplate.tokensAnnotationField; r.tokensResultAnnotationField = aerTemplate.tokensResultAnnotationField; r.resultAnnotationField = aerTemplate.resultAnnotationField; r.resultNestedAnnotationField = aerTemplate.resultNestedAnnotationField; r.matchFindType = aerTemplate.matchFindType; r.matchedExpressionGroup = aerTemplate.matchedExpressionGroup; r.matchWithResults = aerTemplate.matchWithResults; r.ruleType = aerTemplate.ruleType; r.isComposite = aerTemplate.isComposite; r.includeNested = aerTemplate.includeNested; r.active = aerTemplate.active; r.result = aerTemplate.result; if (r.annotationField == null) { r.annotationField = r.tokensAnnotationField; } r.ruleType = TOKEN_PATTERN_RULE_TYPE; MultiPatternMatcher<CoreMap> multiPatternMatcher = TokenSequencePattern.getMultiPatternMatcher(patterns); multiPatternMatcher.setMatchWithResult(r.matchWithResults); updateExtractRule(r, env, multiPatternMatcher, null, r.result); return r; }
Iterable<SequenceMatchResult<CoreMap>> matched = multiPatternMatcher.find(sent, SequenceMatcher.FindType.FIND_ALL);
List<CoreLabel> sent = sents.get(sentid); Iterable<SequenceMatchResult<CoreMap>> matched = multiPatternMatcher.findAllNonOverlappingMatchesPerPattern(sent); for (SequenceMatchResult<CoreMap> m: matched) { int s = m.start("$term");
/**
 * Finds the non-overlapping, maximum-score matches in the sequence, converts
 * each one with the extractor and appends the converted values to {@code out}.
 *
 * @param seq sequence to match against; {@code null} yields no extraction
 * @param out list receiving one extracted value per match
 * @return {@code true} if at least one value was appended to {@code out}
 */
@Override
public boolean extract(List<? extends T> seq, List<O> out) {
  if (seq == null) {
    return false;
  }
  List<SequenceMatchResult<T>> results = matcher.findNonOverlappingMaxScore(seq);
  for (SequenceMatchResult<T> result : results) {
    out.add(extractor.apply(result));
  }
  // Something was extracted exactly when there was at least one match.
  return !results.isEmpty();
}
protected static AnnotationExtractRule create(Env env, SequenceMatchRules.AnnotationExtractRule aerTemplate, List<TokenSequencePattern> patterns) { AnnotationExtractRule r = new AnnotationExtractRule(); r.stage = aerTemplate.stage; r.active = aerTemplate.active; r.priority = Double.NaN; // Priority from patterns? r.weight = Double.NaN; // weight from patterns? r.annotationField = aerTemplate.annotationField; r.tokensAnnotationField = aerTemplate.tokensAnnotationField; r.tokensResultAnnotationField = aerTemplate.tokensResultAnnotationField; r.resultAnnotationField = aerTemplate.resultAnnotationField; r.resultNestedAnnotationField = aerTemplate.resultNestedAnnotationField; r.matchFindType = aerTemplate.matchFindType; r.matchedExpressionGroup = aerTemplate.matchedExpressionGroup; r.matchWithResults = aerTemplate.matchWithResults; r.ruleType = aerTemplate.ruleType; r.isComposite = aerTemplate.isComposite; r.includeNested = aerTemplate.includeNested; r.active = aerTemplate.active; r.result = aerTemplate.result; if (r.annotationField == null) { r.annotationField = r.tokensAnnotationField; } r.ruleType = TOKEN_PATTERN_RULE_TYPE; MultiPatternMatcher<CoreMap> multiPatternMatcher = TokenSequencePattern.getMultiPatternMatcher(patterns); multiPatternMatcher.setMatchWithResult(r.matchWithResults); updateExtractRule(r, env, multiPatternMatcher, null, r.result); return r; }
/**
 * Given a sequence, applies our patterns over the sequence and returns
 * all non overlapping matches. When matches from multiple patterns overlap,
 * the matches are selected to give the overall maximum score.
 * <p>
 * Every match of every triggered pattern is collected first (each tagged with
 * the position of its pattern in the triggered order), the selection is then
 * delegated to {@code IntervalTree.getNonOverlappingMaxScore}, and the selected
 * matches are sorted with {@code SequenceMatchResult.OFFSET_COMPARATOR}.
 *
 * @param elements input sequence to match against
 * @param scorer scorer for scoring each match
 * @return list of match results that are non-overlapping
 * @throws RuntimeInterruptedException if the calling thread was interrupted
 *         before a pattern was about to be matched
 */
public List<SequenceMatchResult<T>> findNonOverlappingMaxScore(List<? extends T> elements,
                                                               ToDoubleFunction<? super SequenceMatchResult> scorer) {
  Collection<SequencePattern<T>> triggered = getTriggeredPatterns(elements);
  List<SequenceMatchResult<T>> all = new ArrayList<>();
  int i = 0;
  for (SequencePattern<T> p : triggered) {
    // Allow interrupting, consistent with the other matching loops of this
    // class (findNonOverlapping and find).
    if (Thread.interrupted()) {
      throw new RuntimeInterruptedException();
    }
    SequenceMatcher<T> m = p.getMatcher(elements);
    m.setMatchWithResult(matchWithResult);
    m.setOrder(i);
    while (m.find()) {
      all.add(m.toBasicSequenceMatchResult());
    }
    i++;
  }
  List<SequenceMatchResult<T>> res = IntervalTree.getNonOverlappingMaxScore(
      all, SequenceMatchResult.TO_INTERVAL, scorer);
  res.sort(SequenceMatchResult.OFFSET_COMPARATOR);
  return res;
}
List<SequenceMatchResult<CoreMap>> answers = multiMatcher.findNonOverlapping(tokens); int j = 0; for (SequenceMatchResult<CoreMap> matched : answers) {
/**
 * Builds a matcher that evaluates a collection of TokensRegex patterns together.
 * The patterns are wrapped in a {@code CoreMapNodePatternTrigger}, which in turn
 * backs the {@code BasicSequencePatternTrigger} handed to the new matcher.
 *
 * @param patterns collection of TokensRegex patterns the matcher should cover
 * @return a MultiPatternMatcher over the given patterns
 */
public static MultiPatternMatcher<CoreMap> getMultiPatternMatcher(Collection<TokenSequencePattern> patterns) {
  MultiPatternMatcher.BasicSequencePatternTrigger<CoreMap> sequenceTrigger =
      new MultiPatternMatcher.BasicSequencePatternTrigger<>(new CoreMapNodePatternTrigger(patterns));
  return new MultiPatternMatcher<>(sequenceTrigger, patterns);
}
/**
 * Matches the sequence with the non-overlapping, maximum-score strategy and
 * converts the first match of the returned list with the extractor.
 *
 * @param seq sequence to match against
 * @return the extracted value for the first match, or {@code null} when
 *         {@code seq} is {@code null} or nothing matched
 */
@Override
public O apply(List<? extends T> seq) {
  if (seq == null) {
    return null;
  }
  List<SequenceMatchResult<T>> results = matcher.findNonOverlappingMaxScore(seq);
  if (results.isEmpty()) {
    return null;
  }
  return extractor.apply(results.get(0));
}
}
protected static AnnotationExtractRule create(Env env, SequenceMatchRules.AnnotationExtractRule aerTemplate, List<TokenSequencePattern> patterns) { AnnotationExtractRule r = new AnnotationExtractRule(); r.stage = aerTemplate.stage; r.active = aerTemplate.active; r.priority = Double.NaN; // Priority from patterns? r.weight = Double.NaN; // weight from patterns? r.annotationField = aerTemplate.annotationField; r.tokensAnnotationField = aerTemplate.tokensAnnotationField; r.tokensResultAnnotationField = aerTemplate.tokensResultAnnotationField; r.resultAnnotationField = aerTemplate.resultAnnotationField; r.resultNestedAnnotationField = aerTemplate.resultNestedAnnotationField; r.matchFindType = aerTemplate.matchFindType; r.matchedExpressionGroup = aerTemplate.matchedExpressionGroup; r.matchWithResults = aerTemplate.matchWithResults; r.ruleType = aerTemplate.ruleType; r.isComposite = aerTemplate.isComposite; r.includeNested = aerTemplate.includeNested; r.active = aerTemplate.active; r.result = aerTemplate.result; if (r.annotationField == null) { r.annotationField = r.tokensAnnotationField; } r.ruleType = TOKEN_PATTERN_RULE_TYPE; MultiPatternMatcher<CoreMap> multiPatternMatcher = TokenSequencePattern.getMultiPatternMatcher(patterns); multiPatternMatcher.setMatchWithResult(r.matchWithResults); updateExtractRule(r, env, multiPatternMatcher, null, r.result); return r; }
/** * Given a sequence, applies our patterns over the sequence and returns * all non overlapping matches. When multiple patterns overlaps, * matched patterns are selected by order specified by the comparator * @param elements input sequence to match against * @param cmp comparator indicating order that overlapped sequences should be selected. * @return list of match results that are non-overlapping */ public List<SequenceMatchResult<T>> findNonOverlapping(List<? extends T> elements, Comparator<? super SequenceMatchResult> cmp) { Collection<SequencePattern<T>> triggered = getTriggeredPatterns(elements); List<SequenceMatchResult<T>> all = new ArrayList<>(); int i = 0; for (SequencePattern<T> p:triggered) { if (Thread.interrupted()) { // Allow interrupting throw new RuntimeInterruptedException(); } SequenceMatcher<T> m = p.getMatcher(elements); m.setMatchWithResult(matchWithResult); m.setOrder(i); while (m.find()) { all.add(m.toBasicSequenceMatchResult()); } i++; } List<SequenceMatchResult<T>> res = IntervalTree.getNonOverlapping( all, SequenceMatchResult.TO_INTERVAL, cmp); res.sort(SequenceMatchResult.OFFSET_COMPARATOR); return res; }
List<SequenceMatchResult<CoreMap>> answers = multiMatcher.findNonOverlapping(tokens); int j = 0; for (SequenceMatchResult<CoreMap> matched : answers) {
/**
 * Builds a matcher that evaluates several TokensRegex patterns together.
 * The patterns are wrapped in a {@code CoreMapNodePatternTrigger}, which in turn
 * backs the {@code BasicSequencePatternTrigger} handed to the new matcher.
 *
 * @param patterns the TokensRegex patterns the matcher should cover
 * @return a MultiPatternMatcher over the given patterns
 */
public static MultiPatternMatcher<CoreMap> getMultiPatternMatcher(TokenSequencePattern... patterns) {
  MultiPatternMatcher.BasicSequencePatternTrigger<CoreMap> sequenceTrigger =
      new MultiPatternMatcher.BasicSequencePatternTrigger<>(new CoreMapNodePatternTrigger(patterns));
  return new MultiPatternMatcher<>(sequenceTrigger, patterns);
}
/**
 * Matches all of our patterns against the sequence and returns a set of
 * non-overlapping matches. Where matches from several patterns overlap, the
 * selection is made so that the overall score is maximal, with each match
 * scored by {@code SequenceMatchResult.SCORER}.
 *
 * @param elements input sequence to match against
 * @return list of non-overlapping match results
 */
public List<SequenceMatchResult<T>> findNonOverlappingMaxScore(List<? extends T> elements) {
  return this.findNonOverlappingMaxScore(elements, SequenceMatchResult.SCORER);
}
/** * Given a sequence, applies our patterns over the sequence and returns * all matches, depending on the findType. When multiple patterns overlaps, * matched patterns are selected by order specified by the comparator * @param elements input sequence to match against * @param findType whether FindType.FIND_ALL or FindType.FIND_NONOVERLAPPING * @return list of match results */ public List<SequenceMatchResult<T>> find(List<? extends T> elements, SequenceMatcher.FindType findType) { Collection<SequencePattern<T>> triggered = getTriggeredPatterns(elements); List<SequenceMatchResult<T>> all = new ArrayList<>(); int i = 0; for (SequencePattern<T> p:triggered) { if (Thread.interrupted()) { // Allow interrupting throw new RuntimeInterruptedException(); } SequenceMatcher<T> m = p.getMatcher(elements); m.setMatchWithResult(matchWithResult); m.setFindType(findType); m.setOrder(i); while (m.find()) { all.add(m.toBasicSequenceMatchResult()); } i++; } List<SequenceMatchResult<T>> res = IntervalTree.getNonOverlapping( all, SequenceMatchResult.TO_INTERVAL, SequenceMatchResult.DEFAULT_COMPARATOR); res.sort(SequenceMatchResult.OFFSET_COMPARATOR); return res; }
private void annotateMatched(List<CoreLabel> tokens) { List<SequenceMatchResult<CoreMap>> matched = multiPatternMatcher.findNonOverlapping(tokens); for (SequenceMatchResult<CoreMap> m:matched) { Entry entry = patternToEntry.get(m.pattern());