/**
 * Creates a matcher that applies this pattern to the supplied tokens.
 * Delegates directly to {@code getMatcher(tokens)}.
 *
 * @param tokens List of tokens to match against
 * @return TokenSequenceMatcher over the given tokens
 */
public TokenSequenceMatcher matcher(List<? extends CoreMap> tokens) {
  final TokenSequenceMatcher tokenMatcher = getMatcher(tokens);
  return tokenMatcher;
}
/**
 * Builds a TokenSequencePattern directly from an already-constructed
 * pattern expression.
 *
 * @param nodeSequencePattern A sequence pattern expression (before translation into a NFA)
 * @return Compiled TokenSequencePattern
 */
public static TokenSequencePattern compile(SequencePattern.PatternExpr nodeSequencePattern) {
  // There is no source string for an expression-built pattern, hence the null first argument.
  final TokenSequencePattern compiled = new TokenSequencePattern(null, nodeSequencePattern);
  return compiled;
}
int patternFlags = ignoreCaseEntry? Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE:0; int stringMatchFlags = ignoreCaseEntry? (NodePattern.CASE_INSENSITIVE | NodePattern.UNICODE_CASE):0; Env env = TokenSequencePattern.getNewEnv(); env.setDefaultStringPatternFlags(patternFlags); env.setDefaultStringMatchFlags(stringMatchFlags); if (entry.tokensRegex != null) { pattern = TokenSequencePattern.compile(env, entry.tokensRegex); } else { List<SequencePattern.PatternExpr> nodePatterns = new ArrayList<>(); pattern = TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(nodePatterns)); if (entry.annotateGroup < 0 || entry.annotateGroup > pattern.getTotalGroups()) { throw new RuntimeException("Invalid match group for entry " + entry); pattern.setPriority(entry.priority); pattern.setWeight(entry.weight); patterns.add(pattern); patternToEntry.put(pattern, entry); return TokenSequencePattern.getMultiPatternMatcher(patterns);
"(?$who [ ner: PERSON]+ ) /'s/ /age/ /is/ (?$age [ pos: CD ] )" }; for (String line : patterns) { TokenSequencePattern pattern = TokenSequencePattern.compile(line); tokenSequencePatterns.add(pattern); MultiPatternMatcher<CoreMap> multiMatcher = TokenSequencePattern.getMultiPatternMatcher(tokenSequencePatterns);
final boolean filter = filterStr.trim().isEmpty() || "true".equalsIgnoreCase(filterStr.toLowerCase()); final TokenSequencePattern regex = TokenSequencePattern.compile(pattern); regex.matcher(sentence.get(CoreAnnotations.TokensAnnotation.class)).matches() ).collect(Collectors.toList())); } else { TokenSequenceMatcher matcher = regex.matcher(tokens); int i = 0; while (matcher.find()) {
/**
 * Compiles a token-level regular expression into a TokenSequencePattern,
 * delegating to {@code compile(DEFAULT_ENV, string)}.
 *
 * @param string Regular expression to be compiled
 * @return Compiled TokenSequencePattern
 */
public static TokenSequencePattern compile(String string) {
  final TokenSequencePattern compiled = compile(DEFAULT_ENV, string);
  return compiled;
}
/**
 * Creates an annotator with a fresh TokenSequencePattern environment and an
 * extractor built from the given files. Verbose mode starts disabled.
 *
 * @param files Files handed to {@code CoreMapExpressionExtractor.createExtractorFromFiles}
 */
public TokensRegexAnnotator(String... files) {
  this.verbose = false;
  this.env = TokenSequencePattern.getNewEnv();
  // The extractor is created against the environment set up just above.
  this.extractor = CoreMapExpressionExtractor.createExtractorFromFiles(this.env, files);
}
TokenSequencePattern pattern = TokenSequencePattern.compile(env, "(?:$NUMCOMPTERM /-|to/ $NUMCOMPTERM) | $NUMRANGE"); TokenSequenceMatcher matcher = pattern.getMatcher(numerizedTokens); while (matcher.find()) { List<? extends CoreMap> matched = matcher.groupNodes();
/**
 * Returns a String representation of the TokenSequencePattern, which is
 * whatever {@code pattern()} returns.
 *
 * @return A String representation of the TokenSequencePattern
 */
@Override
public String toString() {
  final String patternText = this.pattern();
  return patternText;
}
/**
 * Stores the supplied configuration and builds one multi-pattern matcher
 * over the keys of {@code patterns}.
 *
 * @param sents Sentences keyed by String id
 * @param sentids Sentence ids
 * @param patterns Map whose keys are the token sequence patterns to match
 * @param label Label stored on this instance
 * @param removeStopWordsFromSelectedPhrases Flag stored on this instance
 * @param removePhrasesWithStopWords Flag stored on this instance
 * @param cv Constants and variables stored on this instance
 */
public ApplyPatternsMulti(Map<String, DataInstance> sents, List<String> sentids, Map<TokenSequencePattern, E> patterns, String label, boolean removeStopWordsFromSelectedPhrases, boolean removePhrasesWithStopWords, ConstantsAndVariables cv) {
  // Input data
  this.sents = sents;
  this.sentids = sentids;

  // Patterns plus a single matcher covering all of them
  this.patterns = patterns;
  this.multiPatternMatcher = TokenSequencePattern.getMultiPatternMatcher(patterns.keySet());

  // Remaining settings
  this.label = label;
  this.removeStopWordsFromSelectedPhrases = removeStopWordsFromSelectedPhrases;
  this.removePhrasesWithStopWords = removePhrasesWithStopWords;
  this.constVars = cv;
}
public static TokenSequencePattern compile(Env env, String string) { try { // SequencePattern.PatternExpr nodeSequencePattern = TokenSequenceParser.parseSequence(env, string); // return new TokenSequencePattern(string, nodeSequencePattern); // TODO: Check token sequence parser? Pair<PatternExpr, SequenceMatchAction<CoreMap>> p = env.parser.parseSequenceWithAction(env, string); return new TokenSequencePattern(string, p.first(), p.second()); } catch (Exception ex) { throw new RuntimeException(ex); } }
private void initEnv() env = TokenSequencePattern.getNewEnv(); env.bind("$TEUNIT", "/" + teUnit.pattern() + "/"); env.bind("$NUM", TokenSequencePattern.compile(env, "[ { numcomptype:NUMBER } ]")); env.bind("$INT", TokenSequencePattern.compile(env, " [ { numcomptype:NUMBER } & !{ word:/.*\\.\\d+.*/} & !{ word:/.*,.*/ } ] ")); // TODO: Only recognize integers env.bind("$INT1000TO3000", TokenSequencePattern.compile(env, "[ $INT & { numcompvalue>1000 } & { numcompvalue<3000 } ] ")); env.bind("$NUM_ORD", TokenSequencePattern.compile(env, "[ { numcomptype:ORDINAL } ]")); env.bind("$INT_TIMES", TokenSequencePattern.compile(env, " $INT /times/ | once | twice | trice ")); env.bind("$REL_MOD", TokenSequencePattern.compile(env, "/next|last|previous/ | /this/ /coming|past/? ")); env.bind("$FREQ_MOD", TokenSequencePattern.compile(env, "/each/ | /every/ $NUM_ORD | /every/ /other|alternate|alternating/? | /alternate|alternating/ ")); env.bind("$EARLY_LATE_MOD", TokenSequencePattern.compile(env, "/late|early|mid-?/ | /the/? /beginning|start|dawn|middle|end/ /of/")); env.bind("$APPROX_MOD", TokenSequencePattern.compile(env, "/about|around|some|exactly|precisely/")); env.bind("$YEAR", "/[012]\\d\\d\\d|'\\d\\d/ | /\\w+teen/ [ { numcompvalue<=100 } & { numcompvalue>0 } & $INT ] "); env.bind("$POSSIBLE_YEAR", " $YEAR | $INT /a\\.?d\\.?|b\\.?c\\.?/ | $INT1000TO3000 "); env.bind("$TEUNITS_NODE", TokenSequencePattern.compile(env, "[ " + "/" + teUnit.pattern() + "s?/" + " & { tag:/NN.*/ } ]"));
/**
 * Compiles several regular expressions into one TokenSequencePattern,
 * delegating to {@code compile(DEFAULT_ENV, strings)}.
 *
 * @param strings List of regular expression to be compiled
 * @return Compiled TokenSequencePattern
 */
public static TokenSequencePattern compile(String... strings) {
  final TokenSequencePattern compiled = compile(DEFAULT_ENV, strings);
  return compiled;
}
public static Env getNewEnv() { Env env = TokenSequencePattern.getNewEnv(); // Do case insensitive matching env.setDefaultStringPatternFlags(Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE); initEnv(env); return env; }
TokenSequencePattern pattern = TokenSequencePattern.compile(line); tokenSequencePatterns.add(pattern); MultiPatternMatcher<CoreMap> multiMatcher = TokenSequencePattern.getMultiPatternMatcher(tokenSequencePatterns); List<SequenceMatchResult<CoreMap>> answers = multiMatcher.findNonOverlapping(tokens); int j = 0;
/**
 * Returns a String representation of the TokenSequencePattern, which is
 * whatever {@code pattern()} returns.
 *
 * @return A String representation of the TokenSequencePattern
 */
@Override
public String toString() {
  final String patternText = this.pattern();
  return patternText;
}
/**
 * Compiles each input String into a TokenSequencePattern and builds a
 * multi-pattern matcher over the compiled patterns.
 *
 * @param patterns Input patterns in String format
 * @return A MultiPatternMatcher
 */
public static MultiPatternMatcher<CoreMap> getMultiPatternMatcher(String... patterns) {
  // Compile every pattern string, preserving input order.
  final List<TokenSequencePattern> compiledPatterns =
      Arrays.stream(patterns)
          .map(patternStr -> TokenSequencePattern.compile(patternStr))
          .collect(Collectors.toList());
  return TokenSequencePattern.getMultiPatternMatcher(compiledPatterns);
}
int patternFlags = ignoreCaseEntry? Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE:0; int stringMatchFlags = ignoreCaseEntry? (NodePattern.CASE_INSENSITIVE | NodePattern.UNICODE_CASE):0; Env env = TokenSequencePattern.getNewEnv(); env.setDefaultStringPatternFlags(patternFlags); env.setDefaultStringMatchFlags(stringMatchFlags); if (entry.tokensRegex != null) { pattern = TokenSequencePattern.compile(env, entry.tokensRegex); } else { List<SequencePattern.PatternExpr> nodePatterns = new ArrayList<>(); pattern = TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(nodePatterns)); if (entry.annotateGroup < 0 || entry.annotateGroup > pattern.getTotalGroups()) { throw new RuntimeException("Invalid match group for entry " + entry); pattern.setPriority(entry.priority); pattern.setWeight(entry.weight); patterns.add(pattern); patternToEntry.put(pattern, entry); return TokenSequencePattern.getMultiPatternMatcher(patterns);
/**
 * Apply a TokensRegex pattern to the sentence.
 *
 * @param pattern The TokensRegex pattern to match against.
 * @return The result of {@code matches()} on a matcher built from the pattern
 *         over this sentence's CoreLabels.
 */
public boolean matches(TokenSequencePattern pattern) {
  final boolean matched = pattern.getMatcher(asCoreLabels()).matches();
  return matched;
}
/**
 * Compiles the given TokensRegex pattern string and applies it to the sentence.
 *
 * @param pattern The TokensRegex pattern to match against.
 * @return True if the tokensregex pattern matches.
 */
public boolean matches(String pattern) {
  final TokenSequencePattern compiledPattern = TokenSequencePattern.compile(pattern);
  return matches(compiledPattern);
}