/**
 * Reports whether every matcher held in {@code matchers} is end anchored. The result is
 * {@code true} when there are no matchers to inspect.
 */
@Override
public boolean isEndAnchored() {
  boolean allAnchored = true;
  for (Matcher candidate : matchers) {
    // Every matcher is queried, even after the result is already known to be false.
    if (!candidate.isEndAnchored()) {
      allAnchored = false;
    }
  }
  return allAnchored;
}
/**
 * Reports whether every matcher held in {@code matchers} is start anchored. The result is
 * {@code true} when there are no matchers to inspect.
 */
@Override
public boolean isStartAnchored() {
  boolean allAnchored = true;
  for (Matcher candidate : matchers) {
    // Every matcher is queried, even after the result is already known to be false.
    if (!candidate.isStartAnchored()) {
      allAnchored = false;
    }
  }
  return allAnchored;
}
final int end = start + length; if (repeated instanceof AnyMatcher) { final int stop = end - next.minLength(); for (int pos = start; pos >= 0 && pos <= stop; ++pos) { int p = next.matches(str, pos, end - pos); if (p >= 0) { return p; final int stop = end - next.minLength(); int pos = start; while (pos >= 0 && pos <= stop) { int p = next.matches(str, pos, end - pos); if (p >= 0) { return p; pos = repeated.matches(str, pos, end - pos); if (pos == start) { return Constants.NO_MATCH; while (pos > matchPos) { matchPos = pos; pos = repeated.matches(str, pos, end - pos);
/**
 * Runs one pass of the optimizer over {@code matcher}: each rewrite rule is applied to the
 * result of the previous one, in the fixed order listed below, and the final rule is applied
 * via {@code rewriteEnd}.
 *
 * @param matcher the matcher to optimize
 * @return the matcher produced after all rules have been applied once
 */
private static Matcher optimizeSinglePass(Matcher matcher) {
  Matcher m = matcher;
  // The order of the rules is significant; each one sees the output of the one before it.
  m = m.rewrite(Optimizer::mergeNext);
  m = m.rewrite(Optimizer::removeTrueInSequence);
  m = m.rewrite(Optimizer::sequenceWithFalseIsFalse);
  m = m.rewrite(Optimizer::sequenceWithStuffAfterEndIsFalse);
  m = m.rewrite(Optimizer::zeroOrMoreFalse);
  m = m.rewrite(Optimizer::convertEmptyCharClassToFalse);
  m = m.rewrite(Optimizer::convertSingleCharClassToSeq);
  m = m.rewrite(Optimizer::removeStartFollowedByMatchAny);
  m = m.rewrite(Optimizer::removeMatchAnyFollowedByStart);
  m = m.rewrite(Optimizer::removeMatchAnyFollowedByIndexOf);
  m = m.rewrite(Optimizer::removeSequentialMatchAny);
  m = m.rewrite(Optimizer::flattenNestedSequences);
  m = m.rewrite(Optimizer::flattenNestedOr);
  m = m.rewrite(Optimizer::dedupOr);
  m = m.rewrite(Optimizer::removeFalseBranchesFromOr);
  m = m.rewrite(Optimizer::extractPrefixFromOr);
  m = m.rewrite(Optimizer::inlineMatchAnyPrecedingOr);
  m = m.rewrite(Optimizer::startsWithCharSeq);
  m = m.rewrite(Optimizer::combineCharSeqAfterStartsWith);
  m = m.rewrite(Optimizer::combineCharSeqAfterIndexOf);
  m = m.rewrite(Optimizer::combineAdjacentCharSeqs);
  m = m.rewrite(Optimizer::removeRepeatedStart);
  m = m.rewrite(Optimizer::combineAdjacentStart);
  m = m.rewrite(Optimizer::convertRepeatedAnyCharSeqToIndexOf);
  // The last rule goes through rewriteEnd rather than rewrite.
  return m.rewriteEnd(Optimizer::removeTrailingMatchAny);
}
/**
 * Returns the first matcher of a sequence. Anything that is not a {@code SeqMatcher} is
 * returned unchanged.
 */
static Matcher head(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    return matcher;
  }
  return matcher.<SeqMatcher>as().matchers().get(0);
}
/**
 * A sequence that has content following an end anchor can never match, so it is replaced
 * with {@code FalseMatcher.INSTANCE}. Matchers after the anchor that report
 * {@code alwaysMatches()} do not count as content. Any other input is returned unchanged.
 */
static Matcher sequenceWithStuffAfterEndIsFalse(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    return matcher;
  }
  boolean seenEnd = false;
  for (Matcher element : matcher.<SeqMatcher>as().matchers()) {
    if (element instanceof EndMatcher) {
      seenEnd = true;
    } else if (seenEnd && !element.alwaysMatches()) {
      // Something that can fail to match sits after the end anchor.
      return FalseMatcher.INSTANCE;
    }
  }
  return matcher;
}
/**
 * Applies {@code f} to the tail of this sequence: all matchers except the last are kept
 * as they are, the last one is rewritten with {@code rewriteEnd(f)}, a sequence is created
 * from the result, and finally {@code f} is applied to that new sequence.
 */
@Override
public Matcher rewriteEnd(Function<Matcher, Matcher> f) {
  final int lastIndex = matchers.length - 1;
  List<Matcher> updated = new ArrayList<>();
  int idx = 0;
  while (idx < lastIndex) {
    updated.add(matchers[idx]);
    ++idx;
  }
  // Only the final element of the sequence is rewritten recursively.
  updated.add(matchers[lastIndex].rewriteEnd(f));
  Matcher rebuilt = SeqMatcher.create(updated);
  return f.apply(rebuilt);
}
/**
 * Runs each matcher in order, feeding the position returned by one as the start of the
 * next, with the remaining length measured up to {@code start + length}. Evaluation stops
 * as soon as a position is negative, and that position is returned; otherwise the position
 * returned by the last matcher is the result.
 */
@Override
public int matches(String str, int start, int length) {
  final int limit = start + length;
  int cursor = start;
  for (Matcher step : matchers) {
    if (cursor < 0) {
      // A negative position means there is nothing further to try.
      break;
    }
    cursor = step.matches(str, cursor, limit - cursor);
  }
  return cursor;
}
/**
 * Create a new instance. The minimum length of the sequence is computed once here as the
 * sum of {@code minLength()} over all of the supplied matchers.
 */
private SeqMatcher(Matcher... matchers) {
  int total = 0;
  for (int i = 0; i < matchers.length; ++i) {
    total += matchers[i].minLength();
  }
  this.matchers = matchers;
  this.minLength = total;
}
/**
 * A repeated any match in front of a start anchor must be empty for the anchor to match,
 * so it can be dropped ({@code ".*^" => "^"}). The rule applies to a
 * {@code ZeroOrMoreMatcher} repeating an {@code AnyMatcher} whose next matcher is a
 * sequence with a start anchored first element; in that case the next matcher is returned.
 * Any other input is returned unchanged.
 */
static Matcher removeMatchAnyFollowedByStart(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher zm = matcher.as();
  if (!(zm.repeated() instanceof AnyMatcher)) {
    return matcher;
  }
  if (!(zm.next() instanceof SeqMatcher)) {
    return matcher;
  }
  boolean anchoredFirst = zm.next().<SeqMatcher>as().matchers().get(0).isStartAnchored();
  return anchoredFirst ? zm.next() : matcher;
}
/** Returns the prefix reported by the first matcher in {@code matchers}. */
@Override
public String prefix() {
  final Matcher first = matchers[0];
  return first.prefix();
}
/**
 * Compile a pattern string and return a matcher that can be used to check if string values
 * match the pattern. Pattern matchers can be reused many times and are thread safe.
 *
 * <p>A leading {@code (?i)} is stripped and makes the returned matcher ignore case. A
 * non-empty pattern is wrapped as {@code ^.*(pattern).*$} before it is parsed.
 *
 * @param pattern the pattern string to compile
 * @return the optimized matcher for the pattern
 */
public static PatternMatcher compile(String pattern) {
  final boolean ignoreCase = pattern.startsWith("(?i)");
  // Drop the case-insensitivity flag so only the expression itself is parsed.
  String expr = ignoreCase ? pattern.substring(4) : pattern;
  if (!expr.isEmpty()) {
    expr = "^.*(" + expr + ").*$";
  }
  String expanded = PatternUtils.expandEscapedChars(expr);
  Matcher optimized = Optimizer.optimize(new Parser(expanded).parse());
  if (ignoreCase) {
    return optimized.ignoreCase();
  }
  return optimized;
}
/**
 * Returns the first element when {@code matcher} is a {@code SeqMatcher}; otherwise returns
 * {@code matcher} itself.
 */
static Matcher head(Matcher matcher) {
  return (matcher instanceof SeqMatcher)
      ? matcher.<SeqMatcher>as().matchers().get(0)
      : matcher;
}
/**
 * If a sequence has anything after an end anchor that does not always match, the sequence
 * can never match and {@code FalseMatcher.INSTANCE} is returned in its place. In every
 * other case the input matcher is returned as is.
 */
static Matcher sequenceWithStuffAfterEndIsFalse(Matcher matcher) {
  if (matcher instanceof SeqMatcher) {
    boolean pastEnd = false;
    for (Matcher part : matcher.<SeqMatcher>as().matchers()) {
      if (part instanceof EndMatcher) {
        pastEnd = true;
        continue;
      }
      // Only matchers that come after an end anchor are inspected further.
      if (pastEnd && !part.alwaysMatches()) {
        return FalseMatcher.INSTANCE;
      }
    }
  }
  return matcher;
}
/**
 * Rewrites the end of every matcher in {@code matchers} with {@code rewriteEnd(f)}, creates
 * an {@code OrMatcher} from the rewritten matchers, and then applies {@code f} to that
 * result.
 */
@Override
public Matcher rewriteEnd(Function<Matcher, Matcher> f) {
  List<Matcher> rewritten = new ArrayList<>();
  for (Matcher branch : matchers) {
    Matcher updated = branch.rewriteEnd(f);
    rewritten.add(updated);
  }
  Matcher combined = OrMatcher.create(rewritten);
  return f.apply(combined);
}
/**
 * Checks the wrapped matcher against the input without advancing the position: when the
 * wrapped matcher returns a non-negative position the original {@code start} is returned,
 * otherwise {@code Constants.NO_MATCH}.
 */
@Override
public int matches(String str, int start, int length) {
  final int result = matcher.matches(str, start, length);
  if (result < 0) {
    return Constants.NO_MATCH;
  }
  // Report the starting position rather than the position reached by the wrapped matcher.
  return start;
}
/**
 * Create a new instance holding the given matchers. {@code minLength} is set to the sum of
 * the minimum lengths reported by each of them.
 */
private SeqMatcher(Matcher... matchers) {
  int sum = 0;
  for (Matcher element : matchers) {
    sum = sum + element.minLength();
  }
  this.minLength = sum;
  this.matchers = matchers;
}
/**
 * If a start anchor is preceded by a repeated any match, the any match must be empty for
 * the anchor to match and can therefore be removed ({@code ".*^" => "^"}). When the rule
 * applies the matcher following the repetition is returned; otherwise the input is
 * returned unchanged.
 */
static Matcher removeMatchAnyFollowedByStart(Matcher matcher) {
  if (matcher instanceof ZeroOrMoreMatcher) {
    ZeroOrMoreMatcher zm = matcher.as();
    if (zm.repeated() instanceof AnyMatcher) {
      // The anchor check only makes sense when the follow-up is a sequence.
      if (zm.next() instanceof SeqMatcher) {
        if (zm.next().<SeqMatcher>as().matchers().get(0).isStartAnchored()) {
          return zm.next();
        }
      }
    }
  }
  return matcher;
}