/**
 * Compile a pattern string and return a matcher that can be used to check if string values
 * match the pattern. Pattern matchers can be reused many times and are thread safe.
 *
 * <p>A leading {@code (?i)} is removed from the expression and, when present, the result of
 * calling {@code ignoreCase()} on the optimized matcher is returned. A non-empty expression
 * is wrapped as {@code ^.*(<expr>).*$} before it is parsed and optimized.
 *
 * @param pattern pattern string to compile
 * @return matcher built from the pattern
 */
public static PatternMatcher compile(String pattern) {
  boolean caseInsensitive = pattern.startsWith("(?i)");
  // Drop the four characters of the flag prefix when it is present.
  String expr = caseInsensitive ? pattern.substring(4) : pattern;
  if (!expr.isEmpty()) {
    expr = "^.*(" + expr + ").*$";
  }
  String expanded = PatternUtils.expandEscapedChars(expr);
  Matcher optimized = Optimizer.optimize(new Parser(expanded).parse());
  if (caseInsensitive) {
    return optimized.ignoreCase();
  }
  return optimized;
}
/**
 * Return a new instance of the matcher that has been optimized. Single optimization passes
 * are applied repeatedly until a pass yields a matcher equal to its input or
 * {@code MAX_ITERATIONS} additional passes have been made.
 *
 * @param matcher matcher to optimize
 * @return result of the last optimization pass
 */
static Matcher optimize(Matcher matcher) {
  Matcher previous = matcher;
  Matcher current = optimizeSinglePass(previous);
  int passes = 0;
  while (!previous.equals(current) && passes < MAX_ITERATIONS) {
    previous = current;
    current = optimizeSinglePass(previous);
    ++passes;
  }
  return current;
}
/** Four consecutive StartMatcher entries before AnyMatcher should reduce to a single one. */
@Test
public void combineAdjacentStart() {
  Matcher unoptimized = SeqMatcher.create(
      StartMatcher.INSTANCE,
      StartMatcher.INSTANCE,
      StartMatcher.INSTANCE,
      StartMatcher.INSTANCE,
      AnyMatcher.INSTANCE);
  Matcher wanted = SeqMatcher.create(StartMatcher.INSTANCE, AnyMatcher.INSTANCE);

  Matcher actual = Optimizer.combineAdjacentStart(unoptimized);
  Assertions.assertEquals(wanted, actual);
}
/** A CharClassMatcher over {@code AsciiSet.none()} should be replaced by FalseMatcher. */
@Test
public void convertEmptyCharClassToFalse() {
  Matcher unoptimized = new CharClassMatcher(AsciiSet.none());
  Matcher wanted = FalseMatcher.INSTANCE;

  Matcher actual = Optimizer.convertEmptyCharClassToFalse(unoptimized);
  Assertions.assertEquals(wanted, actual);
}
/** A CharClassMatcher built from the pattern "a" should become a CharSeqMatcher for 'a'. */
@Test
public void convertSingleCharClassToSeq() {
  Matcher unoptimized = new CharClassMatcher(AsciiSet.fromPattern("a"));
  Matcher wanted = new CharSeqMatcher('a');

  Matcher actual = Optimizer.convertSingleCharClassToSeq(unoptimized);
  Assertions.assertEquals(wanted, actual);
}
/**
 * An IndexOfMatcher for "ab" whose continuation is the CharSeqMatcher "cd" should collapse
 * into an IndexOfMatcher for "abcd" with TrueMatcher as the continuation.
 */
@Test
public void combineCharSeqAfterIndexOf() {
  CharSeqMatcher continuation = new CharSeqMatcher("cd");
  Matcher unoptimized = new IndexOfMatcher("ab", continuation);
  Matcher wanted = new IndexOfMatcher("abcd", TrueMatcher.INSTANCE);

  Matcher actual = Optimizer.combineCharSeqAfterIndexOf(unoptimized);
  Assertions.assertEquals(wanted, actual);
}
/**
 * A StartsWithMatcher "a" directly followed by a CharSeqMatcher "b" should be merged into
 * a StartsWithMatcher "ab", leaving the trailing AnyMatcher in place.
 */
@Test
public void combineCharSeqAfterStartsWith() {
  Matcher unoptimized = SeqMatcher.create(
      new StartsWithMatcher("a"),
      new CharSeqMatcher("b"),
      AnyMatcher.INSTANCE);
  Matcher wanted = SeqMatcher.create(
      new StartsWithMatcher("ab"),
      AnyMatcher.INSTANCE);

  Matcher actual = Optimizer.combineCharSeqAfterStartsWith(unoptimized);
  Assertions.assertEquals(wanted, actual);
}
/** An OrMatcher listing "a", "b", "a" should be reduced to one listing "a", "b". */
@Test
public void dedupOr() {
  Matcher unoptimized = OrMatcher.create(
      new CharSeqMatcher("a"),
      new CharSeqMatcher("b"),
      new CharSeqMatcher("a"));
  Matcher wanted = OrMatcher.create(
      new CharSeqMatcher("a"),
      new CharSeqMatcher("b"));

  Matcher actual = Optimizer.dedupOr(unoptimized);
  Assertions.assertEquals(wanted, actual);
}
/**
 * Neighbouring CharSeqMatcher entries on each side of an AnyMatcher ("a","b" and "c","d")
 * should be concatenated into "ab" and "cd" respectively.
 */
@Test
public void combineAdjacentCharSeqs() {
  Matcher unoptimized = SeqMatcher.create(
      new CharSeqMatcher("a"),
      new CharSeqMatcher("b"),
      AnyMatcher.INSTANCE,
      new CharSeqMatcher("c"),
      new CharSeqMatcher("d"));
  Matcher wanted = SeqMatcher.create(
      new CharSeqMatcher("ab"),
      AnyMatcher.INSTANCE,
      new CharSeqMatcher("cd"));

  Matcher actual = Optimizer.combineAdjacentCharSeqs(unoptimized);
  Assertions.assertEquals(wanted, actual);
}
/**
 * The outermost ZeroOrMoreMatcher over AnyMatcher whose continuation begins with the
 * CharSeqMatcher "abc" should be rewritten as an IndexOfMatcher for "abc"; the nested
 * structure that follows "abc" is expected to be carried over unchanged.
 */
@Test
public void convertRepeatedAnyCharSeqToIndexOf() {
  // Structure following "abc" in the input.
  ZeroOrMoreMatcher inputTail = new ZeroOrMoreMatcher(
      AnyMatcher.INSTANCE,
      SeqMatcher.create(
          new CharSeqMatcher("def"),
          new ZeroOrMoreMatcher(AnyMatcher.INSTANCE, TrueMatcher.INSTANCE)));
  Matcher unoptimized = new ZeroOrMoreMatcher(
      AnyMatcher.INSTANCE,
      SeqMatcher.create(new CharSeqMatcher("abc"), inputTail));

  // Same structure, built independently, as the continuation of the expected IndexOfMatcher.
  ZeroOrMoreMatcher wantedTail = new ZeroOrMoreMatcher(
      AnyMatcher.INSTANCE,
      SeqMatcher.create(
          new CharSeqMatcher("def"),
          new ZeroOrMoreMatcher(AnyMatcher.INSTANCE, TrueMatcher.INSTANCE)));
  Matcher wanted = SeqMatcher.create(new IndexOfMatcher("abc", wantedTail));

  Matcher actual = Optimizer.convertRepeatedAnyCharSeqToIndexOf(unoptimized);
  Assertions.assertEquals(wanted, actual);
}
/**
 * Return a new instance of the matcher that has been optimized. Single optimization passes
 * are applied repeatedly until a pass yields a matcher equal to its input or
 * {@code MAX_ITERATIONS} additional passes have been made.
 *
 * @param matcher matcher to optimize
 * @return result of the last optimization pass
 */
static Matcher optimize(Matcher matcher) {
  Matcher previous = matcher;
  Matcher current = optimizeSinglePass(previous);
  int passes = 0;
  while (!previous.equals(current) && passes < MAX_ITERATIONS) {
    previous = current;
    current = optimizeSinglePass(previous);
    ++passes;
  }
  return current;
}
/**
 * Compile a pattern string and return a matcher that can be used to check if string values
 * match the pattern. Pattern matchers can be reused many times and are thread safe.
 *
 * <p>A leading {@code (?i)} is removed from the expression and, when present, the result of
 * calling {@code ignoreCase()} on the optimized matcher is returned. A non-empty expression
 * is wrapped as {@code ^.*(<expr>).*$} before it is parsed and optimized.
 *
 * @param pattern pattern string to compile
 * @return matcher built from the pattern
 */
public static PatternMatcher compile(String pattern) {
  boolean caseInsensitive = pattern.startsWith("(?i)");
  // Drop the four characters of the flag prefix when it is present.
  String expr = caseInsensitive ? pattern.substring(4) : pattern;
  if (!expr.isEmpty()) {
    expr = "^.*(" + expr + ").*$";
  }
  String expanded = PatternUtils.expandEscapedChars(expr);
  Matcher optimized = Optimizer.optimize(new Parser(expanded).parse());
  if (caseInsensitive) {
    return optimized.ignoreCase();
  }
  return optimized;
}