/**
 * Parses a quoted literal that is terminated by the two-character sequence {@code \E}.
 *
 * <p>The literal text starts one position after {@code current} and runs up to, but not
 * including, the terminator. On success {@code current} is moved just past the terminator.
 *
 * @return a {@link CharSeqMatcher} for the text between the start position and {@code \E}
 * @throws RuntimeException the value produced by {@code error(...)} when no {@code \E} is
 *     found (exact exception type is declared elsewhere -- TODO confirm)
 */
private Matcher quotation() {
  final int begin = current + 1;
  final int terminator = tokens.indexOf("\\E", begin);
  if (terminator == -1) {
    throw error("unclosed quotation");
  }
  // Capture the quoted text, then step over the 2 characters of "\E".
  final String literal = tokens.substring(begin, terminator);
  current = terminator + 2;
  return new CharSeqMatcher(literal);
}
/**
 * Reads a quotation whose end is marked by {@code \E} and turns it into a literal matcher.
 *
 * <p>Scanning begins at {@code current + 1}. When the end marker is located, {@code current}
 * is set to the index immediately following it.
 *
 * @return a {@link CharSeqMatcher} over the characters preceding the {@code \E} marker
 * @throws RuntimeException the value produced by {@code error(...)} if the marker is absent
 *     (exact exception type is declared elsewhere -- TODO confirm)
 */
private Matcher quotation() {
  final int from = current + 1;
  final int to = tokens.indexOf("\\E", from);
  if (to == -1) {
    throw error("unclosed quotation");
  }
  final String quoted = tokens.substring(from, to);
  // The end marker "\E" is two characters wide.
  current = to + 2;
  return new CharSeqMatcher(quoted);
}
/**
 * Replaces a character class that holds exactly one character with a plain character
 * sequence ({@code "[a]" => "a"}). Doing so lets other optimizations fold the value into
 * neighboring matchers to build a longer prefix or indexOf check.
 *
 * @param matcher matcher to inspect
 * @return a {@link CharSeqMatcher} for the single character when {@code matcher} is a
 *     {@link CharClassMatcher} whose set reports one; otherwise {@code matcher} unchanged
 */
static Matcher convertSingleCharClassToSeq(Matcher matcher) {
  if (!(matcher instanceof CharClassMatcher)) {
    return matcher;
  }
  Optional<Character> single = matcher.<CharClassMatcher>as().set().character();
  return single.isPresent() ? new CharSeqMatcher(single.get()) : matcher;
}
/**
 * When a character class contains just one value, match that value directly
 * ({@code "[a]" => "a"}). This gives other optimizations the chance to merge the value with
 * adjacent matchers and obtain a larger prefix or indexOf check.
 *
 * @param matcher matcher to inspect
 * @return a {@link CharSeqMatcher} for the lone character if {@code matcher} is a
 *     {@link CharClassMatcher} whose set reports one; otherwise the original {@code matcher}
 */
static Matcher convertSingleCharClassToSeq(Matcher matcher) {
  if (!(matcher instanceof CharClassMatcher)) {
    return matcher;
  }
  Optional<Character> onlyChar = matcher.<CharClassMatcher>as().set().character();
  if (!onlyChar.isPresent()) {
    return matcher;
  }
  return new CharSeqMatcher(onlyChar.get());
}
/**
 * Checks that {@code Optimizer.combineAdjacentCharSeqs} merges neighboring character
 * sequences ("a","b" and "c","d") while leaving the any-matcher between them in place.
 */
@Test
public void combineAdjacentCharSeqs() {
  Matcher unmerged = SeqMatcher.create(
      new CharSeqMatcher("a"),
      new CharSeqMatcher("b"),
      AnyMatcher.INSTANCE,
      new CharSeqMatcher("c"),
      new CharSeqMatcher("d"));

  Matcher merged = SeqMatcher.create(
      new CharSeqMatcher("ab"),
      AnyMatcher.INSTANCE,
      new CharSeqMatcher("cd"));

  Assertions.assertEquals(merged, Optimizer.combineAdjacentCharSeqs(unmerged));
}
/**
 * Checks that {@code Optimizer.dedupOr} drops the repeated "a" alternative from an OR,
 * leaving the "a" and "b" alternatives.
 */
@Test
public void dedupOr() {
  Matcher withDuplicate = OrMatcher.create(
      new CharSeqMatcher("a"),
      new CharSeqMatcher("b"),
      new CharSeqMatcher("a"));

  Matcher deduped = OrMatcher.create(
      new CharSeqMatcher("a"),
      new CharSeqMatcher("b"));

  Assertions.assertEquals(deduped, Optimizer.dedupOr(withDuplicate));
}
/**
 * Checks that {@code Optimizer.removeFalseBranchesFromOr} strips the {@code FalseMatcher}
 * alternative out of an OR and keeps the remaining alternatives.
 */
@Test
public void removeFalseBranchesFromOr() {
  Matcher withFalseBranch = OrMatcher.create(
      new CharSeqMatcher("a"),
      FalseMatcher.INSTANCE,
      new CharSeqMatcher("b"));

  Matcher withoutFalseBranch = OrMatcher.create(
      new CharSeqMatcher("a"),
      new CharSeqMatcher("b"));

  Assertions.assertEquals(
      withoutFalseBranch,
      Optimizer.removeFalseBranchesFromOr(withFalseBranch));
}
/**
 * Checks that compiling {@code ^(abc123|abc456)} yields a starts-with check on the shared
 * "abc" prefix followed by an OR over the differing suffixes "123" and "456".
 */
@Test
public void optimizeOrPrefix() {
  PatternMatcher compiled = PatternMatcher.compile("^(abc123|abc456)");

  Matcher suffixes = new OrMatcher(new CharSeqMatcher("123"), new CharSeqMatcher("456"));
  PatternMatcher wanted = SeqMatcher.create(new StartsWithMatcher("abc"), suffixes);

  Assertions.assertEquals(wanted, compiled);
}
/**
 * Checks the compiled form of a pattern with a negative lookahead over an alternation:
 * the lookahead body becomes an index-of search for the shared "1" followed by an OR over
 * "000" and "500".
 */
@Test
public void optimizeNegativeLookaheadOr() {
  PatternMatcher compiled = PatternMatcher.compile("^^abc.def(?!.*(1000|1500))");

  Matcher suffixes = OrMatcher.create(new CharSeqMatcher("000"), new CharSeqMatcher("500"));
  Matcher lookahead = new NegativeLookaheadMatcher(new IndexOfMatcher("1", suffixes));
  PatternMatcher wanted = SeqMatcher.create(
      new StartsWithMatcher("abc"),
      AnyMatcher.INSTANCE,
      new CharSeqMatcher("def"),
      lookahead);

  Assertions.assertEquals(wanted, compiled);
}
}
/**
 * Checks that {@code Optimizer.combineCharSeqAfterIndexOf} folds a character sequence that
 * directly follows an index-of search into the searched string ("ab" + "cd" => "abcd"),
 * leaving {@code TrueMatcher} as the follow-up matcher.
 */
@Test
public void combineCharSeqAfterIndexOf() {
  Matcher separate = new IndexOfMatcher("ab", new CharSeqMatcher("cd"));
  Matcher combined = new IndexOfMatcher("abcd", TrueMatcher.INSTANCE);

  Assertions.assertEquals(combined, Optimizer.combineCharSeqAfterIndexOf(separate));
}
/**
 * Checks that compiling {@code .*foo.bar$} yields an index-of search for "foo" followed by
 * a sequence of any character, the literal "bar", and the end anchor.
 */
@Test
public void optimizeEndsWithPattern() {
  PatternMatcher compiled = PatternMatcher.compile(".*foo.bar$");

  Matcher afterFoo = SeqMatcher.create(
      AnyMatcher.INSTANCE,
      new CharSeqMatcher("bar"),
      EndMatcher.INSTANCE);
  PatternMatcher wanted = new IndexOfMatcher("foo", afterFoo);

  Assertions.assertEquals(wanted, compiled);
}
/**
 * Parses an escape construct based on the character returned by {@code peek()}.
 *
 * <ul>
 *   <li>{@code Q}: delegates to {@code quotation()}.</li>
 *   <li>{@code c}: rejected as an unsupported control character.</li>
 *   <li>a character in {@code Constants.DIGIT}, or {@code k}: rejected as an unsupported
 *       back reference.</li>
 *   <li>anything else: tries {@code namedCharClass()}; if that yields {@code null}, advances
 *       and matches the peeked character literally.</li>
 * </ul>
 *
 * @return matcher for the escape construct
 */
private Matcher escape() {
  final char next = peek();
  if (next == 'Q') {
    return quotation();
  }
  if (next == 'c') {
    throw unsupported("control character");
  }
  if (Constants.DIGIT.contains(next) || next == 'k') {
    throw unsupported("back references");
  }

  final AsciiSet named = namedCharClass();
  if (named != null) {
    return new CharClassMatcher(named);
  }
  // Not a named class: consume the character and treat it as a literal.
  advance();
  return new CharSeqMatcher(String.valueOf(next));
}
/**
 * Checks that {@code Optimizer.convertRepeatedAnyCharSeqToIndexOf} rewrites the outermost
 * "repeated any-char, then "abc"" into an index-of search for "abc", while the nested
 * repetition that follows is left in its original form.
 */
@Test
public void convertRepeatedAnyCharSeqToIndexOf() {
  // Input: .* "abc" .* "def" .*
  Matcher inputTrailingAny = new ZeroOrMoreMatcher(AnyMatcher.INSTANCE, TrueMatcher.INSTANCE);
  Matcher inputDefPart = new ZeroOrMoreMatcher(
      AnyMatcher.INSTANCE,
      SeqMatcher.create(new CharSeqMatcher("def"), inputTrailingAny));
  Matcher original = new ZeroOrMoreMatcher(
      AnyMatcher.INSTANCE,
      SeqMatcher.create(new CharSeqMatcher("abc"), inputDefPart));

  // Expected: indexOf("abc") followed by the untouched remainder.
  Matcher wantedTrailingAny = new ZeroOrMoreMatcher(AnyMatcher.INSTANCE, TrueMatcher.INSTANCE);
  Matcher wantedDefPart = new ZeroOrMoreMatcher(
      AnyMatcher.INSTANCE,
      SeqMatcher.create(new CharSeqMatcher("def"), wantedTrailingAny));
  Matcher wanted = SeqMatcher.create(new IndexOfMatcher("abc", wantedDefPart));

  Assertions.assertEquals(wanted, Optimizer.convertRepeatedAnyCharSeqToIndexOf(original));
}
/**
 * Checks that compiling an alternation of three "contains"-style branches yields an OR of
 * index-of searches, with the last branch searching for "gh" and then matching zero or
 * more "i" followed by any character.
 */
@Test
public void optimizeOrIndexOf() {
  PatternMatcher compiled = PatternMatcher.compile("^.*abc.*|.*def.*|.*ghi*.");

  Matcher containsAbc = new IndexOfMatcher("abc", TrueMatcher.INSTANCE);
  Matcher containsDef = new IndexOfMatcher("def", TrueMatcher.INSTANCE);
  Matcher containsGh = new IndexOfMatcher(
      "gh",
      new ZeroOrMoreMatcher(new CharSeqMatcher("i"), AnyMatcher.INSTANCE));
  PatternMatcher wanted = OrMatcher.create(containsAbc, containsDef, containsGh);

  Assertions.assertEquals(wanted, compiled);
}
/**
 * Checks that {@code Optimizer.inlineMatchAnyPrecedingOr} pushes a leading repeated
 * any-char into each alternative of the OR that follows it.
 */
@Test
public void inlineMatchAnyPrecedingOr() {
  Matcher first = new CharSeqMatcher("a");
  Matcher second = new CharSeqMatcher("b");

  Matcher anyThenOr = new ZeroOrMoreMatcher(
      AnyMatcher.INSTANCE,
      OrMatcher.create(first, second));

  Matcher orOfAnyThen = OrMatcher.create(
      new ZeroOrMoreMatcher(AnyMatcher.INSTANCE, first),
      new ZeroOrMoreMatcher(AnyMatcher.INSTANCE, second));

  Assertions.assertEquals(orOfAnyThen, Optimizer.inlineMatchAnyPrecedingOr(anyThenOr));
}
/**
 * Checks that {@code Optimizer.convertSingleCharClassToSeq} turns a character class built
 * from the pattern "a" into a character sequence matcher for {@code 'a'}.
 */
@Test
public void convertSingleCharClassToSeq() {
  Matcher singleCharClass = new CharClassMatcher(AsciiSet.fromPattern("a"));
  Matcher literal = new CharSeqMatcher('a');

  Assertions.assertEquals(literal, Optimizer.convertSingleCharClassToSeq(singleCharClass));
}
/**
 * Checks that {@code Optimizer.extractPrefixFromOr} hoists the repetition of "a" shared by
 * every alternative in front of the OR, leaving only the differing follow-up matchers as
 * alternatives.
 */
@Test
public void extractPrefixFromOr() {
  Matcher first = new CharSeqMatcher("a");
  Matcher second = new CharSeqMatcher("b");

  Matcher sharedPrefixInEachBranch = OrMatcher.create(
      new ZeroOrMoreMatcher(first, AnyMatcher.INSTANCE),
      new ZeroOrMoreMatcher(first, first),
      new ZeroOrMoreMatcher(first, second));

  Matcher prefixThenOr = SeqMatcher.create(
      new ZeroOrMoreMatcher(first, TrueMatcher.INSTANCE),
      OrMatcher.create(AnyMatcher.INSTANCE, first, second));

  Assertions.assertEquals(prefixThenOr, Optimizer.extractPrefixFromOr(sharedPrefixInEachBranch));
}
/**
 * Checks that compiling {@code ^a?a} yields a starts-with check on "a" followed by an OR
 * between a further "a" and {@code TrueMatcher}.
 */
@Test
public void optimizeOptionValue() {
  PatternMatcher compiled = PatternMatcher.compile("^a?a");

  Matcher optionalSecondA = OrMatcher.create(new CharSeqMatcher("a"), TrueMatcher.INSTANCE);
  PatternMatcher wanted = SeqMatcher.create(new StartsWithMatcher("a"), optionalSecondA);

  Assertions.assertEquals(wanted, compiled);
}
/**
 * Checks that {@code Optimizer.startsWithCharSeq} replaces a start anchor followed by the
 * character sequence "ab" with a single starts-with matcher, keeping the rest of the
 * sequence.
 */
@Test
public void startsWithCharSeq() {
  Matcher anchoredSeq = SeqMatcher.create(
      StartMatcher.INSTANCE,
      new CharSeqMatcher("ab"),
      AnyMatcher.INSTANCE);

  Matcher startsWithSeq = SeqMatcher.create(
      new StartsWithMatcher("ab"),
      AnyMatcher.INSTANCE);

  Assertions.assertEquals(startsWithSeq, Optimizer.startsWithCharSeq(anchoredSeq));
}
/**
 * Checks that {@code Optimizer.combineCharSeqAfterStartsWith} appends a character sequence
 * that directly follows a starts-with matcher to its prefix ("a" + "b" => "ab"), keeping
 * the rest of the sequence.
 */
@Test
public void combineCharSeqAfterStartsWith() {
  Matcher prefixThenLiteral = SeqMatcher.create(
      new StartsWithMatcher("a"),
      new CharSeqMatcher("b"),
      AnyMatcher.INSTANCE);

  Matcher longerPrefix = SeqMatcher.create(
      new StartsWithMatcher("ab"),
      AnyMatcher.INSTANCE);

  Assertions.assertEquals(
      longerPrefix,
      Optimizer.combineCharSeqAfterStartsWith(prefixThenLiteral));
}