/**
 * Replaces a character class whose set is empty with {@link FalseMatcher#INSTANCE}. An
 * empty class cannot match anything, so it can be treated as false. Every other matcher,
 * including a non-empty character class, is returned as is.
 */
static Matcher convertEmptyCharClassToFalse(Matcher matcher) {
  // Only character classes are candidates for this rewrite.
  if (!(matcher instanceof CharClassMatcher)) {
    return matcher;
  }
  CharClassMatcher charClass = matcher.as();
  if (charClass.set().isEmpty()) {
    return FalseMatcher.INSTANCE;
  }
  return matcher;
}
/**
 * Checks the character at {@code start} against the set. When {@code ignoreCase} is
 * enabled, the result of {@code toOtherCase} for that character is checked as well.
 *
 * @param str    string being examined
 * @param start  index of the character to test
 * @param length number of characters available; nothing matches unless it is positive
 * @return {@code start + 1} when the character is accepted, otherwise
 *     {@code Constants.NO_MATCH}
 */
@Override
public int matches(String str, int start, int length) {
  // No characters available means there is nothing to test.
  if (length <= 0) {
    return Constants.NO_MATCH;
  }
  final char ch = str.charAt(start);
  boolean accepted = set.contains(ch);
  if (!accepted && ignoreCase) {
    // Only consult the other-case variant after the direct lookup has failed.
    accepted = set.contains(toOtherCase(ch));
  }
  return accepted ? start + 1 : Constants.NO_MATCH;
}
break; case '[': matchers.add(new CharClassMatcher(charClass())); break; case '(':
/**
 * Convert to a matcher that ignores the case. Character class, character sequence,
 * index-of, and starts-with matchers are recreated from their existing set or pattern
 * with {@code true} passed as the trailing constructor argument. Any other matcher type
 * is returned unchanged.
 */
static Matcher ignoreCase(Matcher matcher) {
  if (matcher instanceof CharClassMatcher) {
    CharClassMatcher charClass = matcher.as();
    return new CharClassMatcher(charClass.set(), true);
  }
  if (matcher instanceof CharSeqMatcher) {
    CharSeqMatcher charSeq = matcher.as();
    return new CharSeqMatcher(charSeq.pattern(), true);
  }
  if (matcher instanceof IndexOfMatcher) {
    IndexOfMatcher indexOf = matcher.as();
    // The follow-on matcher is carried over as is.
    return new IndexOfMatcher(indexOf.pattern(), indexOf.next(), true);
  }
  if (matcher instanceof StartsWithMatcher) {
    StartsWithMatcher startsWith = matcher.as();
    return new StartsWithMatcher(startsWith.pattern(), true);
  }
  return matcher;
}
break; case '[': matchers.add(new CharClassMatcher(charClass())); break; case '(':
/**
 * Convert to a matcher that ignores the case. Character class, character sequence,
 * index-of, and starts-with matchers are recreated from their existing set or pattern
 * with {@code true} passed as the trailing constructor argument. Any other matcher type
 * is returned unchanged.
 */
static Matcher ignoreCase(Matcher matcher) {
  if (matcher instanceof CharClassMatcher) {
    CharClassMatcher charClass = matcher.as();
    return new CharClassMatcher(charClass.set(), true);
  }
  if (matcher instanceof CharSeqMatcher) {
    CharSeqMatcher charSeq = matcher.as();
    return new CharSeqMatcher(charSeq.pattern(), true);
  }
  if (matcher instanceof IndexOfMatcher) {
    IndexOfMatcher indexOf = matcher.as();
    // The follow-on matcher is carried over as is.
    return new IndexOfMatcher(indexOf.pattern(), indexOf.next(), true);
  }
  if (matcher instanceof StartsWithMatcher) {
    StartsWithMatcher startsWith = matcher.as();
    return new StartsWithMatcher(startsWith.pattern(), true);
  }
  return matcher;
}
/**
 * Builds the matcher for an escape, dispatching on the character returned by
 * {@code peek()}.
 *
 * <ul>
 *   <li>{@code 'Q'}: delegates to {@code quotation()}.</li>
 *   <li>{@code 'c'}: rejected via {@code unsupported("control character")}.</li>
 *   <li>a member of {@code Constants.DIGIT} or {@code 'k'}: rejected via
 *       {@code unsupported("back references")}.</li>
 *   <li>otherwise: uses the set from {@code namedCharClass()} when it is non-null; when it
 *       is null, calls {@code advance()} and matches the peeked character as a
 *       one-character sequence.</li>
 * </ul>
 */
private Matcher escape() {
  final char next = peek();
  if (next == 'Q') {
    return quotation();
  }
  if (next == 'c') {
    throw unsupported("control character");
  }
  if (Constants.DIGIT.contains(next) || next == 'k') {
    throw unsupported("back references");
  }

  // Try a named class first; advance() is only called when that lookup yields null.
  final AsciiSet named = namedCharClass();
  if (named != null) {
    return new CharClassMatcher(named);
  }
  advance();
  return new CharSeqMatcher(String.valueOf(next));
}
/**
 * Replaces a character class whose set is empty with {@link FalseMatcher#INSTANCE}. An
 * empty class cannot match anything, so it can be treated as false. Every other matcher,
 * including a non-empty character class, is returned as is.
 */
static Matcher convertEmptyCharClassToFalse(Matcher matcher) {
  // Only character classes are candidates for this rewrite.
  if (!(matcher instanceof CharClassMatcher)) {
    return matcher;
  }
  CharClassMatcher charClass = matcher.as();
  if (charClass.set().isEmpty()) {
    return FalseMatcher.INSTANCE;
  }
  return matcher;
}
/**
 * Checks the character at {@code start} against the set. When {@code ignoreCase} is
 * enabled, the result of {@code toOtherCase} for that character is checked as well.
 *
 * @param str    string being examined
 * @param start  index of the character to test
 * @param length number of characters available; nothing matches unless it is positive
 * @return {@code start + 1} when the character is accepted, otherwise
 *     {@code Constants.NO_MATCH}
 */
@Override
public int matches(String str, int start, int length) {
  // No characters available means there is nothing to test.
  if (length <= 0) {
    return Constants.NO_MATCH;
  }
  final char ch = str.charAt(start);
  boolean accepted = set.contains(ch);
  if (!accepted && ignoreCase) {
    // Only consult the other-case variant after the direct lookup has failed.
    accepted = set.contains(toOtherCase(ch));
  }
  return accepted ? start + 1 : Constants.NO_MATCH;
}
/**
 * Builds the matcher for an escape, dispatching on the character returned by
 * {@code peek()}.
 *
 * <ul>
 *   <li>{@code 'Q'}: delegates to {@code quotation()}.</li>
 *   <li>{@code 'c'}: rejected via {@code unsupported("control character")}.</li>
 *   <li>a member of {@code Constants.DIGIT} or {@code 'k'}: rejected via
 *       {@code unsupported("back references")}.</li>
 *   <li>otherwise: uses the set from {@code namedCharClass()} when it is non-null; when it
 *       is null, calls {@code advance()} and matches the peeked character as a
 *       one-character sequence.</li>
 * </ul>
 */
private Matcher escape() {
  final char next = peek();
  if (next == 'Q') {
    return quotation();
  }
  if (next == 'c') {
    throw unsupported("control character");
  }
  if (Constants.DIGIT.contains(next) || next == 'k') {
    throw unsupported("back references");
  }

  // Try a named class first; advance() is only called when that lookup yields null.
  final AsciiSet named = namedCharClass();
  if (named != null) {
    return new CharClassMatcher(named);
  }
  advance();
  return new CharSeqMatcher(String.valueOf(next));
}
/**
 * Rewrites a character class that holds exactly one value into a sequence matcher for
 * that value ({@code "[a]" => "a"}). This allows other optimizations to merge the value
 * into adjacent matchers to get a larger prefix or indexOf check. Matchers that are not
 * character classes, or whose set does not report a single character, are returned
 * unchanged.
 */
static Matcher convertSingleCharClassToSeq(Matcher matcher) {
  if (!(matcher instanceof CharClassMatcher)) {
    return matcher;
  }
  CharClassMatcher charClass = matcher.as();
  // Present only when the set can be reduced to a single character.
  Optional<Character> single = charClass.set().character();
  return single.isPresent() ? new CharSeqMatcher(single.get()) : matcher;
}
/** A character class built from {@code AsciiSet.none()} should be converted to the false matcher. */
@Test
public void convertEmptyCharClassToFalse() {
  Matcher emptyClass = new CharClassMatcher(AsciiSet.none());
  Matcher actual = Optimizer.convertEmptyCharClassToFalse(emptyClass);
  Assertions.assertEquals(FalseMatcher.INSTANCE, actual);
}
/**
 * Rewrites a character class that holds exactly one value into a sequence matcher for
 * that value ({@code "[a]" => "a"}). This allows other optimizations to merge the value
 * into adjacent matchers to get a larger prefix or indexOf check. Matchers that are not
 * character classes, or whose set does not report a single character, are returned
 * unchanged.
 */
static Matcher convertSingleCharClassToSeq(Matcher matcher) {
  if (!(matcher instanceof CharClassMatcher)) {
    return matcher;
  }
  CharClassMatcher charClass = matcher.as();
  // Present only when the set can be reduced to a single character.
  Optional<Character> single = charClass.set().character();
  return single.isPresent() ? new CharSeqMatcher(single.get()) : matcher;
}
/** A character class built from the pattern {@code "a"} should become a sequence matcher for {@code 'a'}. */
@Test
public void convertSingleCharClassToSeq() {
  Matcher singleCharClass = new CharClassMatcher(AsciiSet.fromPattern("a"));
  Matcher actual = Optimizer.convertSingleCharClassToSeq(singleCharClass);
  Assertions.assertEquals(new CharSeqMatcher('a'), actual);
}