/**
 * Collapses a leading start anchor followed by a literal character sequence into a single
 * StartsWithMatcher. The original authors note that, in a tight loop, this is much faster
 * than running a sequence of two matchers.
 *
 * <p>The rewrite only applies when {@code matcher} is a SeqMatcher with at least two
 * elements, the first being a StartMatcher and the second a CharSeqMatcher. Any other
 * input is returned unchanged.
 */
static Matcher startsWithCharSeq(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    return matcher;
  }

  List<Matcher> parts = matcher.<SeqMatcher>as().matchers();
  boolean anchoredLiteral = parts.size() >= 2
      && parts.get(0) instanceof StartMatcher
      && parts.get(1) instanceof CharSeqMatcher;
  if (!anchoredLiteral) {
    return matcher;
  }

  // Replace the first two elements with one StartsWithMatcher, keep the remainder as is.
  String literal = parts.get(1).<CharSeqMatcher>as().pattern();
  List<Matcher> rewritten = new ArrayList<>();
  rewritten.add(new StartsWithMatcher(literal));
  rewritten.addAll(parts.subList(2, parts.size()));
  return SeqMatcher.create(rewritten);
}
/**
 * Collapses a leading start anchor followed by a literal character sequence into a single
 * StartsWithMatcher. The original authors note that, in a tight loop, this is much faster
 * than running a sequence of two matchers.
 *
 * <p>The rewrite only applies when {@code matcher} is a SeqMatcher with at least two
 * elements, the first being a StartMatcher and the second a CharSeqMatcher. Any other
 * input is returned unchanged.
 */
static Matcher startsWithCharSeq(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    return matcher;
  }

  List<Matcher> parts = matcher.<SeqMatcher>as().matchers();
  boolean anchoredLiteral = parts.size() >= 2
      && parts.get(0) instanceof StartMatcher
      && parts.get(1) instanceof CharSeqMatcher;
  if (!anchoredLiteral) {
    return matcher;
  }

  // Replace the first two elements with one StartsWithMatcher, keep the remainder as is.
  String literal = parts.get(1).<CharSeqMatcher>as().pattern();
  List<Matcher> rewritten = new ArrayList<>();
  rewritten.add(new StartsWithMatcher(literal));
  rewritten.addAll(parts.subList(2, parts.size()));
  return SeqMatcher.create(rewritten);
}
/**
 * Merges a literal character sequence that directly follows a StartsWithMatcher into the
 * prefix pattern of that StartsWithMatcher.
 *
 * <p>The rewrite only applies when {@code matcher} is a SeqMatcher with at least two
 * elements, the first being a StartsWithMatcher and the second a CharSeqMatcher. Any other
 * input is returned unchanged.
 */
static Matcher combineCharSeqAfterStartsWith(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    return matcher;
  }

  List<Matcher> parts = matcher.<SeqMatcher>as().matchers();
  boolean mergeable = parts.size() >= 2
      && parts.get(0) instanceof StartsWithMatcher
      && parts.get(1) instanceof CharSeqMatcher;
  if (!mergeable) {
    return matcher;
  }

  // New prefix is the existing starts-with pattern with the adjacent literal appended.
  String prefix = parts.get(0).<StartsWithMatcher>as().pattern()
      + parts.get(1).<CharSeqMatcher>as().pattern();
  List<Matcher> rewritten = new ArrayList<>();
  rewritten.add(new StartsWithMatcher(prefix));
  rewritten.addAll(parts.subList(2, parts.size()));
  return SeqMatcher.create(rewritten);
}
/**
 * Merges a literal character sequence that directly follows a StartsWithMatcher into the
 * prefix pattern of that StartsWithMatcher.
 *
 * <p>The rewrite only applies when {@code matcher} is a SeqMatcher with at least two
 * elements, the first being a StartsWithMatcher and the second a CharSeqMatcher. Any other
 * input is returned unchanged.
 */
static Matcher combineCharSeqAfterStartsWith(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    return matcher;
  }

  List<Matcher> parts = matcher.<SeqMatcher>as().matchers();
  boolean mergeable = parts.size() >= 2
      && parts.get(0) instanceof StartsWithMatcher
      && parts.get(1) instanceof CharSeqMatcher;
  if (!mergeable) {
    return matcher;
  }

  // New prefix is the existing starts-with pattern with the adjacent literal appended.
  String prefix = parts.get(0).<StartsWithMatcher>as().pattern()
      + parts.get(1).<CharSeqMatcher>as().pattern();
  List<Matcher> rewritten = new ArrayList<>();
  rewritten.add(new StartsWithMatcher(prefix));
  rewritten.addAll(parts.subList(2, parts.size()));
  return SeqMatcher.create(rewritten);
}
/**
 * Returns the part of a matcher that remains once its prefix has been taken off. This is
 * similar to {@link #tail(Matcher)} except that it is intended to be paired with
 * {@link #getPrefix(Matcher)}.
 *
 * <ul>
 *   <li>SeqMatcher: a new sequence built from every element after the first.</li>
 *   <li>ZeroOrMoreMatcher: the matcher returned by its {@code next()}.</li>
 *   <li>CharSeqMatcher: a CharSeqMatcher for the pattern without its first character, or
 *       {@code TrueMatcher.INSTANCE} when the pattern has at most one character.</li>
 *   <li>Anything else: {@code TrueMatcher.INSTANCE}.</li>
 * </ul>
 */
static Matcher getSuffix(Matcher matcher) {
  if (matcher instanceof SeqMatcher) {
    List<Matcher> parts = matcher.<SeqMatcher>as().matchers();
    return SeqMatcher.create(parts.subList(1, parts.size()));
  }

  if (matcher instanceof ZeroOrMoreMatcher) {
    return matcher.<ZeroOrMoreMatcher>as().next();
  }

  if (matcher instanceof CharSeqMatcher) {
    String literal = matcher.<CharSeqMatcher>as().pattern();
    if (literal.length() <= 1) {
      return TrueMatcher.INSTANCE;
    }
    return new CharSeqMatcher(literal.substring(1));
  }

  return TrueMatcher.INSTANCE;
}
}
/**
 * Returns the part of a matcher that remains once its prefix has been taken off. This is
 * similar to {@link #tail(Matcher)} except that it is intended to be paired with
 * {@link #getPrefix(Matcher)}.
 *
 * <ul>
 *   <li>SeqMatcher: a new sequence built from every element after the first.</li>
 *   <li>ZeroOrMoreMatcher: the matcher returned by its {@code next()}.</li>
 *   <li>CharSeqMatcher: a CharSeqMatcher for the pattern without its first character, or
 *       {@code TrueMatcher.INSTANCE} when the pattern has at most one character.</li>
 *   <li>Anything else: {@code TrueMatcher.INSTANCE}.</li>
 * </ul>
 */
static Matcher getSuffix(Matcher matcher) {
  if (matcher instanceof SeqMatcher) {
    List<Matcher> parts = matcher.<SeqMatcher>as().matchers();
    return SeqMatcher.create(parts.subList(1, parts.size()));
  }

  if (matcher instanceof ZeroOrMoreMatcher) {
    return matcher.<ZeroOrMoreMatcher>as().next();
  }

  if (matcher instanceof CharSeqMatcher) {
    String literal = matcher.<CharSeqMatcher>as().pattern();
    if (literal.length() <= 1) {
      return TrueMatcher.INSTANCE;
    }
    return new CharSeqMatcher(literal.substring(1));
  }

  return TrueMatcher.INSTANCE;
}
}
/**
 * Returns the leading part of a matcher. This is similar to {@link #head(Matcher)} except
 * that it can reach into character sequences as well as higher level sequences.
 *
 * <ul>
 *   <li>SeqMatcher: its first element.</li>
 *   <li>ZeroOrMoreMatcher: a new ZeroOrMoreMatcher over the same repeated matcher whose
 *       next matcher is {@code TrueMatcher.INSTANCE}.</li>
 *   <li>CharSeqMatcher: a CharSeqMatcher for the first character of the pattern, or
 *       {@code null} when the pattern is empty.</li>
 *   <li>Anything else: the matcher itself.</li>
 * </ul>
 */
static Matcher getPrefix(Matcher matcher) {
  if (matcher instanceof SeqMatcher) {
    return matcher.<SeqMatcher>as().matchers().get(0);
  }

  if (matcher instanceof ZeroOrMoreMatcher) {
    return new ZeroOrMoreMatcher(
        matcher.<ZeroOrMoreMatcher>as().repeated(), TrueMatcher.INSTANCE);
  }

  if (matcher instanceof CharSeqMatcher) {
    String literal = matcher.<CharSeqMatcher>as().pattern();
    if (literal.isEmpty()) {
      // An empty literal has no first character to expose.
      return null;
    }
    return new CharSeqMatcher(literal.charAt(0));
  }

  return matcher;
}
/**
 * Returns the leading part of a matcher. This is similar to {@link #head(Matcher)} except
 * that it can reach into character sequences as well as higher level sequences.
 *
 * <ul>
 *   <li>SeqMatcher: its first element.</li>
 *   <li>ZeroOrMoreMatcher: a new ZeroOrMoreMatcher over the same repeated matcher whose
 *       next matcher is {@code TrueMatcher.INSTANCE}.</li>
 *   <li>CharSeqMatcher: a CharSeqMatcher for the first character of the pattern, or
 *       {@code null} when the pattern is empty.</li>
 *   <li>Anything else: the matcher itself.</li>
 * </ul>
 */
static Matcher getPrefix(Matcher matcher) {
  if (matcher instanceof SeqMatcher) {
    return matcher.<SeqMatcher>as().matchers().get(0);
  }

  if (matcher instanceof ZeroOrMoreMatcher) {
    return new ZeroOrMoreMatcher(
        matcher.<ZeroOrMoreMatcher>as().repeated(), TrueMatcher.INSTANCE);
  }

  if (matcher instanceof CharSeqMatcher) {
    String literal = matcher.<CharSeqMatcher>as().pattern();
    if (literal.isEmpty()) {
      // An empty literal has no first character to expose.
      return null;
    }
    return new CharSeqMatcher(literal.charAt(0));
  }

  return matcher;
}
/**
 * Replaces a repeated any-match that is followed by a character sequence with an
 * IndexOfMatcher. The original authors note that the index-of operation seems to be
 * optimized by the JDK and is much faster. Example: {@code ".*foo" => indexOf("foo")}.
 *
 * <p>The rewrite only applies when {@code matcher} is a ZeroOrMoreMatcher that repeats an
 * AnyMatcher and the prefix of its next matcher (see {@code getPrefix}) is a
 * CharSeqMatcher. Any other input is returned unchanged.
 */
static Matcher convertRepeatedAnyCharSeqToIndexOf(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }

  ZeroOrMoreMatcher repetition = matcher.as();
  Matcher leading = PatternUtils.getPrefix(repetition.next());
  boolean convertible = repetition.repeated() instanceof AnyMatcher
      && leading instanceof CharSeqMatcher;
  if (!convertible) {
    return matcher;
  }

  // Search for the leading literal, then continue with whatever follows it.
  String needle = leading.<CharSeqMatcher>as().pattern();
  Matcher remainder = PatternUtils.getSuffix(repetition.next());
  return new IndexOfMatcher(needle, remainder);
}
/**
 * Merges a literal character sequence that directly follows an IndexOfMatcher into the
 * pattern of that IndexOfMatcher.
 *
 * <p>The rewrite only applies when {@code matcher} is an IndexOfMatcher and the head of
 * its next matcher is a CharSeqMatcher. The result searches for the concatenated pattern
 * and continues with the tail of the original next matcher. Any other input is returned
 * unchanged.
 */
static Matcher combineCharSeqAfterIndexOf(Matcher matcher) {
  if (!(matcher instanceof IndexOfMatcher)) {
    return matcher;
  }

  IndexOfMatcher indexOf = matcher.as();
  Matcher following = PatternUtils.head(indexOf.next());
  if (!(following instanceof CharSeqMatcher)) {
    return matcher;
  }

  String merged = indexOf.pattern() + following.<CharSeqMatcher>as().pattern();
  return new IndexOfMatcher(merged, PatternUtils.tail(indexOf.next()));
}
/**
 * Merges a literal character sequence that directly follows an IndexOfMatcher into the
 * pattern of that IndexOfMatcher.
 *
 * <p>The rewrite only applies when {@code matcher} is an IndexOfMatcher and the head of
 * its next matcher is a CharSeqMatcher. The result searches for the concatenated pattern
 * and continues with the tail of the original next matcher. Any other input is returned
 * unchanged.
 */
static Matcher combineCharSeqAfterIndexOf(Matcher matcher) {
  if (!(matcher instanceof IndexOfMatcher)) {
    return matcher;
  }

  IndexOfMatcher indexOf = matcher.as();
  Matcher following = PatternUtils.head(indexOf.next());
  if (!(following instanceof CharSeqMatcher)) {
    return matcher;
  }

  String merged = indexOf.pattern() + following.<CharSeqMatcher>as().pattern();
  return new IndexOfMatcher(merged, PatternUtils.tail(indexOf.next()));
}
/**
 * Replaces a repeated any-match that is followed by a character sequence with an
 * IndexOfMatcher. The original authors note that the index-of operation seems to be
 * optimized by the JDK and is much faster. Example: {@code ".*foo" => indexOf("foo")}.
 *
 * <p>The rewrite only applies when {@code matcher} is a ZeroOrMoreMatcher that repeats an
 * AnyMatcher and the prefix of its next matcher (see {@code getPrefix}) is a
 * CharSeqMatcher. Any other input is returned unchanged.
 */
static Matcher convertRepeatedAnyCharSeqToIndexOf(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }

  ZeroOrMoreMatcher repetition = matcher.as();
  Matcher leading = PatternUtils.getPrefix(repetition.next());
  boolean convertible = repetition.repeated() instanceof AnyMatcher
      && leading instanceof CharSeqMatcher;
  if (!convertible) {
    return matcher;
  }

  // Search for the leading literal, then continue with whatever follows it.
  String needle = leading.<CharSeqMatcher>as().pattern();
  Matcher remainder = PatternUtils.getSuffix(repetition.next());
  return new IndexOfMatcher(needle, remainder);
}
/**
 * Converts a matcher into an equivalent one that ignores case.
 *
 * <p>CharClassMatcher, CharSeqMatcher, IndexOfMatcher and StartsWithMatcher are rebuilt
 * from their existing components with the trailing constructor flag set to {@code true}.
 * Any other matcher type is returned unchanged.
 */
static Matcher ignoreCase(Matcher matcher) {
  if (matcher instanceof CharClassMatcher) {
    return new CharClassMatcher(matcher.<CharClassMatcher>as().set(), true);
  }

  if (matcher instanceof CharSeqMatcher) {
    return new CharSeqMatcher(matcher.<CharSeqMatcher>as().pattern(), true);
  }

  if (matcher instanceof IndexOfMatcher) {
    IndexOfMatcher indexOf = matcher.as();
    return new IndexOfMatcher(indexOf.pattern(), indexOf.next(), true);
  }

  if (matcher instanceof StartsWithMatcher) {
    return new StartsWithMatcher(matcher.<StartsWithMatcher>as().pattern(), true);
  }

  return matcher;
}
/**
 * Converts a matcher into an equivalent one that ignores case.
 *
 * <p>CharClassMatcher, CharSeqMatcher, IndexOfMatcher and StartsWithMatcher are rebuilt
 * from their existing components with the trailing constructor flag set to {@code true}.
 * Any other matcher type is returned unchanged.
 */
static Matcher ignoreCase(Matcher matcher) {
  if (matcher instanceof CharClassMatcher) {
    return new CharClassMatcher(matcher.<CharClassMatcher>as().set(), true);
  }

  if (matcher instanceof CharSeqMatcher) {
    return new CharSeqMatcher(matcher.<CharSeqMatcher>as().pattern(), true);
  }

  if (matcher instanceof IndexOfMatcher) {
    IndexOfMatcher indexOf = matcher.as();
    return new IndexOfMatcher(indexOf.pattern(), indexOf.next(), true);
  }

  if (matcher instanceof StartsWithMatcher) {
    return new StartsWithMatcher(matcher.<StartsWithMatcher>as().pattern(), true);
  }

  return matcher;
}