/**
 * Drops a zero-or-more repetition of a start anchor. Repeating a start anchor zero or more
 * times is the same as not being anchored by the start, so only the matcher that follows
 * the repetition is kept.
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     The matcher following the repetition if {@code matcher} is a {@code ZeroOrMoreMatcher}
 *     that repeats a {@code StartMatcher}, otherwise {@code matcher} itself.
 */
static Matcher removeRepeatedStart(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher repetition = matcher.<ZeroOrMoreMatcher>as();
  return (repetition.repeated() instanceof StartMatcher) ? repetition.next() : matcher;
}
/**
 * A start anchor repeated zero or more times does not anchor the match to the start, so
 * such a repetition is replaced by whatever matcher comes after it. Any other matcher is
 * returned untouched.
 */
static Matcher removeRepeatedStart(Matcher matcher) {
  boolean repeatsStartAnchor = matcher instanceof ZeroOrMoreMatcher
      && matcher.<ZeroOrMoreMatcher>as().repeated() instanceof StartMatcher;
  if (repeatsStartAnchor) {
    return matcher.<ZeroOrMoreMatcher>as().next();
  }
  return matcher;
}
/**
 * Removes a start anchor that is immediately followed by a repeated any match, since the
 * anchor does not change the result ({@code "^.*" => ".*"}).
 *
 * <p>The rewrite only applies to a two element sequence consisting of a
 * {@code StartMatcher} and then a {@code ZeroOrMoreMatcher} that repeats an
 * {@code AnyMatcher}. In that case the repetition is returned on its own.
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     The repetition without the leading anchor, or {@code matcher} if the shape does not
 *     match.
 */
static Matcher removeStartFollowedByMatchAny(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    return matcher;
  }
  List<Matcher> parts = matcher.<SeqMatcher>as().matchers();
  if (parts.size() != 2) {
    return matcher;
  }
  Matcher first = parts.get(0);
  Matcher second = parts.get(1);
  if (first instanceof StartMatcher && second instanceof ZeroOrMoreMatcher) {
    ZeroOrMoreMatcher repetition = second.<ZeroOrMoreMatcher>as();
    if (repetition.repeated() instanceof AnyMatcher) {
      return repetition;
    }
  }
  return matcher;
}
/**
 * A start anchor in front of a repeated any match is redundant ({@code "^.*" => ".*"}).
 * When the input is exactly a start anchor followed by a zero-or-more repetition of the
 * any matcher, the repetition alone is returned. All other inputs are returned as is.
 */
static Matcher removeStartFollowedByMatchAny(Matcher matcher) {
  Matcher result = matcher;
  if (matcher instanceof SeqMatcher) {
    List<Matcher> elements = matcher.<SeqMatcher>as().matchers();
    boolean startThenRepeat = elements.size() == 2
        && elements.get(0) instanceof StartMatcher
        && elements.get(1) instanceof ZeroOrMoreMatcher;
    if (startThenRepeat) {
      ZeroOrMoreMatcher tail = elements.get(1).<ZeroOrMoreMatcher>as();
      if (tail.repeated() instanceof AnyMatcher) {
        result = tail;
      }
    }
  }
  return result;
}
/**
 * Collapses two adjacent repeated any matches into one ({@code ".*.*" => ".*"}).
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     The second repetition if {@code matcher} is a zero-or-more any match that is directly
 *     followed by another zero-or-more any match, otherwise {@code matcher}.
 */
static Matcher removeSequentialMatchAny(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher outer = matcher.<ZeroOrMoreMatcher>as();
  if (!(outer.repeated() instanceof AnyMatcher)) {
    return matcher;
  }
  Matcher following = outer.next();
  if (!(following instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher inner = following.<ZeroOrMoreMatcher>as();
  return (inner.repeated() instanceof AnyMatcher) ? inner : matcher;
}
/**
 * Back to back any matches are equivalent to a single one, e.g., {@code ".*.*" => ".*"}.
 * If the input repeats the any matcher and its continuation does the same, then the
 * continuation replaces the input.
 */
static Matcher removeSequentialMatchAny(Matcher matcher) {
  Matcher result = matcher;
  if (matcher instanceof ZeroOrMoreMatcher) {
    ZeroOrMoreMatcher leading = matcher.<ZeroOrMoreMatcher>as();
    boolean leadingIsAny = leading.repeated() instanceof AnyMatcher;
    if (leadingIsAny && leading.next() instanceof ZeroOrMoreMatcher) {
      ZeroOrMoreMatcher trailing = leading.next().<ZeroOrMoreMatcher>as();
      if (trailing.repeated() instanceof AnyMatcher) {
        result = trailing;
      }
    }
  }
  return result;
}
/**
 * Simplifies a repetition that involves a false matcher. If the matcher after the repeated
 * pattern is false, then the whole match is false, for example {@code ".*$." => "$."}. The
 * same replacement is made when the repeated pattern itself is false. In both cases the
 * matcher following the repetition is returned.
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     The continuation of the repetition when either part is a {@code FalseMatcher},
 *     otherwise {@code matcher}.
 */
static Matcher zeroOrMoreFalse(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher repetition = matcher.<ZeroOrMoreMatcher>as();
  boolean repeatedIsFalse = repetition.repeated() instanceof FalseMatcher;
  Matcher continuation = repetition.next();
  return (repeatedIsFalse || continuation instanceof FalseMatcher) ? continuation : matcher;
}
/**
 * Removes a repeated any match that sits at the end of the pattern, e.g.,
 * ({@code "foo.*$" => "foo"}). The repetition counts as trailing when it is followed by
 * either a {@code TrueMatcher} or an {@code EndMatcher}.
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     {@code TrueMatcher.INSTANCE} for a trailing zero-or-more any match, otherwise
 *     {@code matcher}.
 */
static Matcher removeTrailingMatchAny(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher repetition = matcher.<ZeroOrMoreMatcher>as();
  Matcher continuation = repetition.next();
  if (!(continuation instanceof TrueMatcher) && !(continuation instanceof EndMatcher)) {
    return matcher;
  }
  return (repetition.repeated() instanceof AnyMatcher) ? TrueMatcher.INSTANCE : matcher;
}
/**
 * A repeated any match at the very end of a pattern can be dropped, e.g.,
 * ({@code "foo.*$" => "foo"}). When the input is such a repetition and nothing but a true
 * matcher or an end anchor follows it, the input is replaced with the true matcher.
 */
static Matcher removeTrailingMatchAny(Matcher matcher) {
  Matcher result = matcher;
  if (matcher instanceof ZeroOrMoreMatcher) {
    ZeroOrMoreMatcher zeroOrMore = matcher.<ZeroOrMoreMatcher>as();
    boolean followedByTrue = zeroOrMore.next() instanceof TrueMatcher;
    boolean followedByEnd = zeroOrMore.next() instanceof EndMatcher;
    boolean repeatsAny = zeroOrMore.repeated() instanceof AnyMatcher;
    if ((followedByTrue || followedByEnd) && repeatsAny) {
      result = TrueMatcher.INSTANCE;
    }
  }
  return result;
}
/**
 * When a repetition is followed by a matcher that is false, the whole match is false and
 * the repetition can be discarded. For example: {@code ".*$." => "$."}. A repetition whose
 * repeated pattern is false is reduced to the matcher that follows it in the same way.
 */
static Matcher zeroOrMoreFalse(Matcher matcher) {
  Matcher result = matcher;
  if (matcher instanceof ZeroOrMoreMatcher) {
    ZeroOrMoreMatcher zeroOrMore = matcher.<ZeroOrMoreMatcher>as();
    if (zeroOrMore.repeated() instanceof FalseMatcher) {
      result = zeroOrMore.next();
    } else if (zeroOrMore.next() instanceof FalseMatcher) {
      result = zeroOrMore.next();
    }
  }
  return result;
}
/**
 * Drops a repeated any match when the prefix of the matcher that follows it is already an
 * {@code IndexOfMatcher}, e.g., ({@code ".*(.*foo)" => ".*foo"}).
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     The continuation of the repetition if {@code matcher} is a zero-or-more any match and
 *     {@code PatternUtils.getPrefix} of the continuation is an {@code IndexOfMatcher},
 *     otherwise {@code matcher}.
 */
static Matcher removeMatchAnyFollowedByIndexOf(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher repetition = matcher.<ZeroOrMoreMatcher>as();
  if (!(repetition.repeated() instanceof AnyMatcher)) {
    return matcher;
  }
  Matcher continuationPrefix = PatternUtils.getPrefix(repetition.next());
  return (continuationPrefix instanceof IndexOfMatcher) ? repetition.next() : matcher;
}
/**
 * Get the prefix matcher. This is similar to {@link #head(Matcher)} except that it can
 * reach into character sequences as well as higher level sequences.
 *
 * <ul>
 *   <li>For a sequence, the first matcher of the sequence.</li>
 *   <li>For a zero-or-more repetition, a new repetition of the same pattern that is
 *       followed by {@code TrueMatcher.INSTANCE}.</li>
 *   <li>For a character sequence, a new character sequence for the first character, or
 *       {@code null} if the pattern is empty.</li>
 *   <li>For anything else, the matcher itself.</li>
 * </ul>
 *
 * @param matcher
 *     Matcher to get the prefix of.
 * @return
 *     Prefix as described above. May be {@code null}.
 */
static Matcher getPrefix(Matcher matcher) {
  if (matcher instanceof SeqMatcher) {
    return matcher.<SeqMatcher>as().matchers().get(0);
  }
  if (matcher instanceof ZeroOrMoreMatcher) {
    Matcher repeated = matcher.<ZeroOrMoreMatcher>as().repeated();
    return new ZeroOrMoreMatcher(repeated, TrueMatcher.INSTANCE);
  }
  if (matcher instanceof CharSeqMatcher) {
    String text = matcher.<CharSeqMatcher>as().pattern();
    if (text.isEmpty()) {
      return null;
    }
    return new CharSeqMatcher(text.charAt(0));
  }
  return matcher;
}
/**
 * A repeated any match is redundant in front of a matcher whose prefix is an index of
 * match, e.g., ({@code ".*(.*foo)" => ".*foo"}). In that case only the matcher after the
 * repetition is kept. The prefix is determined with {@code PatternUtils.getPrefix}.
 */
static Matcher removeMatchAnyFollowedByIndexOf(Matcher matcher) {
  Matcher result = matcher;
  if (matcher instanceof ZeroOrMoreMatcher) {
    ZeroOrMoreMatcher zeroOrMore = matcher.<ZeroOrMoreMatcher>as();
    boolean redundant = zeroOrMore.repeated() instanceof AnyMatcher
        && PatternUtils.getPrefix(zeroOrMore.next()) instanceof IndexOfMatcher;
    if (redundant) {
      result = zeroOrMore.next();
    }
  }
  return result;
}
/**
 * Get the prefix matcher. This is similar to {@link #head(Matcher)} except that it can
 * reach into character sequences as well as higher level sequences. A sequence yields its
 * first element, a repetition yields a copy of itself that is followed by the true
 * matcher, and a character sequence yields a matcher for its first character. The result
 * is {@code null} for a character sequence with an empty pattern. Any other matcher is
 * its own prefix.
 */
static Matcher getPrefix(Matcher matcher) {
  Matcher prefix;
  if (matcher instanceof SeqMatcher) {
    List<Matcher> elements = matcher.<SeqMatcher>as().matchers();
    prefix = elements.get(0);
  } else if (matcher instanceof ZeroOrMoreMatcher) {
    ZeroOrMoreMatcher zeroOrMore = matcher.<ZeroOrMoreMatcher>as();
    prefix = new ZeroOrMoreMatcher(zeroOrMore.repeated(), TrueMatcher.INSTANCE);
  } else if (matcher instanceof CharSeqMatcher) {
    String chars = matcher.<CharSeqMatcher>as().pattern();
    if (chars.isEmpty()) {
      prefix = null;
    } else {
      prefix = new CharSeqMatcher(chars.charAt(0));
    }
  } else {
    prefix = matcher;
  }
  return prefix;
}
/**
 * Pushes a repeated any match that precedes an OR clause down into every branch of that
 * clause. This allows for other optimizations such as conversion to an indexOf to take
 * effect for each branch.
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     A matcher created with {@code OrMatcher.create} where each original branch is wrapped
 *     in a zero-or-more any match, or {@code matcher} if it is not a zero-or-more any match
 *     followed by an {@code OrMatcher}.
 */
static Matcher inlineMatchAnyPrecedingOr(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher repetition = matcher.<ZeroOrMoreMatcher>as();
  if (!(repetition.repeated() instanceof AnyMatcher)) {
    return matcher;
  }
  if (!(repetition.next() instanceof OrMatcher)) {
    return matcher;
  }
  List<Matcher> branches = repetition.next().<OrMatcher>as().matchers();
  List<Matcher> prefixedBranches = new ArrayList<>();
  for (Matcher branch : branches) {
    prefixedBranches.add(new ZeroOrMoreMatcher(AnyMatcher.INSTANCE, branch));
  }
  return OrMatcher.create(prefixedBranches);
}
/**
 * If an OR clause comes right after a repeated any match, then the any match is moved
 * into each of the branches and the OR clause is rebuilt from the new branches. Doing so
 * allows for other optimizations, such as conversion to an indexOf, to take effect for
 * each branch.
 */
static Matcher inlineMatchAnyPrecedingOr(Matcher matcher) {
  Matcher result = matcher;
  if (matcher instanceof ZeroOrMoreMatcher) {
    ZeroOrMoreMatcher zeroOrMore = matcher.<ZeroOrMoreMatcher>as();
    boolean anyBeforeOr = zeroOrMore.repeated() instanceof AnyMatcher
        && zeroOrMore.next() instanceof OrMatcher;
    if (anyBeforeOr) {
      List<Matcher> alternatives = zeroOrMore.next().<OrMatcher>as().matchers();
      List<Matcher> rewritten = new ArrayList<>();
      for (int i = 0; i < alternatives.size(); ++i) {
        rewritten.add(new ZeroOrMoreMatcher(AnyMatcher.INSTANCE, alternatives.get(i)));
      }
      result = OrMatcher.create(rewritten);
    }
  }
  return result;
}
/**
 * Removes a repeated any match that comes before a start anchor. The any match must be
 * empty for the start anchor to match, so it can be dropped ({@code ".*^" => "^"}).
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     The sequence following the repetition if {@code matcher} is a zero-or-more any match
 *     followed by a {@code SeqMatcher} whose first element reports
 *     {@code isStartAnchored()}, otherwise {@code matcher}.
 */
static Matcher removeMatchAnyFollowedByStart(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher repetition = matcher.<ZeroOrMoreMatcher>as();
  if (!(repetition.repeated() instanceof AnyMatcher)) {
    return matcher;
  }
  if (!(repetition.next() instanceof SeqMatcher)) {
    return matcher;
  }
  Matcher firstOfSequence = repetition.next().<SeqMatcher>as().matchers().get(0);
  return firstOfSequence.isStartAnchored() ? repetition.next() : matcher;
}
/**
 * A repeated any match in front of a start anchor has to be empty for the anchor to
 * match, which means it can be removed ({@code ".*^" => "^"}). This applies when the
 * repetition is followed by a sequence and the first element of that sequence is start
 * anchored. The sequence is then returned without the repetition.
 */
static Matcher removeMatchAnyFollowedByStart(Matcher matcher) {
  Matcher result = matcher;
  if (matcher instanceof ZeroOrMoreMatcher) {
    ZeroOrMoreMatcher zeroOrMore = matcher.<ZeroOrMoreMatcher>as();
    if (zeroOrMore.repeated() instanceof AnyMatcher && zeroOrMore.next() instanceof SeqMatcher) {
      SeqMatcher sequence = zeroOrMore.next().<SeqMatcher>as();
      if (sequence.matchers().get(0).isStartAnchored()) {
        result = zeroOrMore.next();
      }
    }
  }
  return result;
}
/**
 * Replaces a repeated any match that is followed by a char sequence with an
 * {@code IndexOfMatcher}. The index of operation seems to be optimized by the JDK and is
 * much faster. Example: {@code ".*foo" => indexOf("foo")}.
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     An {@code IndexOfMatcher} built from the pattern of the continuation's prefix and the
 *     result of {@code PatternUtils.getSuffix} for the continuation, or {@code matcher} if
 *     it is not a zero-or-more any match whose continuation has a {@code CharSeqMatcher}
 *     prefix.
 */
static Matcher convertRepeatedAnyCharSeqToIndexOf(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher repetition = matcher.<ZeroOrMoreMatcher>as();
  // The prefix is looked up before the repeated pattern is checked, same as always.
  Matcher head = PatternUtils.getPrefix(repetition.next());
  if (!(repetition.repeated() instanceof AnyMatcher) || !(head instanceof CharSeqMatcher)) {
    return matcher;
  }
  String text = head.<CharSeqMatcher>as().pattern();
  return new IndexOfMatcher(text, PatternUtils.getSuffix(repetition.next()));
}
/**
 * When a repeated any match precedes a char sequence, the pair is converted to an
 * IndexOfMatcher for the char sequence along with the remaining suffix. The index of
 * operation seems to be optimized by the JDK and is much faster. Example:
 * {@code ".*foo" => indexOf("foo")}.
 */
static Matcher convertRepeatedAnyCharSeqToIndexOf(Matcher matcher) {
  Matcher result = matcher;
  if (matcher instanceof ZeroOrMoreMatcher) {
    ZeroOrMoreMatcher zeroOrMore = matcher.<ZeroOrMoreMatcher>as();
    Matcher leading = PatternUtils.getPrefix(zeroOrMore.next());
    boolean repeatsAny = zeroOrMore.repeated() instanceof AnyMatcher;
    if (repeatsAny && leading instanceof CharSeqMatcher) {
      CharSeqMatcher chars = leading.<CharSeqMatcher>as();
      Matcher remainder = PatternUtils.getSuffix(zeroOrMore.next());
      result = new IndexOfMatcher(chars.pattern(), remainder);
    }
  }
  return result;
}