/**
 * Returns the first matcher from a sequence.
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     The first element if {@code matcher} is a {@link SeqMatcher}, otherwise
 *     {@code matcher} itself. The sequence is assumed to be non-empty; the first element
 *     is read with {@code List.get(0)} without a size check.
 */
static Matcher head(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    // Anything that is not a sequence is its own head.
    return matcher;
  }
  List<Matcher> ms = matcher.<SeqMatcher>as().matchers();
  return ms.get(0);
}
/**
 * If a sequence contains an explicit false matcher then the whole sequence will never match
 * and can be treated as false.
 *
 * @param matcher
 *     Matcher to simplify.
 * @return
 *     {@code FalseMatcher.INSTANCE} if {@code matcher} is a {@link SeqMatcher} that has a
 *     {@link FalseMatcher} among its direct elements, otherwise {@code matcher} unchanged.
 */
static Matcher sequenceWithFalseIsFalse(Matcher matcher) {
  if (matcher instanceof SeqMatcher) {
    for (Matcher m : matcher.<SeqMatcher>as().matchers()) {
      if (m instanceof FalseMatcher) {
        // One element that can never match is enough to reject the whole sequence.
        return FalseMatcher.INSTANCE;
      }
    }
  }
  return matcher;
}
/**
 * Returns all but the first matcher from a sequence, or True if the input is not a
 * sequence and therefore has nothing after its head.
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     For a {@link SeqMatcher}, the result of {@code SeqMatcher.create} applied to the
 *     elements after the first one. For any other matcher, {@code TrueMatcher.INSTANCE}.
 */
static Matcher tail(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    return TrueMatcher.INSTANCE;
  }
  List<Matcher> ms = matcher.<SeqMatcher>as().matchers();
  // Drop the head; subList(1, size) is a view over the remaining elements.
  return SeqMatcher.create(ms.subList(1, ms.size()));
}
/**
 * Since we do not need to capture the contents, nested sequences can be simplified to
 * just a simple sequence. For example, {@code "a(b.*c)d" => "ab.*cd"}.
 *
 * <p>Only sequences that are direct elements of {@code matcher} are inlined by a single
 * call; the elements of an inlined sequence are copied as they are, without being
 * inspected further.
 *
 * @param matcher
 *     Matcher to simplify.
 * @return
 *     For a {@link SeqMatcher}, the result of {@code SeqMatcher.create} applied to the
 *     flattened element list. Any other matcher is returned unchanged.
 */
static Matcher flattenNestedSequences(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    return matcher;
  }
  List<Matcher> matchers = matcher.<SeqMatcher>as().matchers();
  List<Matcher> ms = new ArrayList<>();
  for (Matcher m : matchers) {
    if (m instanceof SeqMatcher) {
      // Splice the child sequence's elements in place of the child itself.
      ms.addAll(m.<SeqMatcher>as().matchers());
    } else {
      ms.add(m);
    }
  }
  return SeqMatcher.create(ms);
}
/**
 * The true matcher is sometimes used as a placeholder while parsing. For sequences it isn't
 * needed and it is faster to leave them out.
 *
 * @param matcher
 *     Matcher to simplify.
 * @return
 *     For a {@link SeqMatcher}, the result of {@code SeqMatcher.create} applied to the
 *     elements that are not a {@link TrueMatcher}, in their original order. Any other
 *     matcher is returned unchanged.
 */
static Matcher removeTrueInSequence(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    return matcher;
  }
  List<Matcher> matchers = matcher.<SeqMatcher>as().matchers();
  List<Matcher> ms = new ArrayList<>();
  for (Matcher m : matchers) {
    if (!(m instanceof TrueMatcher)) {
      ms.add(m);
    }
  }
  return SeqMatcher.create(ms);
}
/**
 * If a start anchor is followed by a repeated any match, then the start anchor can be removed
 * as it will not change the result ({@code "^.*" => ".*"}).
 *
 * <p>The rewrite is applied only when the sequence has exactly two elements: a
 * {@link StartMatcher} followed by a {@link ZeroOrMoreMatcher} whose repeated matcher is an
 * {@link AnyMatcher}.
 *
 * @param matcher
 *     Matcher to simplify.
 * @return
 *     The {@link ZeroOrMoreMatcher} element when the pattern above is found, otherwise
 *     {@code matcher} unchanged.
 */
static Matcher removeStartFollowedByMatchAny(Matcher matcher) {
  if (matcher instanceof SeqMatcher) {
    List<Matcher> matchers = matcher.<SeqMatcher>as().matchers();
    if (matchers.size() == 2
        && matchers.get(0) instanceof StartMatcher
        && matchers.get(1) instanceof ZeroOrMoreMatcher) {
      ZeroOrMoreMatcher zm = matchers.get(1).as();
      if (zm.repeated() instanceof AnyMatcher) {
        return zm;
      }
    }
  }
  return matcher;
}
/**
 * Replaces a start anchor that is immediately followed by a character sequence with a
 * single StartsWithMatcher. In a tight loop this is much faster than running a sequence
 * of two matchers.
 *
 * @param matcher
 *     Matcher to simplify.
 * @return
 *     If {@code matcher} is a {@link SeqMatcher} with at least two elements whose first is a
 *     {@link StartMatcher} and whose second is a {@link CharSeqMatcher}, the result of
 *     {@code SeqMatcher.create} applied to a {@link StartsWithMatcher} for that pattern
 *     followed by the remaining elements. Otherwise {@code matcher} unchanged.
 */
static Matcher startsWithCharSeq(Matcher matcher) {
  if (!(matcher instanceof SeqMatcher)) {
    return matcher;
  }

  List<Matcher> parts = matcher.<SeqMatcher>as().matchers();
  boolean anchoredLiteral = parts.size() >= 2
      && parts.get(0) instanceof StartMatcher
      && parts.get(1) instanceof CharSeqMatcher;
  if (!anchoredLiteral) {
    return matcher;
  }

  // Fold the first two elements into one matcher and keep everything after them.
  List<Matcher> rewritten = new ArrayList<>();
  rewritten.add(new StartsWithMatcher(parts.get(1).<CharSeqMatcher>as().pattern()));
  rewritten.addAll(parts.subList(2, parts.size()));
  return SeqMatcher.create(rewritten);
}
/**
 * If a sequence contains content after an end anchor then it will never be able to match
 * and can be treated as false.
 *
 * <p>Elements after the end anchor for which {@code alwaysMatches()} returns true are
 * tolerated; any other element after it makes the whole sequence false.
 *
 * @param matcher
 *     Matcher to simplify.
 * @return
 *     {@code FalseMatcher.INSTANCE} if {@code matcher} is a {@link SeqMatcher} with such
 *     content after an {@link EndMatcher}, otherwise {@code matcher} unchanged.
 */
static Matcher sequenceWithStuffAfterEndIsFalse(Matcher matcher) {
  if (matcher instanceof SeqMatcher) {
    // Becomes true once an end anchor has been seen while scanning left to right.
    boolean end = false;
    for (Matcher m : matcher.<SeqMatcher>as().matchers()) {
      if (m instanceof EndMatcher) {
        end = true;
      } else if (end && !m.alwaysMatches()) {
        return FalseMatcher.INSTANCE;
      }
    }
  }
  return matcher;
}
/**
 * Get the suffix matcher. This is similar to {@link #tail(Matcher)} except that it is
 * intended to be used with {@link #getPrefix(Matcher)}.
 *
 * @param matcher
 *     Matcher to split.
 * @return
 *     <ul>
 *       <li>For a {@link SeqMatcher}: {@code SeqMatcher.create} applied to all elements
 *           after the first.</li>
 *       <li>For a {@link ZeroOrMoreMatcher}: its {@code next()} matcher.</li>
 *       <li>For a {@link CharSeqMatcher}: a new {@link CharSeqMatcher} for the pattern
 *           without its first character, or {@code TrueMatcher.INSTANCE} if the pattern
 *           has at most one character.</li>
 *       <li>For anything else: {@code TrueMatcher.INSTANCE}.</li>
 *     </ul>
 */
static Matcher getSuffix(Matcher matcher) {
  if (matcher instanceof SeqMatcher) {
    List<Matcher> elements = matcher.<SeqMatcher>as().matchers();
    return SeqMatcher.create(elements.subList(1, elements.size()));
  }

  if (matcher instanceof ZeroOrMoreMatcher) {
    return matcher.<ZeroOrMoreMatcher>as().next();
  }

  if (matcher instanceof CharSeqMatcher) {
    String text = matcher.<CharSeqMatcher>as().pattern();
    if (text.length() <= 1) {
      // Nothing is left once the first character has been taken as the prefix.
      return TrueMatcher.INSTANCE;
    }
    return new CharSeqMatcher(text.substring(1));
  }

  return TrueMatcher.INSTANCE;
}
}
/**
 * Get the prefix matcher. This is similar to {@link #head(Matcher)} except that it can
 * reach into character sequences as well as higher level sequences.
 *
 * @param matcher
 *     Matcher to split.
 * @return
 *     <ul>
 *       <li>For a {@link SeqMatcher}: its first element.</li>
 *       <li>For a {@link ZeroOrMoreMatcher}: a new {@link ZeroOrMoreMatcher} with the same
 *           repeated matcher and {@code TrueMatcher.INSTANCE} as the next matcher.</li>
 *       <li>For a {@link CharSeqMatcher}: a new {@link CharSeqMatcher} for the first
 *           character of the pattern, or {@code null} if the pattern is empty. Callers
 *           must be prepared for the {@code null} result.</li>
 *       <li>For anything else: {@code matcher} itself.</li>
 *     </ul>
 */
static Matcher getPrefix(Matcher matcher) {
  if (matcher instanceof SeqMatcher) {
    List<Matcher> ms = matcher.<SeqMatcher>as().matchers();
    return ms.get(0);
  } else if (matcher instanceof ZeroOrMoreMatcher) {
    ZeroOrMoreMatcher zm = matcher.as();
    // Keep only the repetition; whatever followed it is not part of the prefix.
    return new ZeroOrMoreMatcher(zm.repeated(), TrueMatcher.INSTANCE);
  } else if (matcher instanceof CharSeqMatcher) {
    String pattern = matcher.<CharSeqMatcher>as().pattern();
    // An empty pattern has no first character, so there is no prefix to return.
    return pattern.isEmpty() ? null : new CharSeqMatcher(pattern.charAt(0));
  } else {
    return matcher;
  }
}
/**
 * If a start anchor is preceded by a repeated any match, then the any match can be removed
 * as it must be empty for the start anchor to match ({@code ".*^" => "^"}).
 *
 * <p>The rewrite is applied only when {@code matcher} is a {@link ZeroOrMoreMatcher} that
 * repeats an {@link AnyMatcher} and whose next matcher is a {@link SeqMatcher} with a first
 * element reporting {@code isStartAnchored()}.
 *
 * @param matcher
 *     Matcher to simplify.
 * @return
 *     The next matcher of the {@link ZeroOrMoreMatcher} when the pattern above is found,
 *     otherwise {@code matcher} unchanged.
 */
static Matcher removeMatchAnyFollowedByStart(Matcher matcher) {
  if (matcher instanceof ZeroOrMoreMatcher) {
    ZeroOrMoreMatcher zm = matcher.as();
    if (zm.repeated() instanceof AnyMatcher
        && zm.next() instanceof SeqMatcher
        && zm.next().<SeqMatcher>as().matchers().get(0).isStartAnchored()) {
      return zm.next();
    }
  }
  return matcher;
}