/** Returns the result of {@code PatternUtils.escape} applied to this object's pattern. */
@Override
public String toString() {
  final String escaped = PatternUtils.escape(pattern);
  return escaped;
}
/**
 * Compile a pattern string into a matcher that can be used to check whether string
 * values match the pattern. Pattern matchers can be reused many times and are thread
 * safe. The work is delegated to {@code PatternUtils.compile}.
 *
 * @param pattern
 *     Pattern string to compile.
 * @return
 *     Matcher produced by {@code PatternUtils.compile} for the pattern.
 */
static PatternMatcher compile(String pattern) {
  final PatternMatcher compiled = PatternUtils.compile(pattern);
  return compiled;
}
/**
 * Build an UnsupportedOperationException whose message is the supplied text followed,
 * on a new line, by the context for the given position in the string.
 *
 * @param message
 *     Leading text of the exception message.
 * @param str
 *     String passed to {@code context} to produce the second line of the message.
 * @param pos
 *     Position within {@code str} passed to {@code context}.
 * @return
 *     Newly created exception; it is returned, not thrown.
 */
static UnsupportedOperationException unsupported(String message, String str, int pos) {
  final String details = message + "\n" + context(str, pos);
  return new UnsupportedOperationException(details);
}
/**
 * Compile a pattern string into a matcher that can be used to check whether string
 * values match the pattern. Pattern matchers can be reused many times and are thread
 * safe.
 *
 * <p>A leading {@code (?i)} is stripped from the pattern and causes the
 * {@code ignoreCase()} variant of the matcher to be returned. A non-empty expression
 * is wrapped as {@code ^.*(expr).*$} before escaped characters are expanded and the
 * result is parsed and optimized.
 *
 * @param pattern
 *     Pattern string to compile.
 * @return
 *     Optimized matcher for the pattern.
 */
public static PatternMatcher compile(String pattern) {
  final String caseFlag = "(?i)";
  final boolean ignoreCase = pattern.startsWith(caseFlag);
  String expr = ignoreCase ? pattern.substring(caseFlag.length()) : pattern;
  if (!expr.isEmpty()) {
    expr = "^.*(" + expr + ").*$";
  }
  final String expanded = PatternUtils.expandEscapedChars(expr);
  final Parser parser = new Parser(expanded);
  final Matcher optimized = Optimizer.optimize(parser.parse());
  if (ignoreCase) {
    return optimized.ignoreCase();
  }
  return optimized;
}
/** * Extract common prefix from OR clause. This is beneficial because it reduces the amount * that needs to be checked for each branch. For example: {@code "ab|ac" => "a(b|c)"}. */ static Matcher extractPrefixFromOr(Matcher matcher) { if (matcher instanceof OrMatcher) { // Get the prefix for the first condition List<Matcher> matchers = matcher.<OrMatcher>as().matchers(); if (matchers.isEmpty()) { return matcher; } Matcher prefix = PatternUtils.getPrefix(matchers.get(0)); if (prefix.alwaysMatches()) { return matcher; } List<Matcher> ms = new ArrayList<>(); ms.add(PatternUtils.getSuffix(matchers.get(0))); // Verify all OR conditions have the same prefix for (Matcher m : matchers.subList(1, matchers.size())) { Matcher p = PatternUtils.getPrefix(m); if (!prefix.equals(p)) { return matcher; } ms.add(PatternUtils.getSuffix(m)); } return SeqMatcher.create(prefix, OrMatcher.create(ms)); } return matcher; }
/**
 * When the matcher following an index of matcher starts with a char sequence, fold
 * that sequence into the pattern of the index of matcher.
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     New {@code IndexOfMatcher} with the concatenated pattern and the tail of the
 *     original next matcher, or the input unchanged if the rewrite does not apply.
 */
static Matcher combineCharSeqAfterIndexOf(Matcher matcher) {
  if (!(matcher instanceof IndexOfMatcher)) {
    return matcher;
  }
  IndexOfMatcher indexOf = matcher.as();
  Matcher following = PatternUtils.head(indexOf.next());
  if (!(following instanceof CharSeqMatcher)) {
    return matcher;
  }
  String combined = indexOf.pattern() + following.<CharSeqMatcher>as().pattern();
  return new IndexOfMatcher(combined, PatternUtils.tail(indexOf.next()));
}
++i; if (i >= str.length()) { throw error("dangling escape", str, i); throw error("invalid octal escape sequence", str, i); c = parse(str.substring(i + 1, i + numDigits + 1), 8, "octal", str, i); builder.append(c); i += numDigits; case 'x': if (i + 3 > str.length()) { throw error("invalid hexadecimal escape sequence", str, i); c = parse(str.substring(i + 1, i + 3), 16, "hexadecimal", str, i); builder.append(c); i += 2; case 'u': if (i + 5 > str.length()) { throw error("invalid unicode escape sequence", str, i); c = parse(str.substring(i + 1, i + 5), 16, "unicode", str, i); builder.append(c); i += 4;
private IllegalArgumentException error(String message) { return PatternUtils.error(message, tokens, current); }
/**
 * Adjacent any matches can be consolidated, e.g., ({@code ".*(.*foo)" => ".*foo"}).
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     The next matcher if the input is a {@code ZeroOrMoreMatcher} that repeats an
 *     {@code AnyMatcher} and whose next matcher has an {@code IndexOfMatcher} as its
 *     prefix; otherwise the input unchanged.
 */
static Matcher removeMatchAnyFollowedByIndexOf(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher repeat = matcher.as();
  boolean repeatsAny = repeat.repeated() instanceof AnyMatcher;
  if (repeatsAny && PatternUtils.getPrefix(repeat.next()) instanceof IndexOfMatcher) {
    return repeat.next();
  }
  return matcher;
}
/**
 * Compile a pattern string into a matcher that can be used to check whether string
 * values match the pattern. Pattern matchers can be reused many times and are thread
 * safe.
 *
 * <p>A leading {@code (?i)} is stripped from the pattern and causes the
 * {@code ignoreCase()} variant of the matcher to be returned. A non-empty expression
 * is wrapped as {@code ^.*(expr).*$} before escaped characters are expanded and the
 * result is parsed and optimized.
 *
 * @param pattern
 *     Pattern string to compile.
 * @return
 *     Optimized matcher for the pattern.
 */
public static PatternMatcher compile(String pattern) {
  final String caseFlag = "(?i)";
  final boolean ignoreCase = pattern.startsWith(caseFlag);
  String expr = ignoreCase ? pattern.substring(caseFlag.length()) : pattern;
  if (!expr.isEmpty()) {
    expr = "^.*(" + expr + ").*$";
  }
  final String expanded = PatternUtils.expandEscapedChars(expr);
  final Parser parser = new Parser(expanded);
  final Matcher optimized = Optimizer.optimize(parser.parse());
  if (ignoreCase) {
    return optimized.ignoreCase();
  }
  return optimized;
}
/** * Extract common prefix from OR clause. This is beneficial because it reduces the amount * that needs to be checked for each branch. For example: {@code "ab|ac" => "a(b|c)"}. */ static Matcher extractPrefixFromOr(Matcher matcher) { if (matcher instanceof OrMatcher) { // Get the prefix for the first condition List<Matcher> matchers = matcher.<OrMatcher>as().matchers(); if (matchers.isEmpty()) { return matcher; } Matcher prefix = PatternUtils.getPrefix(matchers.get(0)); if (prefix.alwaysMatches()) { return matcher; } List<Matcher> ms = new ArrayList<>(); ms.add(PatternUtils.getSuffix(matchers.get(0))); // Verify all OR conditions have the same prefix for (Matcher m : matchers.subList(1, matchers.size())) { Matcher p = PatternUtils.getPrefix(m); if (!prefix.equals(p)) { return matcher; } ms.add(PatternUtils.getSuffix(m)); } return SeqMatcher.create(prefix, OrMatcher.create(ms)); } return matcher; }
/**
 * When the matcher following an index of matcher starts with a char sequence, fold
 * that sequence into the pattern of the index of matcher.
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     New {@code IndexOfMatcher} with the concatenated pattern and the tail of the
 *     original next matcher, or the input unchanged if the rewrite does not apply.
 */
static Matcher combineCharSeqAfterIndexOf(Matcher matcher) {
  if (!(matcher instanceof IndexOfMatcher)) {
    return matcher;
  }
  IndexOfMatcher indexOf = matcher.as();
  Matcher following = PatternUtils.head(indexOf.next());
  if (!(following instanceof CharSeqMatcher)) {
    return matcher;
  }
  String combined = indexOf.pattern() + following.<CharSeqMatcher>as().pattern();
  return new IndexOfMatcher(combined, PatternUtils.tail(indexOf.next()));
}
++i; if (i >= str.length()) { throw error("dangling escape", str, i); throw error("invalid octal escape sequence", str, i); c = parse(str.substring(i + 1, i + numDigits + 1), 8, "octal", str, i); builder.append(c); i += numDigits; case 'x': if (i + 3 > str.length()) { throw error("invalid hexadecimal escape sequence", str, i); c = parse(str.substring(i + 1, i + 3), 16, "hexadecimal", str, i); builder.append(c); i += 2; case 'u': if (i + 5 > str.length()) { throw error("invalid unicode escape sequence", str, i); c = parse(str.substring(i + 1, i + 5), 16, "unicode", str, i); builder.append(c); i += 4;
private IllegalArgumentException error(String message) { return PatternUtils.error(message, tokens, current); }
/**
 * Adjacent any matches can be consolidated, e.g., ({@code ".*(.*foo)" => ".*foo"}).
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     The next matcher if the input is a {@code ZeroOrMoreMatcher} that repeats an
 *     {@code AnyMatcher} and whose next matcher has an {@code IndexOfMatcher} as its
 *     prefix; otherwise the input unchanged.
 */
static Matcher removeMatchAnyFollowedByIndexOf(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher repeat = matcher.as();
  boolean repeatsAny = repeat.repeated() instanceof AnyMatcher;
  if (repeatsAny && PatternUtils.getPrefix(repeat.next()) instanceof IndexOfMatcher) {
    return repeat.next();
  }
  return matcher;
}
/**
 * Verifies that every two-digit upper-case hexadecimal escape, {@code \x00} through
 * {@code \xFF}, is expanded by {@code PatternUtils.expandEscapedChars} to the single
 * character with that code.
 */
@Test
public void expandHexUpper() {
  // Inclusive upper bound so the largest two-digit value, \xFF, is also exercised
  for (int i = 0; i <= 0xFF; ++i) {
    String expected = Character.toString((char) i);
    String str = String.format("\\x%02X", i);
    Assertions.assertEquals(expected, PatternUtils.expandEscapedChars(str));
  }
}
/**
 * Returns {@code ".*"} followed by the result of {@code PatternUtils.escape} for the
 * pattern and the string form of the next matcher.
 */
@Override
public String toString() {
  final String escaped = PatternUtils.escape(pattern);
  return ".*" + escaped + next;
}
/**
 * Replace a repeated any match that is followed by a char sequence with an
 * IndexOfMatcher. The index of operation seems to be optimized by the JDK and is
 * much faster. Example: {@code ".*foo" => indexOf("foo")}.
 *
 * @param matcher
 *     Matcher to inspect.
 * @return
 *     New {@code IndexOfMatcher} for the char sequence with the suffix of the next
 *     matcher, or the input unchanged if the rewrite does not apply.
 */
static Matcher convertRepeatedAnyCharSeqToIndexOf(Matcher matcher) {
  if (!(matcher instanceof ZeroOrMoreMatcher)) {
    return matcher;
  }
  ZeroOrMoreMatcher repeat = matcher.as();
  Matcher head = PatternUtils.getPrefix(repeat.next());
  boolean convertible = repeat.repeated() instanceof AnyMatcher
      && head instanceof CharSeqMatcher;
  if (!convertible) {
    return matcher;
  }
  String pattern = head.<CharSeqMatcher>as().pattern();
  return new IndexOfMatcher(pattern, PatternUtils.getSuffix(repeat.next()));
}
/** * Create an IllegalArgumentException with a message including context based * on the position. */ static IllegalArgumentException error(String message, String str, int pos) { return new IllegalArgumentException(message + "\n" + context(str, pos)); }
/**
 * Parse the digits of a numeric escape sequence and convert the value to a char.
 *
 * @param num
 *     Digits of the escape sequence, without the leading escape marker.
 * @param radix
 *     Radix used to interpret the digits.
 * @param mode
 *     Name of the escape type; only used in the error message.
 * @param str
 *     Full string being processed, passed to {@code error} for context.
 * @param pos
 *     Position in {@code str}, passed to {@code error} for context.
 * @return
 *     Parsed value cast to a char.
 * @throws IllegalArgumentException
 *     If the digits start with a sign character or are not a valid number in the radix.
 */
@SuppressWarnings("PMD.PreserveStackTrace")
private static char parse(String num, int radix, String mode, String str, int pos) {
  final String message = "invalid " + mode + " escape sequence";

  // Integer.parseInt accepts a leading '+' or '-'. Neither is valid in an escape
  // sequence, and a negative value would silently wrap when cast to char.
  if (!num.isEmpty()) {
    final char first = num.charAt(0);
    if (first == '+' || first == '-') {
      throw error(message, str, pos);
    }
  }

  try {
    return (char) Integer.parseInt(num, radix);
  } catch (NumberFormatException e) {
    throw error(message, str, pos);
  }
}