java.util.regex.Pattern java code examples

Refine search

Matcher

/**
 * Tests the supplied candidate {@code String} against the exclusion {@link Pattern}
 * stored at index {@code patternIndex}.
 *
 * @param candidate the text to test
 * @param patternIndex position of the exclusion pattern within the compiled array
 * @return {@code true} only when the entire candidate matches that pattern
 */
@Override
protected boolean matchesExclusion(String candidate, int patternIndex) {
  return this.compiledExclusionPatterns[patternIndex].matcher(candidate).matches();
}

 // Print the text found between the first pair of single quotes, if there is one.
 String mydata = "some string with 'the data i want' inside";
// The reluctant group (.*?) stops at the nearest closing quote.
Matcher matcher = Pattern.compile("'(.*?)'").matcher(mydata);
if (matcher.find()) {
  System.out.println(matcher.group(1));
}

// String.split takes a regular expression, where an unquoted "." matches any character;
// Pattern.quote turns it into a literal period.
String[] parts = string.split(Pattern.quote(".")); // Split on period.

/**
 * Builds a fresh {@code MatchResult} that represents a successful match. Apache Harmony (used
 * in Android) can only generate a {@code MatchResult} after a match has succeeded:
 * http://goo.gl/5VQFmC
 *
 * @return the result of matching the pattern {@code "."} against the input {@code "X"}
 */
private static MatchResult newMatchResult() {
 Matcher successfulMatcher = Pattern.compile(".").matcher("X");
 // The return value is ignored on purpose: the call is made only to put the matcher into
 // a matched state before the snapshot is taken.
 successfulMatcher.find();
 return successfulMatcher.toMatchResult();
}

/**
 * Divides this string around matches of the regular expression {@code expr}. The value of
 * {@code max} is handed unchanged to {@link Pattern#split(CharSequence, int)} as its limit and
 * therefore controls how many times the pattern is applied.
 *
 * @param expr the regular expression used to divide the string.
 * @param max the limit passed to {@link Pattern#split(CharSequence, int)}.
 * @return an array of {@code AsciiString}s, one per piece produced by the split.
 * @throws NullPointerException if {@code expr} is {@code null}.
 * @throws PatternSyntaxException if the syntax of the supplied regular expression is not valid.
 * @see Pattern#split(CharSequence, int)
 */
public AsciiString[] split(String expr, int max) {
  Pattern delimiter = Pattern.compile(expr);
  String[] pieces = delimiter.split(this, max);
  return toAsciiStringArray(pieces);
}

/**
 * Collects the distinct path parameters referenced by the given URI. A parameter that occurs
 * more than once is reported a single time; the first capture group of each
 * {@code PARAM_URL_REGEX} match is recorded, in order of first appearance.
 *
 * @param path the URI path to scan
 * @return the unique parameter names, in encounter order
 */
static Set<String> parsePathParameters(String path) {
 Set<String> names = new LinkedHashSet<>();
 for (Matcher matcher = PARAM_URL_REGEX.matcher(path); matcher.find(); ) {
  names.add(matcher.group(1));
 }
 return names;
}

/**
 * Constructs a pattern file name filter object. The pattern string is compiled with
 * {@link Pattern#compile(String)} and the resulting {@link Pattern} is passed to the
 * {@code Pattern}-accepting constructor of this class.
 *
 * @param patternStr the pattern string on which to filter file names
 * @throws PatternSyntaxException if pattern compilation fails (runtime)
 */
public PatternFilenameFilter(String patternStr) {
 this(Pattern.compile(patternStr));
}

 /**
  * Fails the current test unless {@code expectedRegex} is found somewhere within
  * {@code actual}. A {@code null} value for {@code actual} is reported as a failure whose
  * message describes the value as {@code null}.
  *
  * @param expectedRegex the regular expression that must be found in {@code actual}
  * @param actual the text to search; may be {@code null}
  * @throws PatternSyntaxException if {@code expectedRegex} is not a valid regular expression
  */
 private static void assertContainsRegex(String expectedRegex, String actual) {
  Pattern pattern = Pattern.compile(expectedRegex);
  // Check for null before creating the matcher: Pattern.matcher(null) throws a
  // NullPointerException, which would bypass the descriptive failure message below.
  if (actual == null || !pattern.matcher(actual).find()) {
   String actualDesc = (actual == null) ? "null" : ('<' + actual + '>');
   fail("expected to contain regex:<" + expectedRegex + "> but was:" + actualDesc);
  }
 }
}

/**
 * Asserts that the description of {@code resource} matches {@code DESCRIPTION_PATTERN} in full,
 * that the pattern has exactly one capturing group, and that the group equals
 * {@code expectedPath}.
 */
private void assertDescriptionContainsExpectedPath(ClassPathResource resource, String expectedPath) {
  Matcher descriptionMatcher = DESCRIPTION_PATTERN.matcher(resource.getDescription());
  // The match must succeed before group(1) can be read.
  assertTrue(descriptionMatcher.matches());
  assertEquals(1, descriptionMatcher.groupCount());
  assertEquals(expectedPath, descriptionMatcher.group(1));
}

/**
 * Validates the name of a {@code @Path} parameter: it must match {@code PARAM_NAME_REGEX} in
 * full and must be one of the names collected in {@code relativeUrlParamNames}.
 *
 * @param p index of the parameter, forwarded to {@code parameterError}
 * @param name the parameter name to validate
 */
private void validatePathName(int p, String name) {
 boolean wellFormed = PARAM_NAME_REGEX.matcher(name).matches();
 if (!wellFormed) {
  // NOTE(review): the check uses PARAM_NAME_REGEX but the message reports PARAM_URL_REGEX;
  // confirm this is intentional.
  throw parameterError(method, p, "@Path parameter name must match %s. Found: %s",
    PARAM_URL_REGEX.pattern(), name);
 }
 // The name must also actually appear as a replacement block in the URL path.
 boolean presentInUrl = relativeUrlParamNames.contains(name);
 if (!presentInUrl) {
  throw parameterError(method, p, "URL \"%s\" does not contain \"{%s}\".", relativeUrl, name);
 }
}

  /**
   * Returns the length of the pattern, counting every template variable as a single character
   * (each {@code VARIABLE_PATTERN} match is replaced by {@code "#"} before measuring). A
   * {@code null} pattern has length 0. The value is computed on the first call and stored in
   * {@code length} for later calls.
   */
  public int getLength() {
    if (this.length == null) {
      if (this.pattern == null) {
        this.length = 0;
      }
      else {
        String collapsed = VARIABLE_PATTERN.matcher(this.pattern).replaceAll("#");
        this.length = collapsed.length();
      }
    }
    return this.length;
  }
}

 // Walk the accounts returned by AccountManager and pick out the ones whose name has the
 // form of an e-mail address.
 Pattern emailPattern = Patterns.EMAIL_ADDRESS; // API level 8+
Account[] accounts = AccountManager.get(context).getAccounts();
for (Account account : accounts) {
  // matches() succeeds only if the whole account name fits the pattern.
  if (emailPattern.matcher(account.name).matches()) {
    String possibleEmail = account.name;
    ...
  }
}

/**
 * Asserts that {@code Doubles.tryParse(input)} yields {@code expected} and that {@code input}
 * matches a pattern rebuilt from the text and flags of {@code Doubles.FLOATING_POINT_PATTERN}.
 */
@GwtIncompatible // Doubles.tryParse
private static void checkTryParse(double expected, String input) {
 assertEquals(Double.valueOf(expected), Doubles.tryParse(input));
 // Re-compile the pattern from its source text and flags rather than using the constant.
 Pattern floatingPoint =
   Pattern.compile(
     Doubles.FLOATING_POINT_PATTERN.pattern(), Doubles.FLOATING_POINT_PATTERN.flags());
 assertThat(input).matches(floatingPoint);
}

/**
 * Checks the regex-related arbitrary instances: the {@code Pattern} instance has the same
 * pattern text as the empty pattern, and the {@code MatchResult} instance has no groups.
 */
public void testGet_regex() {
 String emptyPatternText = Pattern.compile("").pattern();
 Pattern arbitraryPattern = ArbitraryInstances.get(Pattern.class);
 assertEquals(emptyPatternText, arbitraryPattern.pattern());

 MatchResult arbitraryResult = ArbitraryInstances.get(MatchResult.class);
 assertEquals(0, arbitraryResult.groupCount());
}

/**
 * Computes how many rows a textarea needs to show the given content: the number of pieces
 * produced by splitting on {@code LINE_END}, but never fewer than 5.
 *
 * @param s the textarea content; {@code null} yields the minimum of 5
 * @return the row count, at least 5
 */
public static int determineRows(String s) {
  return s == null ? 5 : Math.max(5, LINE_END.split(s).length);
}

/**
 * Extracts the version from a request path: the first capture group of the first match of
 * {@code pattern}, reduced to the text after its last {@code '-'} when it contains one.
 *
 * @return the extracted version, or {@code null} if the pattern is not found in the path
 */
@Override
public String extractVersion(String requestPath) {
  Matcher matcher = pattern.matcher(requestPath);
  if (!matcher.find()) {
    return null;
  }
  String captured = matcher.group(1);
  // Keep only what follows the last dash; a value without a dash is returned unchanged.
  int lastDash = captured.lastIndexOf('-');
  return (lastDash >= 0 ? captured.substring(lastDash + 1) : captured);
}

/**
 * Compiles every regular expression in the supplied {@code String[]} and returns the
 * resulting {@link Pattern} objects in an array of the same length and order.
 *
 * @param source the regular expressions to compile
 * @return the compiled patterns, index-aligned with {@code source}
 * @throws PatternSyntaxException if any entry is not a valid regular expression
 */
private Pattern[] compilePatterns(String[] source) throws PatternSyntaxException {
  Pattern[] compiled = new Pattern[source.length];
  int index = 0;
  for (String regex : source) {
    compiled[index++] = Pattern.compile(regex);
  }
  return compiled;
}

/**
 * Creates a {@code CommonMatcher} for the given input by wrapping the matcher obtained from
 * {@code pattern.matcher(t)} in a {@code JdkMatcher}.
 *
 * @param t the character sequence to match against
 * @return a new {@code JdkMatcher} for {@code t}
 */
@Override
public CommonMatcher matcher(CharSequence t) {
 return new JdkMatcher(pattern.matcher(t));
}

/**
 * For every entry of {@code BAD_TRY_PARSE_INPUTS}, checks that it does not match a pattern
 * rebuilt from {@code Doubles.FLOATING_POINT_PATTERN}, that {@code Doubles.tryParse} agrees
 * with {@code referenceTryParse}, and that {@code Doubles.tryParse} returns {@code null}.
 */
@GwtIncompatible // Doubles.tryParse
public void testTryParseFailures() {
 for (String badInput : BAD_TRY_PARSE_INPUTS) {
  // Re-compile the pattern from its source text and flags rather than using the constant.
  Pattern floatingPoint =
    Pattern.compile(
      Doubles.FLOATING_POINT_PATTERN.pattern(), Doubles.FLOATING_POINT_PATTERN.flags());
  assertThat(badInput).doesNotMatch(floatingPoint);
  assertEquals(referenceTryParse(badInput), Doubles.tryParse(badInput));
  assertNull(Doubles.tryParse(badInput));
 }
}

/**
 * Exercises {@code PatternEditor}: text set on the editor round-trips through
 * {@code getValue()} and {@code getAsText()}, and both a fresh editor and one given
 * {@code null} text report the empty string.
 */
@Test
public void testPatternEditor() {
  final String regex = "a.*";

  // Text set on the editor is exposed both as a Pattern and as the original text.
  PropertyEditor populatedEditor = new PatternEditor();
  populatedEditor.setAsText(regex);
  String expectedPatternText = Pattern.compile(regex).pattern();
  Pattern editorValue = (Pattern) populatedEditor.getValue();
  assertEquals(expectedPatternText, editorValue.pattern());
  assertEquals(regex, populatedEditor.getAsText());

  // An editor that was never given a value reports empty text.
  PropertyEditor untouchedEditor = new PatternEditor();
  assertEquals("", untouchedEditor.getAsText());

  // Null text is likewise reported as empty text.
  PropertyEditor nullTextEditor = new PatternEditor();
  nullTextEditor.setAsText(null);
  assertEquals("", nullTextEditor.getAsText());
}

Javadoc

Patterns are compiled regular expressions. In many cases, convenience methods such as String#matches, String#replaceAll and String#split will be preferable, but if you need to do a lot of work with the same regular expression, it may be more efficient to compile it once and reuse it. The Pattern class and its companion, Matcher, also offer more functionality than the small amount exposed by String.

 
// String convenience methods: each call takes the regular expression as a plain string.
boolean sawFailures = s.matches("Failures: \\d+");
String farewell = s.replaceAll("Hello, (\\S+)", "Goodbye, $1");
String[] fields = s.split(":");
// Direct use of Pattern: compile the expression once, then reuse it via a Matcher.
Pattern p = Pattern.compile("Hello, (\\S+)");
Matcher m = p.matcher(inputString);
while (m.find()) { // Find each match in turn; String can't do this.
String name = m.group(1); // Access a submatch group (here the text captured by (\\S+)); String can't do this.
}

Regular expression syntax

Java supports a subset of Perl 5 regular expression syntax. An important gotcha is that Java has no regular expression literals, and uses plain old string literals instead. This means that you need an extra level of escaping. For example, the regular expression \s+ has to be represented as the string "\\s+".

Escape sequences

\	Quote the following metacharacter (so \. matches a literal .).
\Q	Quote all following metacharacters until \E.
\E	Stop quoting metacharacters (started by \Q).
\\	A literal backslash.
\uhhhh	The Unicode character U+hhhh (in hex).
\xhh	The Unicode character U+00hh (in hex).
\cx	The ASCII control character ^x (so \cH would be ^H, U+0008).
\a	The ASCII bell character (U+0007).
\e	The ASCII ESC character (U+001b).
\f	The ASCII form feed character (U+000c).
\n	The ASCII newline character (U+000a).
\r	The ASCII carriage return character (U+000d).
\t	The ASCII tab character (U+0009).

Character classes

It's possible to construct arbitrary character classes using set operations:

[abc]	Any one of a, b, or c. (Enumeration.)
[a-c]	Any one of a, b, or c. (Range.)
[^abc]	Any character except a, b, or c. (Negation.)
[[a-f][0-9]]	Any character in either range. (Union.)
[[a-z]&&[jkl]]	Any character in both ranges. (Intersection.)

Most of the time, the built-in character classes are more useful:

\d	Any digit character (see note below).
\D	Any non-digit character (see note below).
\s	Any whitespace character (see note below).
\S	Any non-whitespace character (see note below).
\w	Any word character (see note below).
\W	Any non-word character (see note below).
\p{NAME}	Any character in the class with the given NAME.
\P{NAME}	Any character not in the named class.

Note that these built-in classes don't just cover the traditional ASCII range. For example, \w is equivalent to the character class [\p{Ll}\p{Lu}\p{Lt}\p{Lo}\p{Nd}]. For more details see Unicode TR-18, and bear in mind that the set of characters in each class can vary between Unicode releases. If you actually want to match only ASCII characters, specify the explicit characters you want; if you mean 0-9 use [0-9] rather than \d, which would also include Gurmukhi digits and so forth.

There are also a variety of named classes:

Unicode category names, prefixed by Is. For example \p{IsLu} for all uppercase letters.
POSIX class names. These are 'Alnum', 'Alpha', 'ASCII', 'Blank', 'Cntrl', 'Digit', 'Graph', 'Lower', 'Print', 'Punct', 'Upper', 'XDigit'.
Unicode block names, as accepted as input to java.lang.Character.UnicodeBlock#forName, prefixed by In. For example \p{InHebrew} for all characters in the Hebrew block.
Character method names. These are all non-deprecated methods from java.lang.Character whose name starts with is, but with the is replaced by java. For example, \p{javaLowerCase}.

Quantifiers

Quantifiers match some number of instances of the preceding regular expression.

*	Zero or more.
?	Zero or one.
+	One or more.
{n}	Exactly n.
{n,}	At least n.
{n,m}	At least n but not more than m.

Quantifiers are "greedy" by default, meaning that they will match the longest possible input sequence. There are also non-greedy quantifiers that match the shortest possible input sequence. They're the same as the greedy ones but with a trailing ?:

*?	Zero or more (non-greedy).
??	Zero or one (non-greedy).
+?	One or more (non-greedy).
{n}?	Exactly n (non-greedy).
{n,}?	At least n (non-greedy).
{n,m}?	At least n but not more than m (non-greedy).

Quantifiers allow backtracking by default. There are also possessive quantifiers to prevent backtracking. They're the same as the greedy ones but with a trailing +:

*+	Zero or more (possessive).
?+	Zero or one (possessive).
++	One or more (possessive).
{n}+	Exactly n (possessive).
{n,}+	At least n (possessive).
{n,m}+	At least n but not more than m (possessive).

Zero-width assertions

^	At beginning of line.
$	At end of line.
\A	At beginning of input.
\b	At word boundary.
\B	At non-word boundary.
\G	At end of previous match.
\z	At end of input.
\Z	At end of input, or before newline at end.

Look-around assertions

Look-around assertions assert that the subpattern does (positive) or doesn't (negative) match after (look-ahead) or before (look-behind) the current position, without including the matched text in the containing match. The maximum length of possible matches for look-behind patterns must not be unbounded.

(?=a)	Zero-width positive look-ahead.
(?!a)	Zero-width negative look-ahead.
(?<=a)	Zero-width positive look-behind.
(?<!a)	Zero-width negative look-behind.

Groups

(a)	A capturing group.
(?:a)	A non-capturing group.
(?>a)	An independent non-capturing group. (The first match of the subgroup is the only match tried.)
\n	The text already matched by capturing group n.

See Matcher#group for details of how capturing groups are numbered and accessed.

Operators

ab	Expression a followed by expression b.
a|b	Either expression a or expression b.

Flags

(?dimsux-dimsux:a)	Evaluates the expression a with the given flags enabled/disabled.
(?dimsux-dimsux)	Evaluates the rest of the pattern with the given flags enabled/disabled.

The flags are:

i	#CASE_INSENSITIVE	case insensitive matching
d	#UNIX_LINES	only accept '\n' as a line terminator
m	#MULTILINE	allow ^ and $ to match beginning/end of any line
s	#DOTALL	allow . to match '\n' ("s" for "single line")
u	#UNICODE_CASE	enable Unicode case folding
x	#COMMENTS	allow whitespace and comments

Either set of flags may be empty. For example, (?i-m) would turn on case-insensitivity and turn off multiline mode, (?i) would just turn on case-insensitivity, and (?-m) would just turn off multiline mode.

Note that on Android, UNICODE_CASE is always on: case-insensitive matching will always be Unicode-aware.

There are two other flags not settable via this mechanism: #CANON_EQ and #LITERAL. Attempts to use #CANON_EQ on Android will throw an exception.

Implementation notes

The regular expression implementation used in Android is provided by ICU. The notation for the regular expressions is mostly a superset of those used in other Java language implementations. This means that existing applications will normally work as expected, but in rare cases Android may accept a regular expression that is not accepted by other implementations.

In some cases, Android will recognize that a regular expression is a simple special case that can be handled more efficiently. This is true of both the convenience methods in String and the methods in Pattern.

Most used methods

matcher
Creates a matcher that will match the given input against this pattern.
compile
Compiles the given regular expression into a pattern with the given flags.
quote
Returns a literal pattern String for the specified String. This method produces a String that can be u
split
Splits the given input sequence around matches of this pattern. The array returned by this method co
pattern
matches
Compiles the given regular expression and attempts to match the given input against it. An invocatio
toString
Returns the string representation of this pattern. This is the regular expression from which this pa
flags
Returns this pattern's match flags.
splitAsStream
asPredicate
<init>
This private constructor is used to create all Patterns. The pattern string and match flags are all
closeImpl

Popular in Java

Reading from database using SQL prepared statement
setScale (BigDecimal)
addToBackStack (FragmentTransaction)
findViewById (Activity)
BufferedWriter (java.io)
Wraps an existing Writer and buffers the output. Expensive interaction with the underlying writer is
Selector (java.nio.channels)
A controller for the selection of SelectableChannel objects. Selectable channels can be registered w
ExecutorService (java.util.concurrent)
An Executor that provides methods to manage termination and methods that can produce a Future for tr
Options (org.apache.commons.cli)
Main entry-point into the library. Options represents a collection of Option objects, which describ
JFrame (javax.swing)
Runner (org.openjdk.jmh.runner)
Best IntelliJ plugins

How to use Pattern in java.util.regex

Best Java code snippets using java.util.regex.Pattern (Showing top 20 results out of 132,138)

Refine search

How to use
Pattern
in
java.util.regex