com.google.common.escape.UnicodeEscaper.escape java code examples

/**
 * Computes the replacement that the specified escaper would produce for a single Unicode code
 * point. This helper exists for tests (it is used through the {@code EscaperAsserts} class);
 * production users of {@link UnicodeEscaper} should limit themselves to its public interface.
 *
 * @param escaper the escaper whose per-code-point {@code escape} method is consulted
 * @param cp the Unicode code point to escape if necessary
 * @return the replacement string, or {@code null} if no escaping was needed
 */
public static String computeReplacement(UnicodeEscaper escaper, int cp) {
  char[] replacement = escaper.escape(cp);
  return stringOrNull(replacement);
}

/**
 * Computes the replacement that the specified escaper would produce for a single Unicode code
 * point. This helper exists for tests (it is used through the {@code EscaperAsserts} class);
 * production users of {@link UnicodeEscaper} should limit themselves to its public interface.
 *
 * @param escaper the escaper whose per-code-point {@code escape} method is consulted
 * @param cp the Unicode code point to escape if necessary
 * @return the replacement string, or {@code null} if no escaping was needed
 */
public static String computeReplacement(UnicodeEscaper escaper, int cp) {
  char[] replacement = escaper.escape(cp);
  return stringOrNull(replacement);
}

/**
 * Computes the replacement that the specified escaper would produce for a single Unicode code
 * point. This helper exists for tests (it is used through the {@code EscaperAsserts} class);
 * production users of {@link UnicodeEscaper} should limit themselves to its public interface.
 *
 * @param escaper the escaper whose per-code-point {@code escape} method is consulted
 * @param cp the Unicode code point to escape if necessary
 * @return the replacement string, or {@code null} if no escaping was needed
 */
public static String computeReplacement(UnicodeEscaper escaper, int cp) {
  char[] replacement = escaper.escape(cp);
  return stringOrNull(replacement);
}

/**
 * Escapes the 768 characters with values 256..1023 and expects each one to come back as its
 * decimal value wrapped in square brackets.
 */
public void testGrowBuffer() {
  // Per the original author's note, the escaped output needs to grow past an initial 1024 byte
  // buffer inside the escaper.
  StringBuilder raw = new StringBuilder();
  StringBuilder escaped = new StringBuilder();
  int codeUnit = 256;
  while (codeUnit < 1024) {
    raw.append((char) codeUnit);
    escaped.append('[').append(codeUnit).append(']');
    codeUnit++;
  }
  assertEquals(escaped.toString(), SIMPLE_ESCAPER.escape(raw.toString()));
}

/** Verifies that escaping a {@code null} string throws {@link NullPointerException}. */
public void testNullInput() {
  UnicodeEscaper escaper = SIMPLE_ESCAPER;
  String nothing = null;
  try {
    escaper.escape(nothing);
    fail("Null string should cause exception");
  } catch (NullPointerException expected) {
    // Expected: null input is rejected.
  }
}

while (index < end) {
 int cp = codePointAt(csq, index, end);
 if (cp < 0 || escape(cp) != null) {
  break;

/**
 * Checks that escaping still produces the right output when {@code nextEscapeIndex} stops at
 * characters for which {@code escape(int)} then returns no replacement.
 */
public void testFalsePositivesForNextEscapedIndex() {
  UnicodeEscaper upperCasingEscaper =
      new UnicodeEscaper() {
        // Canonical escape method: only lower case ASCII letters get a replacement.
        @Override
        protected char[] escape(int cp) {
          if (cp < 'a' || cp > 'z') {
            return null;
          }
          return new char[] {Character.toUpperCase((char) cp)};
        }

        // Deliberately inefficient: reports every letter as a candidate for escaping, including
        // letters the escape method above leaves alone.
        @Override
        protected int nextEscapeIndex(CharSequence csq, int index, int end) {
          int pos = index;
          while (pos < end) {
            if (Character.isLetter(csq.charAt(pos))) {
              break;
            }
            pos++;
          }
          return pos;
        }
      };
  String mixedCase = "\0HeLLo \uD800\uDC00 WorlD!\n";
  assertEquals("\0HELLO \uD800\uDC00 WORLD!\n", upperCasingEscaper.escape(mixedCase));
}

/** Tests that specifying '%' as a safe character results in an idempotent escaper. */
public void testCustomEscaper_withpercent() {
  UnicodeEscaper percentSafeEscaper = new PercentEscaper("%", false);
  String escapedOnce = "foo%7Cbar";
  assertEquals(escapedOnce, percentSafeEscaper.escape("foo|bar"));
  // Idempotence: escaping the already-escaped form leaves it unchanged.
  assertEquals(escapedOnce, percentSafeEscaper.escape(escapedOnce));
}

char[] escaped = escape(cp);
int nextIndex = index + (Character.isSupplementaryCodePoint(cp) ? 2 : 1);
if (escaped != null) {

/**
 * Exercises an escaper whose safe range spans {@code Character.MIN_VALUE} to
 * {@code Character.MAX_CODE_POINT}, so only the entries of the replacement map are escaped.
 */
public void testReplacements() throws IOException {
  // In reality this is not a very sensible escaper to have: if you are only escaping elements
  // from a map you would use an ArrayBasedCharEscaper.
  UnicodeEscaper mapOnlyEscaper =
      new ArrayBasedUnicodeEscaper(
          SIMPLE_REPLACEMENTS, Character.MIN_VALUE, Character.MAX_CODE_POINT, null) {
        @Override
        protected char[] escapeUnsafe(int c) {
          return NO_CHARS;
        }
      };
  EscaperAsserts.assertBasic(mapOnlyEscaper);
  assertEquals("<tab>Fish <and> Chips<newline>", mapOnlyEscaper.escape("\tFish & Chips\n"));

  // Everything without a replacement must be left unescaped.
  String untouched = "\0\u0100\uD800\uDC00\uFFFF";
  assertEquals(untouched, mapOnlyEscaper.escape(untouched));

  // Badly formed input (a low surrogate followed by a high surrogate) must be rejected.
  String malformed = "\uDC00\uD800";
  try {
    mapOnlyEscaper.escape(malformed);
    fail("should fail for bad Unicode");
  } catch (IllegalArgumentException expected) {
    // Expected: malformed UTF-16 is rejected.
  }
}

/**
 * Checks the boundaries of the safe range: with 'A'..'Z' declared safe, the characters just
 * outside the range are passed to {@code escapeUnsafe}.
 */
public void testSafeRange() throws IOException {
  // Unsafe characters are made visible by wrapping them in curly braces.
  UnicodeEscaper braceWrapper =
      new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 'A', 'Z', null) {
        @Override
        protected char[] escapeUnsafe(int c) {
          return new char[] {'{', (char) c, '}'};
        }
      };
  EscaperAsserts.assertBasic(braceWrapper);
  // '@' sits immediately below 'A' and '[' immediately above 'Z'.
  String input = "[FOO@BAR]";
  assertEquals("{[}FOO{@}BAR{]}", braceWrapper.escape(input));
}

/** Tests the various ways that the space character can be handled. */
public void testPlusForSpace() {
  String input = "string with spaces";

  UnicodeEscaper percentEncoding = new PercentEscaper("", false);
  UnicodeEscaper plusEncoding = new PercentEscaper("", true);
  UnicodeEscaper spaceIsSafe = new PercentEscaper(" ", false);

  // Space is percent-encoded, turned into '+', or kept as-is, depending on the configuration.
  assertEquals("string%20with%20spaces", percentEncoding.escape(input));
  assertEquals("string+with+spaces", plusEncoding.escape(input));
  assertEquals("string with spaces", spaceIsSafe.escape(input));
}

 /**
  * Checks that the safe range is applied to whole code points decoded from surrogate pairs, not
  * to the individual surrogate characters.
  */
 public void testCodePointsFromSurrogatePairs() throws IOException {
   UnicodeEscaper pairAwareEscaper =
       new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 0, 0x20000, null) {
         private final char[] marker = new char[] {'X'};

         @Override
         protected char[] escapeUnsafe(int c) {
           return marker;
         }
       };
   EscaperAsserts.assertBasic(pairAwareEscaper);

   // Code point 0x10000 lies inside the safe range, so the pair is passed through unchanged.
   String insideRange = "\uD800\uDC00";
   assertEquals(insideRange, pairAwareEscaper.escape(insideRange));

   // Code point 0x10FFFF lies outside the safe range even though each surrogate character,
   // taken on its own, has a value inside it. The pair must not be treated as two safe
   // characters; it is replaced as one unit.
   String outsideRange = "\uDBFF\uDFFF";
   assertEquals("X", pairAwareEscaper.escape(outsideRange));
 }
}

/**
 * Checks that an escaper whose {@code escapeUnsafe} returns {@code NO_CHARS} removes every
 * character outside the safe range ' '..'~' from the input.
 */
public void testDeleteUnsafeChars() throws IOException {
  UnicodeEscaper printableAsciiOnly =
      new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, ' ', '~', null) {
        @Override
        protected char[] escapeUnsafe(int c) {
          // NOTE(review): NO_CHARS is presumably an empty array, which makes this a deletion.
          return NO_CHARS;
        }
      };
  EscaperAsserts.assertBasic(printableAsciiOnly);

  // Input sprinkled with a tab, NUL, a surrogate pair, U+FFFF, DEL and a newline.
  String noisy =
      "\tEverything\0 outside the\uD800\uDC00 "
          + "printable ASCII \uFFFFrange is \u007Fdeleted.\n";
  String cleaned = printableAsciiOnly.escape(noisy);
  assertEquals("Everything outside the printable ASCII range is deleted.", cleaned);
}

/**
 * Checks that explicit replacements take priority over the safe-range check, and that unsafe
 * characters without a replacement fall back to {@code escapeUnsafe}.
 */
public void testReplacementPriority() throws IOException {
  UnicodeEscaper questionMarkEscaper =
      new ArrayBasedUnicodeEscaper(SIMPLE_REPLACEMENTS, ' ', '~', null) {
        private final char[] fallback = new char[] {'?'};

        @Override
        protected char[] escapeUnsafe(int c) {
          return fallback;
        }
      };
  EscaperAsserts.assertBasic(questionMarkEscaper);

  // Replacements are applied first regardless of whether the character is in the safe range
  // ('&' is a safe char while '\t' and '\n' are not). NUL and '\r' have no replacement in the
  // expected output and show up as '?'.
  String input = "\tFish &\0 Chips\r\n";
  assertEquals("<tab>Fish <and>? Chips?<newline>", questionMarkEscaper.escape(input));
}

/**
 * Checks that {@code Escapers.asUnicodeEscaper} wraps a {@code CharEscaper} so that well-formed
 * input is escaped as before, while input with unpaired or misordered surrogates is rejected
 * with {@link IllegalArgumentException}.
 */
public void testAsUnicodeEscaper() throws IOException {
  ImmutableMap<Character, char[]> replacements =
      ImmutableMap.<Character, char[]>builder()
          .put('x', "<hello>".toCharArray())
          .put('\uD800', "<hi>".toCharArray())
          .put('\uDC00', "<lo>".toCharArray())
          .build();
  CharEscaper charEscaper = createSimpleCharEscaper(replacements);
  UnicodeEscaper unicodeEscaper = Escapers.asUnicodeEscaper(charEscaper);
  EscaperAsserts.assertBasic(unicodeEscaper);

  // Well-formed input: both escapers agree.
  String wellFormed = "x\uD800\uDC00";
  assertEquals("<hello><hi><lo>", charEscaper.escape(wellFormed));
  assertEquals("<hello><hi><lo>", unicodeEscaper.escape(wellFormed));

  // Wrapped escapers acquire good Unicode semantics: a surrogate pair split by another
  // character is fine for the char escaper but an error for the wrapper.
  String splitPair = "\uD800x\uDC00";
  assertEquals("<hi><hello><lo>", charEscaper.escape(splitPair));
  try {
    unicodeEscaper.escape(splitPair);
    fail("should have failed for bad Unicode input");
  } catch (IllegalArgumentException expected) {
    // Expected: malformed UTF-16 is rejected.
  }

  // The same holds for a pair given in the wrong order (low surrogate first).
  String reversedPair = "\uDC00\uD800";
  assertEquals("<lo><hi>", charEscaper.escape(reversedPair));
  try {
    unicodeEscaper.escape(reversedPair);
    fail("should have failed for bad Unicode input");
  } catch (IllegalArgumentException expected) {
    // Expected: malformed UTF-16 is rejected.
  }
}

/** Tests that the simple escaper treats 0-9, a-z and A-Z as safe. */
public void testSimpleEscaper() {
  UnicodeEscaper e = new PercentEscaper("", false);

  // Walk the whole ASCII range: alphanumerics stay as they are, everything else is escaped.
  for (char c = 0; c < 128; c++) {
    boolean isDigit = c >= '0' && c <= '9';
    boolean isLower = c >= 'a' && c <= 'z';
    boolean isUpper = c >= 'A' && c <= 'Z';
    if (isDigit || isLower || isUpper) {
      assertUnescaped(e, c);
    } else {
      assertEscaping(e, escapeAscii(c), c);
    }
  }

  // Multibyte escape sequences; the trailing notes show the bit groups of each encoded byte.
  assertEscaping(e, "%00", '\u0000'); // nul
  assertEscaping(e, "%7F", '\u007f'); // del
  assertEscaping(e, "%C2%80", '\u0080'); // xx-00010,x-000000
  assertEscaping(e, "%DF%BF", '\u07ff'); // xx-11111,x-111111
  assertEscaping(e, "%E0%A0%80", '\u0800'); // xxx-0000,x-100000,x-00,0000
  assertEscaping(e, "%EF%BF%BF", '\uffff'); // xxx-1111,x-111111,x-11,1111
  assertUnicodeEscaping(e, "%F0%90%80%80", '\uD800', '\uDC00');
  assertUnicodeEscaping(e, "%F4%8F%BF%BF", '\uDBFF', '\uDFFF');

  // Whole-string checks.
  assertEquals("", e.escape(""));
  assertEquals("safestring", e.escape("safestring"));
  assertEquals("embedded%00null", e.escape("embedded\0null"));
  assertEquals("max%EF%BF%BFchar", e.escape("max\uffffchar"));
}

/**
 * Asserts the behavior expected of a URL path escaper: the basic URL escaper checks, a set of
 * punctuation characters left unescaped, and spaces encoded as {@code %20} rather than '+'.
 *
 * @param e the escaper under test
 */
static void assertPathEscaper(UnicodeEscaper e) {
  assertBasicUrlEscaper(e);

  // Punctuation that must pass through unchanged, checked in this order.
  char[] unescapedPunctuation = {'!', '\'', '(', ')', '~', ':', '@'};
  for (char safe : unescapedPunctuation) {
    assertUnescaped(e, safe);
  }

  // Don't use plus for spaces.
  assertEscaping(e, "%20", ' ');
  assertEquals("safe%20with%20spaces", e.escape("safe with spaces"));
  assertEquals("foo@bar.com", e.escape("foo@bar.com"));
}

 e.escape((String) null);
 fail("Escaping null string should throw exception");
} catch (NullPointerException x) {
assertUnicodeEscaping(e, "%F4%8F%BF%BF", '\uDBFF', '\uDFFF');
assertEquals("", e.escape(""));
assertEquals("safestring", e.escape("safestring"));
assertEquals("embedded%00null", e.escape("embedded\0null"));
assertEquals("max%EF%BF%BFchar", e.escape("max\uffffchar"));

/**
 * Checks the form-parameter escaper: it is the same instance on every call, it passes the basic
 * URL escaper checks, it escapes a handful of punctuation characters, and it encodes spaces
 * as '+'.
 */
public void testUrlFormParameterEscaper() {
  UnicodeEscaper formEscaper = (UnicodeEscaper) urlFormParameterEscaper();
  // Verify that these are the same escaper (as documented).
  assertSame(formEscaper, urlFormParameterEscaper());
  assertBasicUrlEscaper(formEscaper);

  // The following characters are specified as safe by RFC 2396 but not by java.net.URLEncoder.
  // These checks will start failing when the escaper is made compliant with RFC 2396, but
  // that's a good thing (just change them to assertUnescaped).
  assertEscaping(formEscaper, "%21", '!');
  assertEscaping(formEscaper, "%28", '(');
  assertEscaping(formEscaper, "%29", ')');
  assertEscaping(formEscaper, "%7E", '~');
  assertEscaping(formEscaper, "%27", '\'');

  // Plus for spaces, so a literal plus has to be escaped itself.
  assertEscaping(formEscaper, "+", ' ');
  assertEscaping(formEscaper, "%2B", '+');
  assertEquals("safe+with+spaces", formEscaper.escape("safe with spaces"));
  assertEquals("foo%40bar.com", formEscaper.escape("foo@bar.com"));
}

Javadoc

Returns the escaped form of a given literal string.

If you are escaping input in arbitrary successive chunks, then it is not generally safe to use this method. If an input string ends with an unmatched high surrogate character, then this method will throw IllegalArgumentException. You should ensure your input is valid UTF-16 before calling this method.

Note: When implementing an escaper it is a good idea to override this method for efficiency by inlining the implementation of #nextEscapeIndex(CharSequence,int,int) directly. Doing this for com.google.common.net.PercentEscaper more than doubled the performance for unescaped strings (as measured by CharEscapersBenchmark).

Popular methods of UnicodeEscaper

codePointAt
Returns the Unicode code point of the character at the given index. Unlike Character#codePointAt(Char
escapeSlow
Returns the escaped form of a given literal string, starting at the given index. This method is call
growBuffer
Helper method to grow the character buffer as needed, this only happens once in a while so it's ok i
nextEscapeIndex
Scans a sub-sequence of characters from a given CharSequence, returning the index of the next charac

Popular in Java

Parsing JSON documents to java classes using gson
getSharedPreferences (Context)
onCreateOptionsMenu (Activity)
setRequestProperty (URLConnection)
FileWriter (java.io)
A specialized Writer that writes to a file in the file system. All write requests made by calling me
GregorianCalendar (java.util)
GregorianCalendar is a concrete subclass of Calendar and provides the standard calendar used by most
Pattern (java.util.regex)
Patterns are compiled regular expressions. In many cases, convenience methods such as String#matches
DataSource (javax.sql)
An interface for the creation of Connection objects which represent a connection to a database. This
XPath (javax.xml.xpath)
XPath provides access to the XPath evaluation environment and expressions. Evaluation of XPath Expr
Filter (javax.servlet)
A filter is an object that performs filtering tasks on either the request to a resource (a servlet o
Top plugins for Android Studio

How to use the escape method in com.google.common.escape.UnicodeEscaper

Best Java code snippets using com.google.common.escape.UnicodeEscaper.escape (Showing top 20 results out of 315)

How to use
escape
method
in
com.google.common.escape.UnicodeEscaper