/**
 * Computes the replacement that the given escaper would produce for a single code point.
 *
 * <p>Intended for test use via the {@code EscaperAsserts} class; production users of {@link
 * UnicodeEscaper} should limit themselves to its public interface.
 *
 * @param escaper the escaper whose per-code-point replacement is queried
 * @param cp the Unicode code point to escape if necessary
 * @return the replacement string, or {@code null} if no escaping was needed
 */
public static String computeReplacement(UnicodeEscaper escaper, int cp) {
  char[] replacement = escaper.escape(cp);
  return stringOrNull(replacement);
}
/**
 * Computes the replacement that the given escaper would produce for a single code point.
 *
 * <p>Intended for test use via the {@code EscaperAsserts} class; production users of {@link
 * UnicodeEscaper} should limit themselves to its public interface.
 *
 * @param escaper the escaper whose per-code-point replacement is queried
 * @param cp the Unicode code point to escape if necessary
 * @return the replacement string, or {@code null} if no escaping was needed
 */
public static String computeReplacement(UnicodeEscaper escaper, int cp) {
  char[] replacement = escaper.escape(cp);
  return stringOrNull(replacement);
}
/**
 * Computes the replacement that the given escaper would produce for a single code point.
 *
 * <p>Intended for test use via the {@code EscaperAsserts} class; production users of {@link
 * UnicodeEscaper} should limit themselves to its public interface.
 *
 * @param escaper the escaper whose per-code-point replacement is queried
 * @param cp the Unicode code point to escape if necessary
 * @return the replacement string, or {@code null} if no escaping was needed
 */
public static String computeReplacement(UnicodeEscaper escaper, int cp) {
  char[] replacement = escaper.escape(cp);
  return stringOrNull(replacement);
}
public void testGrowBuffer() { // need to grow past an initial 1024 byte buffer StringBuilder input = new StringBuilder(); StringBuilder expected = new StringBuilder(); for (int i = 256; i < 1024; i++) { input.append((char) i); expected.append("[" + i + "]"); } assertEquals(expected.toString(), SIMPLE_ESCAPER.escape(input.toString())); }
public void testNullInput() { UnicodeEscaper e = SIMPLE_ESCAPER; try { e.escape((String) null); fail("Null string should cause exception"); } catch (NullPointerException expected) { // Pass } }
while (index < end) { int cp = codePointAt(csq, index, end); if (cp < 0 || escape(cp) != null) { break;
public void testFalsePositivesForNextEscapedIndex() { UnicodeEscaper e = new UnicodeEscaper() { // Canonical escaper method that only escapes lower case ASCII letters. @Override protected char[] escape(int cp) { return ('a' <= cp && cp <= 'z') ? new char[] {Character.toUpperCase((char) cp)} : null; } // Inefficient implementation that defines all letters as escapable. @Override protected int nextEscapeIndex(CharSequence csq, int index, int end) { while (index < end && !Character.isLetter(csq.charAt(index))) { index++; } return index; } }; assertEquals("\0HELLO \uD800\uDC00 WORLD!\n", e.escape("\0HeLLo \uD800\uDC00 WorlD!\n")); }
char[] escaped = escape(cp); int nextIndex = index + (Character.isSupplementaryCodePoint(cp) ? 2 : 1); if (escaped != null) {
public void testReplacements() throws IOException { // In reality this is not a very sensible escaper to have (if you are only // escaping elements from a map you would use a ArrayBasedCharEscaper). UnicodeEscaper escaper = new ArrayBasedUnicodeEscaper( SIMPLE_REPLACEMENTS, Character.MIN_VALUE, Character.MAX_CODE_POINT, null) { @Override protected char[] escapeUnsafe(int c) { return NO_CHARS; } }; EscaperAsserts.assertBasic(escaper); assertEquals("<tab>Fish <and> Chips<newline>", escaper.escape("\tFish & Chips\n")); // Verify that everything else is left unescaped. String safeChars = "\0\u0100\uD800\uDC00\uFFFF"; assertEquals(safeChars, escaper.escape(safeChars)); // Ensure that Unicode escapers behave correctly wrt badly formed input. String badUnicode = "\uDC00\uD800"; try { escaper.escape(badUnicode); fail("should fail for bad Unicode"); } catch (IllegalArgumentException e) { // Pass } }
public void testSafeRange() throws IOException { // Basic escaping of unsafe chars (wrap them in {,}'s) UnicodeEscaper wrappingEscaper = new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 'A', 'Z', null) { @Override protected char[] escapeUnsafe(int c) { return ("{" + (char) c + "}").toCharArray(); } }; EscaperAsserts.assertBasic(wrappingEscaper); // '[' and '@' lie either side of [A-Z]. assertEquals("{[}FOO{@}BAR{]}", wrappingEscaper.escape("[FOO@BAR]")); }
/** Exercises the three ways a {@code PercentEscaper} can treat the space character. */
public void testPlusForSpace() {
  String input = "string with spaces";

  // No safe chars, no plus-for-space: spaces are percent-encoded.
  UnicodeEscaper percentEncoding = new PercentEscaper("", false);
  assertEquals("string%20with%20spaces", percentEncoding.escape(input));

  // Plus-for-space enabled: spaces become '+'.
  UnicodeEscaper plusEncoding = new PercentEscaper("", true);
  assertEquals("string+with+spaces", plusEncoding.escape(input));

  // Space listed as a safe char: spaces pass through untouched.
  UnicodeEscaper spacePreserving = new PercentEscaper(" ", false);
  assertEquals("string with spaces", spacePreserving.escape(input));
}
public void testCodePointsFromSurrogatePairs() throws IOException { UnicodeEscaper surrogateEscaper = new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, 0, 0x20000, null) { private final char[] escaped = new char[] {'X'}; @Override protected char[] escapeUnsafe(int c) { return escaped; } }; EscaperAsserts.assertBasic(surrogateEscaper); // A surrogate pair defining a code point within the safe range. String safeInput = "\uD800\uDC00"; // 0x10000 assertEquals(safeInput, surrogateEscaper.escape(safeInput)); // A surrogate pair defining a code point outside the safe range (but both // of the surrogate characters lie within the safe range). It is important // not to accidentally treat this as a sequence of safe characters. String unsafeInput = "\uDBFF\uDFFF"; // 0x10FFFF assertEquals("X", surrogateEscaper.escape(unsafeInput)); } }
/** An escaper returning {@code NO_CHARS} for unsafe chars should delete them from the output. */
public void testDeleteUnsafeChars() throws IOException {
  // Safe range is ' ' through '~'; everything else is replaced with NO_CHARS.
  UnicodeEscaper strippingEscaper =
      new ArrayBasedUnicodeEscaper(NO_REPLACEMENTS, ' ', '~', null) {
        @Override
        protected char[] escapeUnsafe(int c) {
          return NO_CHARS;
        }
      };
  EscaperAsserts.assertBasic(strippingEscaper);

  String noisy =
      "\tEverything\0 outside the\uD800\uDC00 "
          + "printable ASCII \uFFFFrange is \u007Fdeleted.\n";
  String cleaned = strippingEscaper.escape(noisy);
  assertEquals("Everything outside the printable ASCII range is deleted.", cleaned);
}
public void testReplacementPriority() throws IOException { UnicodeEscaper replacingEscaper = new ArrayBasedUnicodeEscaper(SIMPLE_REPLACEMENTS, ' ', '~', null) { private final char[] unknown = new char[] {'?'}; @Override protected char[] escapeUnsafe(int c) { return unknown; } }; EscaperAsserts.assertBasic(replacingEscaper); // Replacements are applied first regardless of whether the character is in // the safe range or not ('&' is a safe char while '\t' and '\n' are not). assertEquals( "<tab>Fish <and>? Chips?<newline>", replacingEscaper.escape("\tFish &\0 Chips\r\n")); }
public void testAsUnicodeEscaper() throws IOException { CharEscaper charEscaper = createSimpleCharEscaper( ImmutableMap.<Character, char[]>builder() .put('x', "<hello>".toCharArray()) .put('\uD800', "<hi>".toCharArray()) .put('\uDC00', "<lo>".toCharArray()) .build()); UnicodeEscaper unicodeEscaper = Escapers.asUnicodeEscaper(charEscaper); EscaperAsserts.assertBasic(unicodeEscaper); assertEquals("<hello><hi><lo>", charEscaper.escape("x\uD800\uDC00")); assertEquals("<hello><hi><lo>", unicodeEscaper.escape("x\uD800\uDC00")); // Test that wrapped escapers acquire good Unicode semantics. assertEquals("<hi><hello><lo>", charEscaper.escape("\uD800x\uDC00")); try { unicodeEscaper.escape("\uD800x\uDC00"); fail("should have failed for bad Unicode input"); } catch (IllegalArgumentException e) { // pass } assertEquals("<lo><hi>", charEscaper.escape("\uDC00\uD800")); try { unicodeEscaper.escape("\uDC00\uD800"); fail("should have failed for bad Unicode input"); } catch (IllegalArgumentException e) { // pass } }
/** Tests that the simple escaper treats 0-9, a-z and A-Z as safe */ public void testSimpleEscaper() { UnicodeEscaper e = new PercentEscaper("", false); for (char c = 0; c < 128; c++) { if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { assertUnescaped(e, c); } else { assertEscaping(e, escapeAscii(c), c); } } // Testing mutlibyte escape sequences assertEscaping(e, "%00", '\u0000'); // nul assertEscaping(e, "%7F", '\u007f'); // del assertEscaping(e, "%C2%80", '\u0080'); // xx-00010,x-000000 assertEscaping(e, "%DF%BF", '\u07ff'); // xx-11111,x-111111 assertEscaping(e, "%E0%A0%80", '\u0800'); // xxx-0000,x-100000,x-00,0000 assertEscaping(e, "%EF%BF%BF", '\uffff'); // xxx-1111,x-111111,x-11,1111 assertUnicodeEscaping(e, "%F0%90%80%80", '\uD800', '\uDC00'); assertUnicodeEscaping(e, "%F4%8F%BF%BF", '\uDBFF', '\uDFFF'); // simple string tests assertEquals("", e.escape("")); assertEquals("safestring", e.escape("safestring")); assertEquals("embedded%00null", e.escape("embedded\0null")); assertEquals("max%EF%BF%BFchar", e.escape("max\uffffchar")); }
static void assertPathEscaper(UnicodeEscaper e) { assertBasicUrlEscaper(e); assertUnescaped(e, '!'); assertUnescaped(e, '\''); assertUnescaped(e, '('); assertUnescaped(e, ')'); assertUnescaped(e, '~'); assertUnescaped(e, ':'); assertUnescaped(e, '@'); // Don't use plus for spaces assertEscaping(e, "%20", ' '); assertEquals("safe%20with%20spaces", e.escape("safe with spaces")); assertEquals("foo@bar.com", e.escape("foo@bar.com")); }
e.escape((String) null); fail("Escaping null string should throw exception"); } catch (NullPointerException x) { assertUnicodeEscaping(e, "%F4%8F%BF%BF", '\uDBFF', '\uDFFF'); assertEquals("", e.escape("")); assertEquals("safestring", e.escape("safestring")); assertEquals("embedded%00null", e.escape("embedded\0null")); assertEquals("max%EF%BF%BFchar", e.escape("max\uffffchar"));
public void testUrlFormParameterEscaper() { UnicodeEscaper e = (UnicodeEscaper) urlFormParameterEscaper(); // Verify that these are the same escaper (as documented) assertSame(e, urlFormParameterEscaper()); assertBasicUrlEscaper(e); /* * Specified as safe by RFC 2396 but not by java.net.URLEncoder. These tests will start failing * when the escaper is made compliant with RFC 2396, but that's a good thing (just change them * to assertUnescaped). */ assertEscaping(e, "%21", '!'); assertEscaping(e, "%28", '('); assertEscaping(e, "%29", ')'); assertEscaping(e, "%7E", '~'); assertEscaping(e, "%27", '\''); // Plus for spaces assertEscaping(e, "+", ' '); assertEscaping(e, "%2B", '+'); assertEquals("safe+with+spaces", e.escape("safe with spaces")); assertEquals("foo%40bar.com", e.escape("foo@bar.com")); }