/**
 * <p>Creates a {@code NumericEntityEscaper} covering the range bounded by the two
 * given code points, with both bounds included.</p>
 *
 * @param codepointLow lower bound of the range to escape
 * @param codepointHigh upper bound of the range to escape
 * @return the newly created {@code NumericEntityEscaper} instance
 */
public static NumericEntityEscaper between(final int codepointLow, final int codepointHigh) {
    // The third constructor argument is true for the "between" variant.
    final boolean insideRange = true;
    return new NumericEntityEscaper(codepointLow, codepointHigh, insideRange);
}
/**
 * <p>Creates a {@code NumericEntityEscaper} for code points below the given value;
 * the value itself is excluded.</p>
 *
 * <p>Delegates to {@code outsideOf(int, int)} with {@code Integer.MAX_VALUE} as the
 * upper bound.</p>
 *
 * @param codepoint the value below which to escape
 * @return the newly created {@code NumericEntityEscaper} instance
 */
public static NumericEntityEscaper below(final int codepoint) {
    final int upperBound = Integer.MAX_VALUE;
    return outsideOf(codepoint, upperBound);
}
/**
 * Verifies that a supplementary character given as a UTF-16 surrogate pair is
 * translated into one decimal numeric entity for the whole code point.
 *
 * <p>The pair {@code \uD803\uDC22} encodes code point U+10C22, which is 68642 in
 * decimal, so the expected output is the single entity for 68642.</p>
 */
@Test
public void testSupplementary() {
    final NumericEntityEscaper nee = new NumericEntityEscaper();
    final String input = "\uD803\uDC22";
    // Must be the escaped form; the raw character would equal the input and
    // assert that no escaping took place.
    final String expected = "&#68642;";

    final String result = nee.translate(input);
    assertEquals("Failed to escape numeric entities supplementary characters", expected, result);
}
@Test public void testEscapeXmlAllCharacters() { // http://www.w3.org/TR/xml/#charsets says: // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode character, // excluding the surrogate blocks, FFFE, and FFFF. */ final CharSequenceTranslator escapeXml = StringEscapeUtils.ESCAPE_XML .with(NumericEntityEscaper.below(9), NumericEntityEscaper.between(0xB, 0xC), NumericEntityEscaper.between(0xE, 0x19), NumericEntityEscaper.between(0xD800, 0xDFFF), NumericEntityEscaper.between(0xFFFE, 0xFFFF), NumericEntityEscaper.above(0x110000)); assertEquals("�", escapeXml.translate("\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008")); assertEquals("\t", escapeXml.translate("\t")); // 0x9 assertEquals("\n", escapeXml.translate("\n")); // 0xA assertEquals("", escapeXml.translate("\u000B\u000C")); assertEquals("\r", escapeXml.translate("\r")); // 0xD assertEquals("Hello World! Ain't this great?", escapeXml.translate("Hello World! Ain't this great?")); assertEquals("", escapeXml.translate("\u000E\u000F\u0018\u0019")); }
/**
 * Tests Supplementary characters.
 * <p>
 * From http://www.w3.org/International/questions/qa-escapes
 * </p>
 * <blockquote>
 * Supplementary characters are those Unicode characters that have code points higher than the characters in
 * the Basic Multilingual Plane (BMP). In UTF-16 a supplementary character is encoded using two 16-bit surrogate code points from the
 * BMP. Because of this, some people think that supplementary characters need to be represented using two escapes, but this is incorrect
 * - you must use the single, code point value for that character. For example, use &#x233B4; rather than
 * &#xD84C;&#xDFB4;.
 * </blockquote>
 * <p>
 * The surrogate pair {@code \uD84C\uDFB4} encodes code point U+233B4, which is 144308 in decimal, so the
 * expected output is the single decimal entity for 144308.
 * </p>
 * @see <a href="http://www.w3.org/International/questions/qa-escapes">Using character escapes in markup and CSS</a>
 * @see <a href="https://issues.apache.org/jira/browse/LANG-728">LANG-728</a>
 */
@Test
public void testEscapeXmlSupplementaryCharacters() {
    final CharSequenceTranslator escapeXml =
        StringEscapeUtils.ESCAPE_XML.with( NumericEntityEscaper.between(0x7f, Integer.MAX_VALUE) );

    // Expected values must be the escaped form; the raw character would assert
    // that the supplementary code point was left untouched.
    assertEquals("Supplementary character must be represented using a single escape", "&#144308;",
            escapeXml.translate("\uD84C\uDFB4"));

    assertEquals("Supplementary characters mixed with basic characters should be encoded correctly", "a b c &#144308;",
            escapeXml.translate("a b c \uD84C\uDFB4"));
}
/**
 * <p>Creates a {@code NumericEntityEscaper} for code points lying outside the range
 * bounded by the two given values; the bounds themselves are excluded.</p>
 *
 * @param codepointLow the value below which to escape
 * @param codepointHigh the value above which to escape
 * @return the newly created {@code NumericEntityEscaper} instance
 */
public static NumericEntityEscaper outsideOf(final int codepointLow, final int codepointHigh) {
    // The third constructor argument is false for the "outside of" variant.
    final boolean insideRange = false;
    return new NumericEntityEscaper(codepointLow, codepointHigh, insideRange);
}
/**
 * <p>Creates a {@code NumericEntityEscaper} for code points above the given value;
 * the value itself is excluded.</p>
 *
 * <p>Delegates to {@code outsideOf(int, int)} with {@code 0} as the lower bound.</p>
 *
 * @param codepoint the value above which to escape
 * @return the newly created {@code NumericEntityEscaper} instance
 */
public static NumericEntityEscaper above(final int codepoint) {
    final int lowerBound = 0;
    return outsideOf(lowerBound, codepoint);
}
/**
 * <p>Creates a {@code NumericEntityEscaper} covering the range bounded by the two
 * given code points, with both bounds included.</p>
 *
 * @param codepointLow lower bound of the range to escape
 * @param codepointHigh upper bound of the range to escape
 * @return the newly created {@code NumericEntityEscaper} instance
 */
public static NumericEntityEscaper between(final int codepointLow, final int codepointHigh) {
    // The third constructor argument is true for the "between" variant.
    final boolean insideRange = true;
    return new NumericEntityEscaper(codepointLow, codepointHigh, insideRange);
}
/**
 * <p>Creates a {@code NumericEntityEscaper} for code points below the given value;
 * the value itself is excluded.</p>
 *
 * <p>Delegates to {@code outsideOf(int, int)} with {@code Integer.MAX_VALUE} as the
 * upper bound.</p>
 *
 * @param codepoint the value below which to escape
 * @return the newly created {@code NumericEntityEscaper} instance
 */
public static NumericEntityEscaper below(final int codepoint) {
    final int upperBound = Integer.MAX_VALUE;
    return outsideOf(codepoint, upperBound);
}
/** * Writes an attribute definition. */ protected void writeAttribute( String name, String value) { print( " "); print( name); print( "=\""); // StringEscapeUtils escapes symbols ', < >, &, ", and some control characters // NumericEntityEscaper translates additional control characters \n, \t, ... print( NumericEntityEscaper.below(0x20).translate(StringEscapeUtils.escapeXml11(value))); print( "\""); }
/**
 * <p>Creates a {@code NumericEntityEscaper} for code points lying outside the range
 * bounded by the two given values; the bounds themselves are excluded.</p>
 *
 * @param codepointLow the value below which to escape
 * @param codepointHigh the value above which to escape
 * @return the newly created {@code NumericEntityEscaper} instance
 */
public static NumericEntityEscaper outsideOf(final int codepointLow, final int codepointHigh) {
    // The third constructor argument is false for the "outside of" variant.
    final boolean insideRange = false;
    return new NumericEntityEscaper(codepointLow, codepointHigh, insideRange);
}
/**
 * <p>Creates a {@code NumericEntityEscaper} for code points above the given value;
 * the value itself is excluded.</p>
 *
 * <p>Delegates to {@code outsideOf(int, int)} with {@code 0} as the lower bound.</p>
 *
 * @param codepoint the value above which to escape
 * @return the newly created {@code NumericEntityEscaper} instance
 */
public static NumericEntityEscaper above(final int codepoint) {
    final int lowerBound = 0;
    return outsideOf(lowerBound, codepoint);
}
/**
 * <p>Creates a {@code NumericEntityEscaper} for code points lying outside the range
 * bounded by the two given values; the bounds themselves are excluded.</p>
 *
 * @param codepointLow the value below which to escape
 * @param codepointHigh the value above which to escape
 * @return the newly created {@code NumericEntityEscaper} instance
 */
public static NumericEntityEscaper outsideOf(final int codepointLow, final int codepointHigh) {
    // The third constructor argument is false for the "outside of" variant.
    final boolean insideRange = false;
    return new NumericEntityEscaper(codepointLow, codepointHigh, insideRange);
}
/**
 * <p>Creates a {@code NumericEntityEscaper} for code points above the given value;
 * the value itself is excluded.</p>
 *
 * <p>Delegates to {@code outsideOf(int, int)} with {@code 0} as the lower bound.</p>
 *
 * @param codepoint the value above which to escape
 * @return the newly created {@code NumericEntityEscaper} instance
 */
public static NumericEntityEscaper above(final int codepoint) {
    final int lowerBound = 0;
    return outsideOf(lowerBound, codepoint);
}
/**
 * <p>Creates a {@code NumericEntityEscaper} covering the range bounded by the two
 * given code points, with both bounds included.</p>
 *
 * @param codepointLow lower bound of the range to escape
 * @param codepointHigh upper bound of the range to escape
 * @return the newly created {@code NumericEntityEscaper} instance
 */
public static NumericEntityEscaper between(final int codepointLow, final int codepointHigh) {
    // The third constructor argument is true for the "between" variant.
    final boolean insideRange = true;
    return new NumericEntityEscaper(codepointLow, codepointHigh, insideRange);
}
/**
 * <p>Creates a {@code NumericEntityEscaper} for code points below the given value;
 * the value itself is excluded.</p>
 *
 * <p>Delegates to {@code outsideOf(int, int)} with {@code Integer.MAX_VALUE} as the
 * upper bound.</p>
 *
 * @param codepoint the value below which to escape
 * @return the newly created {@code NumericEntityEscaper} instance
 */
public static NumericEntityEscaper below(final int codepoint) {
    final int upperBound = Integer.MAX_VALUE;
    return outsideOf(codepoint, upperBound);
}