/**
 * Converts HTML encoded text back into plain text.
 * <p>
 * Every {@linkplain CharacterEntityReference character entity reference} and
 * {@linkplain NumericCharacterReference numeric character reference} is replaced by the character it represents.
 * <p>
 * Calling this method gives the same result as {@link #decode(CharSequence,boolean) decode(encodedText,false)}.
 * <p>
 * <a href="#Unterminated">Unterminated</a> character references are handled using the rules that apply to
 * text outside of attribute values in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode}.
 * <p>
 * When the static {@link Config#ConvertNonBreakingSpaces} property is <code>true</code> (the default),
 * each non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity reference becomes a normal space.
 * <p>
 * Character entity reference names are case sensitive, and some differ from other entity references only by case,
 * but some browsers also recognise them case-insensitively.
 * For that reason all decoding methods in this library accept character entity reference names written in the wrong case.
 *
 * @param encodedText  the text to decode.
 * @return the decoded string.
 * @see #encode(CharSequence)
 */
public static String decode(final CharSequence encodedText) {
	// Plain text context: the text is treated as being outside of an attribute value.
	final boolean insideAttributeValue=false;
	final boolean convertNonBreakingSpaces=Config.ConvertNonBreakingSpaces;
	return decode(encodedText,insideAttributeValue,convertNonBreakingSpaces);
}
/**
 * Normalises already encoded text by decoding it and then encoding the result again.
 * <p>
 * After this round trip the returned text no longer contains characters that should have been encoded but were not.
 * <p>
 * IMPLEMENTATION NOTE: the method currently calls {@link #decode(CharSequence,boolean) decode} and then
 * {@link #encode(CharSequence,boolean) encode}, passing <code>true</code> for <code>insideAttributeValue</code> in both calls.
 *
 * @param encodedText  the text to re-encode.
 * @return the re-encoded string.
 */
public static String reencode(final CharSequence encodedText) {
	final String decodedText=decode(encodedText,true);
	return encode(decodedText,true);
}
/**
 * Returns the source text in the range {@code [start, end)} with every character reference
 * that lies completely inside the range replaced by the character it represents.
 * <p>
 * Text between references is copied unchanged. Decoding stops at the first reference that is
 * missing, begins at or after {@code end}, or extends past {@code end}; the rest of the range
 * is then copied verbatim.
 *
 * @param start index of the first character of the range (inclusive).
 * @param end index just past the last character of the range (exclusive).
 * @return the text of the range with the contained character references decoded.
 */
private String parseText(int start, int end) {
    StringBuilder sb = new StringBuilder(Math.max(0, end - start));
    while (start < end) {
        CharacterReference ref = source.getNextCharacterReference(start);
        // A reference that straddles 'end' is not part of this range: treating it as decoded
        // would move 'start' beyond 'end' and break the final subSequence call below.
        if (ref == null || ref.getBegin() >= end || ref.getEnd() > end) {
            break;
        }
        sb.append(source.subSequence(start, ref.getBegin()));
        // Append the full code point so that supplementary characters (outside the BMP)
        // are written as a surrogate pair instead of being narrowed to a single char.
        sb.appendCodePoint(ref.getCodePoint());
        start = ref.getEnd();
    }
    sb.append(source.subSequence(start, end));
    return sb.toString();
}
/**
 * Appends a tidied form of the given value: the value is first decoded and the decoded text is then
 * encoded onto the appendable, with <code>insideAttributeValue</code> set to <code>true</code> for both steps.
 *
 * @param appendable  the destination that receives the encoded text.
 * @param unencodedValue  the value to decode and re-encode.
 * @throws IOException if the appendable reports an I/O error.
 */
private static void appendTidyValue(final Appendable appendable, final CharSequence unencodedValue) throws IOException {
	final String decodedValue=CharacterReference.decode(unencodedValue,true);
	CharacterReference.appendEncode(appendable,decodedValue,true);
}
/**
 * Passes the text through <code>appendCollapseWhiteSpace</code> and then decodes the result
 * as text outside of an attribute value.
 *
 * @param text  the encoded text to process.
 * @param convertNonBreakingSpaces  forwarded unchanged to the decode step.
 * @return the decoded result of the white space collapsing step.
 */
static String decodeCollapseWhiteSpace(final CharSequence text, final boolean convertNonBreakingSpaces) {
	final StringBuilder buffer=new StringBuilder(text.length());
	final CharSequence collapsedText=appendCollapseWhiteSpace(buffer,text);
	return decode(collapsedText,false,convertNonBreakingSpaces);
}
/**
 * Appends the character represented by this character reference to the given appendable.
 * <p>
 * A supplementary code point is written as its high surrogate followed by its low surrogate.
 * Any other code point is written as a single <code>char</code>, except that a non-breaking space
 * is written as a normal space when <code>convertNonBreakingSpaces</code> is <code>true</code>.
 *
 * @param appendable  the destination to append to.
 * @param convertNonBreakingSpaces  whether a non-breaking space is replaced by a normal space.
 * @throws IOException if the appendable reports an I/O error.
 */
private void appendCharTo(Appendable appendable, final boolean convertNonBreakingSpaces) throws IOException {
	if (!Character.isSupplementaryCodePoint(codePoint)) {
		final char ch=getChar();
		final boolean replaceWithSpace=convertNonBreakingSpaces && ch==CharacterEntityReference._nbsp;
		appendable.append(replaceWithSpace ? ' ' : ch);
		return;
	}
	// Supplementary character: two UTF-16 code units are required.
	appendable.append(getHighSurrogate(codePoint));
	appendable.append(getLowSurrogate(codePoint));
}
private static Appendable appendDecode(final Appendable appendable, final Segment segment, final int searchBegin, final boolean insideAttributeValue, final boolean convertNonBreakingSpaces) throws IOException { final Config.UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettings=Config.CurrentCompatibilityMode.getUnterminatedCharacterReferenceSettings(insideAttributeValue); final Source source=segment.source; final ParseText parseText=source.getParseText(); final int end=segment.getEnd(); int begin=segment.getBegin(); int pos=parseText.indexOf('&',begin+searchBegin,end); while (pos!=-1) { final CharacterReference characterReference=CharacterReference.construct(source,pos,unterminatedCharacterReferenceSettings); if (characterReference!=null) { appendable.append(source.substring(begin,pos)); // Don't use appendable.append(source,begin,pos) as it checks source.length() which may throw an exception when using StreamedSource. characterReference.appendCharTo(appendable,convertNonBreakingSpaces); begin=characterReference.getEnd(); pos=parseText.indexOf('&',begin,end); } else { pos=parseText.indexOf('&',pos+1,end); } } appendable.append(source.substring(begin,end)); return appendable; }
/**
 * Encodes text by escaping certain characters into character references.
 * <p>
 * Calling this method gives the same result as {@link #encode(CharSequence,boolean) encode(unencodedText,true)}.
 *
 * @param unencodedText  the text to encode.
 * @return the encoded string.
 */
public static String encode(final CharSequence unencodedText) {
	// The single-argument form always encodes as if inside an attribute value.
	final boolean insideAttributeValue=true;
	return encode(unencodedText,insideAttributeValue);
}
final String getDisplayValueHTML(final CharSequence text, final boolean whiteSpaceFormatting) { final StringBuilder sb=new StringBuilder((text==null ? 0 : text.length()*2)+50); sb.append('<').append(FormControlOutputStyle.ConfigDisplayValue.ElementName); try { for (String attributeName : FormControlOutputStyle.ConfigDisplayValue.AttributeNames) { final CharSequence attributeValue=elementContainer.getAttributeValue(attributeName); if (attributeValue==null) continue; Attribute.appendHTML(sb,attributeName,attributeValue); } sb.append('>'); if (text==null || text.length()==0) { sb.append(FormControlOutputStyle.ConfigDisplayValue.EmptyHTML); } else { if (whiteSpaceFormatting) { sb.append(CharacterReference.encodeWithWhiteSpaceFormatting(text)); } else { CharacterReference.appendEncode(sb,text,false); } } } catch (IOException ex) {throw new RuntimeException(ex);} // never happens sb.append(EndTagType.START_DELIMITER_PREFIX).append(FormControlOutputStyle.ConfigDisplayValue.ElementName).append('>'); return sb.toString(); }
if (!isWhiteSpace(ch)) { appendEncode(appendable,ch,false); continue;
private static Appendable appendDecode(final Appendable appendable, final CharSequence encodedText, final int searchBegin, final boolean insideAttributeValue, final boolean convertNonBreakingSpaces) throws IOException { if (encodedText instanceof Segment) return appendDecode(appendable,(Segment)encodedText,searchBegin,insideAttributeValue,convertNonBreakingSpaces); final Config.UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettings=Config.CurrentCompatibilityMode.getUnterminatedCharacterReferenceSettings(insideAttributeValue); final StreamedSource streamedSource=new StreamedSource(encodedText).setHandleTags(false).setUnterminatedCharacterReferenceSettings(unterminatedCharacterReferenceSettings).setSearchBegin(searchBegin); for (Segment segment : streamedSource) { if (segment instanceof CharacterReference) { ((CharacterReference)segment).appendCharTo(appendable,convertNonBreakingSpaces); } else { appendable.append(segment.toString()); // benchmark tests reveal (surprisingly) that converting to a string before appending is faster than appending the specified section of the encodedText or segment directly. // appendable.append(encodedText,segment.begin,segment.end); // appendable.append(segment); } } return appendable; }
/**
 * Appends an attribute in HTML form: a space followed by the name and, if the value is not <code>null</code>,
 * an equals sign and the encoded value enclosed in double quotes.
 *
 * @param appendable  the destination to append to.
 * @param name  the attribute name, appended without modification.
 * @param value  the attribute value, encoded with <code>insideAttributeValue</code> set to <code>true</code>;
 *   if <code>null</code> only the name is appended.
 * @return the appendable that was passed in.
 * @throws IOException if the appendable reports an I/O error.
 */
static Appendable appendHTML(final Appendable appendable, final CharSequence name, final CharSequence value) throws IOException {
	appendable.append(' ').append(name);
	if (value==null) return appendable;
	appendable.append("=\"");
	CharacterReference.appendEncode(appendable,value,true);
	appendable.append('"');
	return appendable;
}
}
/** * Returns the <a href="NumericCharacterReference.html#DecimalCharacterReference">decimal encoded form</a> of the specified unicode code point. * <p> * <dl> * <dt>Example:</dt> * <dd><code>CharacterReference.getDecimalCharacterReferenceString('>')</code> returns "<code>&#62;</code>"</dd> * </dl> * * @param codePoint the unicode code point to encode. * @return the decimal encoded form of the specified unicode code point. * @see #getCharacterReferenceString(int codePoint) * @see #getHexadecimalCharacterReferenceString(int codePoint) */ public static String getDecimalCharacterReferenceString(final int codePoint) { try { return appendDecimalCharacterReferenceString(new StringBuilder(),codePoint).toString(); } catch (IOException ex) {throw new RuntimeException(ex);} // never happens }
/**
 * Appends the character that this character reference represents to the given appendable object.
 * <p>
 * If the character is a unicode <a target="_blank" href="http://unicode.org/glossary/#supplementary_character">supplementary character</a>,
 * both the UTF-16 high and low surrogate <code>char</code> values of the character are appended, as described in the
 * <a target="_blank" href="http://java.sun.com/javase/6/docs/api/java/lang/Character.html#unicode">Unicode character representations</a> section of the
 * <code>java.lang.Character</code> class.
 * <p>
 * If the static {@link Config#ConvertNonBreakingSpaces} property is <code>true</code> (the default),
 * calling this method on a non-breaking space character reference ({@link CharacterEntityReference#_nbsp &amp;nbsp;})
 * appends a normal space instead.
 *
 * @param appendable  the object to append this character reference to.
 * @throws IOException if the appendable reports an I/O error.
 */
public final void appendCharTo(Appendable appendable) throws IOException {
	final boolean convertNonBreakingSpaces=Config.ConvertNonBreakingSpaces;
	appendCharTo(appendable,convertNonBreakingSpaces);
}
static final String decode(final CharSequence encodedText, final boolean insideAttributeValue, final boolean convertNonBreakingSpaces) { if (encodedText==null) return null; final String encodedTextString=encodedText.toString(); // converting to string first is faster than searching the CharSequence directly. final int firstAmpersandPos=encodedTextString.indexOf('&'); if (firstAmpersandPos==-1) return encodedTextString; try { return appendDecode(new StringBuilder(encodedText.length()),encodedText,firstAmpersandPos,insideAttributeValue,convertNonBreakingSpaces).toString(); } catch (IOException ex) {throw new RuntimeException(ex);} // never happens }
/**
 * Handles GET requests for the e-mail address editing page.
 * <p>
 * The model is obtained from {@code getSynchronizedEmailAddressesModel} for the current user profile,
 * and the encoded form of the optional {@code newEmail} request parameter is stored in it under
 * the key {@code "newEmail"}.
 *
 * @param email value of the optional {@code newEmail} request parameter.
 * @return the model and view for the {@code USER_EMAIL_ADRESSES_EDIT} view.
 */
@RequestMapping(value = { "/user/edit/emailAddresses" }, method = RequestMethod.GET)
public ModelAndView handleEmailAddresses(@RequestParam(value = "newEmail", required=false) String email) {
    final UserProfile currentProfile = this.userBusinessService.getCurrentUserProfile();
    final Map<String, Object> viewModel = getSynchronizedEmailAddressesModel(currentProfile);
    // The parameter value is encoded before it is handed to the view.
    final String encodedEmail = CharacterReference.encode(email);
    viewModel.put("newEmail", encodedEmail);
    return new ModelAndView(USER_EMAIL_ADRESSES_EDIT, viewModel);
}
/** * Encodes the specified character into a character reference if {@linkplain Config#CurrentCharacterReferenceEncodingBehaviour required}. * <p> * The encoding of the character follows the same rules as for each character in the {@link #encode(CharSequence unencodedText, boolean insideAttributeValue)} method, * with <code>insideAttributeValue</code> set to <code>true</code>. * * @param ch the character to encode. * @return a character reference if appropriate, otherwise a string containing the original character. */ public static String encode(final char ch) { try { return appendEncode(new StringBuilder(MAX_ENTITY_REFERENCE_LENGTH),ch,true).toString(); } catch (IOException ex) {throw new RuntimeException(ex);} // never happens }
/**
 * Appends the given character to the appendable, encoded as a character reference if the
 * current character reference encoding behaviour says it is to be encoded.
 * <p>
 * An encoded character is written as a character entity reference when a name is available for it,
 * except for the apostrophe, which is always written as a decimal character reference.
 * A character without a name is also written as a decimal character reference.
 *
 * @param appendable  the destination to append to.
 * @param ch  the character to append.
 * @param insideAttributeValue  passed to the encoding behaviour when deciding whether to encode.
 * @return the appendable that was passed in.
 * @throws IOException if the appendable reports an I/O error.
 */
private static final Appendable appendEncode(final Appendable appendable, final char ch, final boolean insideAttributeValue) throws IOException {
	if (!Config.CurrentCharacterReferenceEncodingBehaviour.isEncoded(ch,insideAttributeValue)) {
		appendable.append(ch);
		return appendable;
	}
	final String entityName=CharacterEntityReference.getName(ch);
	final boolean useDecimalForm=entityName==null || ch=='\'';
	if (useDecimalForm) {
		appendDecimalCharacterReferenceString(appendable,ch);
	} else {
		CharacterEntityReference.appendCharacterReferenceString(appendable,entityName);
	}
	return appendable;
}
/**
 * Converts HTML encoded text back into plain text, taking the attribute value context into account.
 * <p>
 * Every {@linkplain CharacterEntityReference character entity reference} and
 * {@linkplain NumericCharacterReference numeric character reference} is replaced by the character it represents.
 * <p>
 * <a href="#Unterminated">Unterminated</a> character references are handled according to the
 * value of the <code>insideAttributeValue</code> parameter and the
 * {@linkplain Config#CurrentCompatibilityMode current compatibility mode}.
 * <p>
 * When the static {@link Config#ConvertNonBreakingSpaces} property is <code>true</code> (the default),
 * each non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity reference becomes a normal space.
 * <p>
 * Character entity reference names are case sensitive, and some differ from other entity references only by case,
 * but some browsers also recognise them case-insensitively.
 * For that reason all decoding methods in this library accept character entity reference names written in the wrong case.
 *
 * @param encodedText  the text to decode.
 * @param insideAttributeValue  specifies whether the encoded text is inside an attribute value.
 * @return the decoded string.
 * @see #decode(CharSequence)
 * @see #encode(CharSequence)
 */
public static String decode(final CharSequence encodedText, final boolean insideAttributeValue) {
	final boolean convertNonBreakingSpaces=Config.ConvertNonBreakingSpaces;
	return decode(encodedText,insideAttributeValue,convertNonBreakingSpaces);
}
/**
 * Returns the source text in the range {@code [start, end)} with every character reference
 * that lies completely inside the range replaced by the character it represents.
 * <p>
 * Text between references is copied unchanged. Decoding stops at the first reference that is
 * missing, begins at or after {@code end}, or extends past {@code end}; the rest of the range
 * is then copied verbatim.
 *
 * @param start index of the first character of the range (inclusive).
 * @param end index just past the last character of the range (exclusive).
 * @return the text of the range with the contained character references decoded.
 */
private String parseText(int start, int end) {
    StringBuilder sb = new StringBuilder(Math.max(0, end - start));
    while (start < end) {
        CharacterReference ref = source.getNextCharacterReference(start);
        // A reference that straddles 'end' is not part of this range: treating it as decoded
        // would move 'start' beyond 'end' and break the final subSequence call below.
        if (ref == null || ref.getBegin() >= end || ref.getEnd() > end) {
            break;
        }
        sb.append(source.subSequence(start, ref.getBegin()));
        // Append the full code point so that supplementary characters (outside the BMP)
        // are written as a surrogate pair instead of being narrowed to a single char.
        sb.appendCodePoint(ref.getCodePoint());
        start = ref.getEnd();
    }
    sb.append(source.subSequence(start, end));
    return sb.toString();
}