/**
 * Parses the character reference found at the very start of the given text and returns it
 * as a <code>CharacterReference</code> object.
 * <p>
 * The reference has to begin at position 0 of the text; any characters following it are ignored.
 * Call {@link #getEnd() getEnd()} on the returned object to find the position at which the
 * character reference ended.
 * <p>
 * <code>null</code> is returned when the text does not start with a valid character reference.
 * <p>
 * <a href="#Unterminated">Unterminated</a> character references are always accepted by this method,
 * whatever the {@linkplain Config#CurrentCompatibilityMode current compatibility mode} specifies.
 * <p>
 * Use {@link #decode(CharSequence)} instead to decode <i>all</i> character references in a text.
 * <dl>
 *  <dt>Example:</dt>
 *  <dd>{@code CharacterReference.parse("&gt;").getChar()} returns '{@code >}'</dd>
 * </dl>
 *
 * @param characterReferenceText  the text containing a single encoded character reference.
 * @return a <code>CharacterReference</code> object representing the specified text, or <code>null</code> if the text does not represent a valid character reference.
 * @see #decode(CharSequence)
 */
public static CharacterReference parse(final CharSequence characterReferenceText) {
    // Wrap the text in its own Source so that the reference can be constructed at offset 0.
    final Source wrappedText = new Source(characterReferenceText, true);
    return construct(wrappedText, 0, Config.UnterminatedCharacterReferenceSettings.ACCEPT_ALL);
}
/**
 * Searches forward through the parse text of the source, starting from <code>pos</code>,
 * for an ampersand at which a character reference can be constructed.
 * <p>
 * Every <code>'&amp;'</code> located by {@code ParseText.indexOf} is tried in turn; an ampersand
 * for which {@code construct} returns <code>null</code> is skipped and the search resumes
 * one position after it.
 *
 * @param source  the source to search.
 * @param pos  the position from which the search for <code>'&amp;'</code> starts.
 * @param unterminatedCharacterReferenceSettings  passed unchanged to {@code construct} for every candidate.
 * @return the first character reference that could be constructed, or <code>null</code> if no ampersand yields one.
 */
private static CharacterReference getNext(final Source source, int pos, final Config.UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettings) {
    final ParseText text = source.getParseText();
    for (int ampersand = text.indexOf('&', pos); ampersand != -1; ampersand = text.indexOf('&', ampersand + 1)) {
        final CharacterReference candidate = construct(source, ampersand, unterminatedCharacterReferenceSettings);
        if (candidate != null) {
            return candidate;
        }
    }
    return null;
}
/**
 * Searches backwards through the parse text of the source, starting from <code>pos</code>,
 * for an ampersand at which a character reference can be constructed.
 * <p>
 * Every <code>'&amp;'</code> located by {@code ParseText.lastIndexOf} is tried in turn; an ampersand
 * for which {@code construct} returns <code>null</code> is skipped and the search resumes
 * one position before it.
 *
 * @param source  the source to search.
 * @param pos  the position from which the backwards search for <code>'&amp;'</code> starts.
 * @param unterminatedCharacterReferenceSettings  passed unchanged to {@code construct} for every candidate.
 * @return the first character reference that could be constructed, or <code>null</code> if no ampersand yields one.
 */
private static CharacterReference getPrevious(final Source source, int pos, final Config.UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettings) {
    final ParseText text = source.getParseText();
    for (int ampersand = text.lastIndexOf('&', pos); ampersand != -1; ampersand = text.lastIndexOf('&', ampersand - 1)) {
        final CharacterReference candidate = construct(source, ampersand, unterminatedCharacterReferenceSettings);
        if (candidate != null) {
            return candidate;
        }
    }
    return null;
}
/**
 * Produces the next segment to return for the range <code>begin</code>..<code>end</code>,
 * splitting the range at character references unless legacy iterator compatibility mode is on.
 * <p>
 * Outcomes, in the order they are checked:
 * <ol>
 *  <li>Legacy mode: the whole range is returned as one plain segment and <code>pos</code> becomes <code>end</code>.
 *  <li>A character reference was stashed by the previous call: it is returned, the stash is cleared,
 *      and <code>pos</code> moves to the end of that reference.
 *  <li>A character reference starts exactly at <code>begin</code>: it is returned and <code>pos</code> moves to its end.
 *  <li>A character reference starts later in the range: the text before it is returned as a plain segment,
 *      <code>pos</code> moves to the start of the reference, and the reference is stashed for the next call.
 *  <li>No character reference can be constructed: the whole range is returned and <code>pos</code> becomes <code>end</code>.
 * </ol>
 * Unterminated character references are always accepted (<code>ACCEPT_ALL</code>).
 *
 * @param begin  the start of the range to examine.
 * @param end  the end of the range; the search for <code>'&amp;'</code> is limited by this value.
 * @return either a <code>CharacterReference</code> or a plain <code>Segment</code> starting at <code>begin</code>.
 */
private Segment nextNonTagSegment(final int begin, final int end) {
    if (legacyIteratorCompatabilityMode) {
        pos = end;
        return new Segment(source, begin, end);
    }

    // A reference discovered by the previous call is handed out first.
    final CharacterReference pending = characterReferenceAtCurrentPosition;
    if (pending != null) {
        characterReferenceAtCurrentPosition = null;
        pos = pending.end;
        return pending;
    }

    final ParseText text = source.getParseText();
    for (int ampersand = text.indexOf('&', begin, end); ampersand != -1; ampersand = text.indexOf('&', ampersand + 1, end)) {
        final CharacterReference found = CharacterReference.construct(source, ampersand, Config.UnterminatedCharacterReferenceSettings.ACCEPT_ALL);
        if (found == null) {
            continue; // this ampersand does not start a character reference, try the next one
        }
        if (ampersand == begin) {
            pos = found.end;
            return found;
        }
        // Return the text preceding the reference now and remember the reference for the next call.
        pos = found.begin;
        characterReferenceAtCurrentPosition = found;
        return new Segment(source, begin, pos);
    }

    pos = end;
    return new Segment(source, begin, end);
}
private static Appendable appendDecode(final Appendable appendable, final Segment segment, final int searchBegin, final boolean insideAttributeValue, final boolean convertNonBreakingSpaces) throws IOException { final Config.UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettings=Config.CurrentCompatibilityMode.getUnterminatedCharacterReferenceSettings(insideAttributeValue); final Source source=segment.source; final ParseText parseText=source.getParseText(); final int end=segment.getEnd(); int begin=segment.getBegin(); int pos=parseText.indexOf('&',begin+searchBegin,end); while (pos!=-1) { final CharacterReference characterReference=CharacterReference.construct(source,pos,unterminatedCharacterReferenceSettings); if (characterReference!=null) { appendable.append(source.substring(begin,pos)); // Don't use appendable.append(source,begin,pos) as it checks source.length() which may throw an exception when using StreamedSource. characterReference.appendCharTo(appendable,convertNonBreakingSpaces); begin=characterReference.getEnd(); pos=parseText.indexOf('&',begin,end); } else { pos=parseText.indexOf('&',pos+1,end); } } appendable.append(source.substring(begin,end)); return appendable; }
if (ch=='&') { if (i>=source.fullSequentialParseData[0]) { // do not handle character references inside tags or script elements final CharacterReference characterReference=CharacterReference.construct(source,i,unterminatedCharacterReferenceSettings); if (characterReference!=null) return characterReference;