CharacterReader reader = new CharacterReader(pointsData); while (!reader.isEmpty()) { final String name = reader.consumeTo('='); reader.advance(); final int cp1 = Integer.parseInt(reader.consumeToAny(codeDelims), codepointRadix); final char codeDelim = reader.current(); reader.advance(); final int cp2; if (codeDelim == ',') { cp2 = Integer.parseInt(reader.consumeTo(';'), codepointRadix); reader.advance(); } else { cp2 = empty; final String indexS = reader.consumeTo('&'); final int index = Integer.parseInt(indexS, codepointRadix); reader.advance();
String consumeDigitSequence() { bufferUp(); int start = bufPos; while (bufPos < bufLength) { char c = charBuf[bufPos]; if (c >= '0' && c <= '9') bufPos++; else break; } return cacheString(charBuf, stringCache, start, bufPos - start); }
char consume() { bufferUp(); char val = isEmptyNoBufferUp() ? EOF : charBuf[bufPos]; bufPos++; return val; }
int[] consumeCharacterReference(Character additionalAllowedCharacter, boolean inAttribute) { if (reader.isEmpty()) return null; if (additionalAllowedCharacter != null && additionalAllowedCharacter == reader.current()) return null; if (reader.matchesAnySorted(notCharRefCharsSorted)) return null; reader.mark(); if (reader.matchConsume("#")) { // numbered boolean isHexMode = reader.matchConsumeIgnoreCase("X"); String numRef = isHexMode ? reader.consumeHexSequence() : reader.consumeDigitSequence(); if (numRef.length() == 0) { // didn't match anything characterReferenceError("numeric reference with no numerals"); reader.rewindToMark(); return null; if (!reader.matchConsume(";")) characterReferenceError("missing semicolon"); // missing semi int charval = -1; String nameRef = reader.consumeLetterThenDigitSequence(); boolean looksLegit = reader.matches(';'); reader.rewindToMark(); if (looksLegit) // named with semicolon characterReferenceError(String.format("invalid named referenece '%s'", nameRef)); return null; if (inAttribute && (reader.matchesLetter() || reader.matchesDigit() || reader.matchesAny('=', '-', '_'))) {
void read(Tokeniser t, CharacterReader r) { char c = r.current(); switch (c) { case '-': t.advanceTransition(CommentEndDash); break; case nullChar: t.error(this); r.advance(); t.commentPending.data.append(replacementChar); break; case eof: t.eofError(this); t.emitCommentPending(); t.transition(Data); break; default: t.commentPending.data.append(r.consumeToAny('-', nullChar)); } } },
void read(Tokeniser t, CharacterReader r) { if (r.isEmpty()) { t.eofError(this); t.doctypePending.forceQuirks = true; t.emitDoctypePending(); t.transition(Data); return; } if (r.matchesAny('\t', '\n', '\r', '\f', ' ')) r.advance(); // ignore whitespace else if (r.matches('>')) { t.emitDoctypePending(); t.advanceTransition(Data); } else if (r.matchConsumeIgnoreCase(DocumentType.PUBLIC_KEY)) { t.doctypePending.pubSysKey = DocumentType.PUBLIC_KEY; t.transition(AfterDoctypePublicKeyword); } else if (r.matchConsumeIgnoreCase(DocumentType.SYSTEM_KEY)) { t.doctypePending.pubSysKey = DocumentType.SYSTEM_KEY; t.transition(AfterDoctypeSystemKeyword); } else { t.error(this); t.doctypePending.forceQuirks = true; t.advanceTransition(BogusDoctype); } } },
/** * Utility method to consume reader and unescape entities found within. * @param inAttribute if the text to be unescaped is in an attribute * @return unescaped string from reader */ String unescapeEntities(boolean inAttribute) { StringBuilder builder = StringUtil.stringBuilder(); while (!reader.isEmpty()) { builder.append(reader.consumeTo('&')); if (reader.matches('&')) { reader.consume(); int[] c = consumeCharacterReference(null, inAttribute); if (c == null || c.length==0) builder.append('&'); else { builder.appendCodePoint(c[0]); if (c.length == 2) builder.appendCodePoint(c[1]); } } } return builder.toString(); } }
private static void handleDataDoubleEscapeTag(Tokeniser t, CharacterReader r, TokeniserState primary, TokeniserState fallback) { if (r.matchesLetter()) { String name = r.consumeLetterSequence(); t.dataBuffer.append(name); t.emit(name); return; } char c = r.consume(); switch (c) { case '\t': case '\n': case '\r': case '\f': case ' ': case '/': case '>': if (t.dataBuffer.toString().equals("script")) t.transition(primary); else t.transition(fallback); t.emit(c); break; default: r.unconsume(); t.transition(fallback); } } }
private void characterReferenceError(String message) { if (errors.canAddError()) errors.add(new ParseError(reader.pos(), "Invalid character reference: %s", message)); }
void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '-': t.transition(CommentEnd); break; case nullChar: t.error(this); t.commentPending.data.append('-').append(replacementChar); t.transition(Comment); break; case eof: t.eofError(this); t.emitCommentPending(); t.transition(Data); break; default: t.commentPending.data.append('-').append(c); t.transition(Comment); } } },
void read(Tokeniser t, CharacterReader r) { if (r.isEmpty()) { t.eofError(this); t.transition(Data); return; } switch (r.current()) { case '-': t.emit('-'); t.advanceTransition(ScriptDataEscapedDash); break; case '<': t.advanceTransition(ScriptDataEscapedLessthanSign); break; case nullChar: t.error(this); r.advance(); t.emit(replacementChar); break; default: String data = r.consumeToAny('-', '<', nullChar); t.emit(data); } } },
if (r.matchesLetter()) { String name = r.consumeLetterSequence(); t.tagPending.appendTagName(name); t.dataBuffer.append(name); if (t.isAppropriateEndTagToken() && !r.isEmpty()) { char c = r.consume(); switch (c) { case '\t':
String consumeTo(String seq) { int offset = nextIndexOf(seq); if (offset != -1) { String consumed = cacheString(charBuf, stringCache, bufPos, offset); bufPos += offset; return consumed; } else { return consumeToEnd(); } }
void read(Tokeniser t, CharacterReader r) { if (r.matchesLetter()) { String name = r.consumeLetterSequence(); t.doctypePending.name.append(name); return; char c = r.consume(); switch (c) { case '>':
String consumeLetterThenDigitSequence() { bufferUp(); int start = bufPos; while (bufPos < bufLength) { char c = charBuf[bufPos]; if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || Character.isLetter(c)) bufPos++; else break; } while (!isEmptyNoBufferUp()) { char c = charBuf[bufPos]; if (c >= '0' && c <= '9') bufPos++; else break; } return cacheString(charBuf, stringCache, start, bufPos - start); }
void read(Tokeniser t, CharacterReader r) { switch (r.current()) { case '&': t.advanceTransition(CharacterReferenceInData); break; case '<': t.advanceTransition(TagOpen); break; case nullChar: t.error(this); // NOT replacement character (oddly?) t.emit(r.consume()); break; case eof: t.emit(new Token.EOF()); break; default: String data = r.consumeData(); t.emit(data); break; } } },