void emit(Token token) { Validate.isFalse(isEmitPending, "There is an unread token pending!"); emitPending = token; isEmitPending = true; if (token.type == Token.TokenType.StartTag) { Token.StartTag startTag = (Token.StartTag) token; lastStartTag = startTag.tagName; } else if (token.type == Token.TokenType.EndTag) { Token.EndTag endTag = (Token.EndTag) token; if (endTag.attributes != null) error("Attributes incorrectly present on end tag"); } }
void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '"': t.transition(AfterDoctypeSystemIdentifier); break; case nullChar: t.error(this); t.doctypePending.systemIdentifier.append(replacementChar); break; case '>': t.error(this); t.doctypePending.forceQuirks = true; t.emitDoctypePending(); t.transition(Data); break; case eof: t.eofError(this); t.doctypePending.forceQuirks = true; t.emitDoctypePending(); t.transition(Data); break; default: t.doctypePending.systemIdentifier.append(c); } } },
void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '\'': t.transition(AfterDoctypeSystemIdentifier); break; case nullChar: t.error(this); t.doctypePending.systemIdentifier.append(replacementChar); break; case '>': t.error(this); t.doctypePending.forceQuirks = true; t.emitDoctypePending(); t.transition(Data); break; case eof: t.eofError(this); t.doctypePending.forceQuirks = true; t.emitDoctypePending(); t.transition(Data); break; default: t.doctypePending.systemIdentifier.append(c); } } },
void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '"': t.transition(AfterDoctypePublicIdentifier); break; case nullChar: t.error(this); t.doctypePending.publicIdentifier.append(replacementChar); break; case '>': t.error(this); t.doctypePending.forceQuirks = true; t.emitDoctypePending(); t.transition(Data); break; case eof: t.eofError(this); t.doctypePending.forceQuirks = true; t.emitDoctypePending(); t.transition(Data); break; default: t.doctypePending.publicIdentifier.append(c); } } },
void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '-': t.transition(CommentEnd); break; case nullChar: t.error(this); t.commentPending.data.append('-').append(replacementChar); t.transition(Comment); break; case eof: t.eofError(this); t.emitCommentPending(); t.transition(Data); break; default: t.commentPending.data.append('-').append(c); t.transition(Comment); } } },
void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '-': t.transition(CommentStartDash); break; case nullChar: t.error(this); t.commentPending.data.append(replacementChar); t.transition(Comment); break; case '>': t.error(this); t.emitCommentPending(); t.transition(Data); break; case eof: t.eofError(this); t.emitCommentPending(); t.transition(Data); break; default: t.commentPending.data.append(c); t.transition(Comment); } } },
void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '\t': case '\n': case '\r': case '\f': case ' ': t.transition(BeforeDoctypeName); break; case eof: t.eofError(this); // note: fall through to > case case '>': // catch invalid <!DOCTYPE> t.error(this); t.createDoctypePending(); t.doctypePending.forceQuirks = true; t.emitDoctypePending(); t.transition(Data); break; default: t.error(this); t.transition(BeforeDoctypeName); } } },
void read(Tokeniser t, CharacterReader r) { char c = r.current(); switch (c) { case '-': t.advanceTransition(CommentEndDash); break; case nullChar: t.error(this); r.advance(); t.commentPending.data.append(replacementChar); break; case eof: t.eofError(this); t.emitCommentPending(); t.transition(Data); break; default: t.commentPending.data.append(r.consumeToAny('-', nullChar)); } } },
void read(Tokeniser t, CharacterReader r) { if (r.isEmpty()) { t.eofError(this); t.emit("</"); t.transition(Data); } else if (r.matchesLetter()) { t.createTagPending(false); t.transition(TagName); } else if (r.matches('>')) { t.error(this); t.advanceTransition(Data); } else { t.error(this); t.advanceTransition(BogusComment); } } },
void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '>': t.tagPending.selfClosing = true; t.emitTagPending(); t.transition(Data); break; case eof: t.eofError(this); t.transition(Data); break; default: t.error(this); r.unconsume(); t.transition(BeforeAttributeName); } } },
Element insertEmpty(Token.StartTag startTag) { Tag tag = Tag.valueOf(startTag.name(), settings); Element el = new Element(tag, baseUri, startTag.attributes); insertNode(el); if (startTag.isSelfClosing()) { if (tag.isKnownTag()) { if (!tag.isEmpty()) tokeniser.error("Tag cannot be self closing; not a void tag"); } else // unknown tag, remember this is self closing for output tag.setSelfClosing(); } return el; }
void read(Tokeniser t, CharacterReader r) { switch (r.current()) { case nullChar: t.error(this); r.advance(); t.emit(replacementChar); break; case eof: t.emit(new Token.EOF()); break; default: String data = r.consumeTo(nullChar); t.emit(data); break; } } },
private static void readData(Tokeniser t, CharacterReader r, TokeniserState current, TokeniserState advance) { switch (r.current()) { case '<': t.advanceTransition(advance); break; case nullChar: t.error(current); r.advance(); t.emit(replacementChar); break; case eof: t.emit(new Token.EOF()); break; default: String data = r.consumeToAny('<', nullChar); // todo - why hunt for null here? Just consumeTo'<'? t.emit(data); break; } }
void read(Tokeniser t, CharacterReader r) { char c = r.consume(); switch (c) { case '\t': case '\n': case '\r': case '\f': case ' ': break; case '>': t.emitDoctypePending(); t.transition(Data); break; case eof: t.eofError(this); t.doctypePending.forceQuirks = true; t.emitDoctypePending(); t.transition(Data); break; default: t.error(this); t.transition(BogusDoctype); // NOT force quirks } } },
void read(Tokeniser t, CharacterReader r) { if (r.matchConsume("--")) { t.createCommentPending(); t.transition(CommentStart); } else if (r.matchConsumeIgnoreCase("DOCTYPE")) { t.transition(Doctype); } else if (r.matchConsume("[CDATA[")) { // todo: should actually check current namepspace, and only non-html allows cdata. until namespace // is implemented properly, keep handling as cdata //} else if (!t.currentNodeInHtmlNS() && r.matchConsume("[CDATA[")) { t.createTempBuffer(); t.transition(CdataSection); } else { t.error(this); t.advanceTransition(BogusComment); // advance so this character gets in bogus comment data's rewind } } },
void read(Tokeniser t, CharacterReader r) { switch (r.current()) { case '&': t.advanceTransition(CharacterReferenceInData); break; case '<': t.advanceTransition(TagOpen); break; case nullChar: t.error(this); // NOT replacement character (oddly?) t.emit(r.consume()); break; case eof: t.emit(new Token.EOF()); break; default: String data = r.consumeData(); t.emit(data); break; } } },
void read(Tokeniser t, CharacterReader r) { switch (r.current()) { case '&': t.advanceTransition(CharacterReferenceInRcdata); break; case '<': t.advanceTransition(RcdataLessthanSign); break; case nullChar: t.error(this); r.advance(); t.emit(replacementChar); break; case eof: t.emit(new Token.EOF()); break; default: String data = r.consumeToAny('&', '<', nullChar); t.emit(data); break; } } },
void read(Tokeniser t, CharacterReader r) { switch (r.current()) { case '!': t.advanceTransition(MarkupDeclarationOpen); break; case '/': t.advanceTransition(EndTagOpen); break; case '?': t.advanceTransition(BogusComment); break; default: if (r.matchesLetter()) { t.createTagPending(true); t.transition(TagName); } else { t.error(this); t.emit('<'); // char that got us here t.transition(Data); } break; } } },
void read(Tokeniser t, CharacterReader r) { if (r.isEmpty()) { t.eofError(this); t.transition(Data); return; } switch (r.current()) { case '-': t.emit('-'); t.advanceTransition(ScriptDataEscapedDash); break; case '<': t.advanceTransition(ScriptDataEscapedLessthanSign); break; case nullChar: t.error(this); r.advance(); t.emit(replacementChar); break; default: String data = r.consumeToAny('-', '<', nullChar); t.emit(data); } } },
void read(Tokeniser t, CharacterReader r) { char c = r.current(); switch (c) { case '-': t.emit(c); t.advanceTransition(ScriptDataDoubleEscapedDash); break; case '<': t.emit(c); t.advanceTransition(ScriptDataDoubleEscapedLessthanSign); break; case nullChar: t.error(this); r.advance(); t.emit(replacementChar); break; case eof: t.eofError(this); t.transition(Data); break; default: String data = r.consumeToAny('-', '<', nullChar); t.emit(data); } } },