void emitCommentPending() { emit(commentPending); }
void emitDoctypePending() { emit(doctypePending); }
void emit(char[] chars) { emit(String.valueOf(chars)); }
void emit(int[] codepoints) { emit(new String(codepoints, 0, codepoints.length)); }
void emit(char c) { emit(String.valueOf(c)); }
void emitTagPending() { tagPending.finaliseTag(); emit(tagPending); }
private void anythingElse(Tokeniser t, CharacterReader r) { t.emit("</" + t.dataBuffer.toString()); r.unconsume(); t.transition(Rcdata); } },
void read(Tokeniser t, CharacterReader r) { if (r.matches('-')) { t.emit('-'); t.advanceTransition(ScriptDataEscapeStartDash); } else { t.transition(ScriptData); } } },
void read(Tokeniser t, CharacterReader r) { if (r.matches('-')) { t.emit('-'); t.advanceTransition(ScriptDataEscapedDashDash); } else { t.transition(ScriptData); } } },
void read(Tokeniser t, CharacterReader r) { String data = r.consumeTo("]]>"); t.dataBuffer.append(data); if (r.matchConsume("]]>") || r.isEmpty()) { t.emit(new Token.CData(t.dataBuffer.toString())); t.transition(Data); }// otherwise, buffer underrun, stay in data section } };
void read(Tokeniser t, CharacterReader r) { // todo: handle bogus comment starting from eof. when does that trigger? // rewind to capture character that lead us here r.unconsume(); Token.Comment comment = new Token.Comment(); comment.bogus = true; comment.data.append(r.consumeTo('>')); // todo: replace nullChar with replaceChar t.emit(comment); t.advanceTransition(Data); } },
void read(Tokeniser t, CharacterReader r) { if (r.matches('/')) { t.emit('/'); t.createTempBuffer(); t.advanceTransition(ScriptDataDoubleEscapeEnd); } else { t.transition(ScriptDataDoubleEscaped); } } },
void read(Tokeniser t, CharacterReader r) { if (r.matches('/')) { t.createTempBuffer(); t.advanceTransition(RawtextEndTagOpen); } else { t.emit('<'); t.transition(Rawtext); } } },
private static void readEndTag(Tokeniser t, CharacterReader r, TokeniserState a, TokeniserState b) { if (r.matchesLetter()) { t.createTagPending(false); t.transition(a); } else { t.emit("</"); t.transition(b); } }
void read(Tokeniser t, CharacterReader r) { if (r.matchesLetter()) { t.createTempBuffer(); t.dataBuffer.append(r.current()); t.emit("<" + r.current()); t.advanceTransition(ScriptDataDoubleEscapeStart); } else if (r.matches('/')) { t.createTempBuffer(); t.advanceTransition(ScriptDataEscapedEndTagOpen); } else { t.emit('<'); t.transition(ScriptDataEscaped); } } },
void read(Tokeniser t, CharacterReader r) { if (r.matchesLetter()) { t.createTagPending(false); t.tagPending.appendTagName(r.current()); t.dataBuffer.append(r.current()); t.advanceTransition(ScriptDataEscapedEndTagName); } else { t.emit("</"); t.transition(ScriptDataEscaped); } } },
void read(Tokeniser t, CharacterReader r) { if (r.matchesLetter()) { t.createTagPending(false); t.tagPending.appendTagName(r.current()); t.dataBuffer.append(r.current()); t.advanceTransition(RCDATAEndTagName); } else { t.emit("</"); t.transition(Rcdata); } } },
Element insert(Token.StartTag startTag) { // handle empty unknown tags // when the spec expects an empty tag, will directly hit insertEmpty, so won't generate this fake end tag. if (startTag.isSelfClosing()) { Element el = insertEmpty(startTag); stack.add(el); tokeniser.transition(TokeniserState.Data); // handles <script />, otherwise needs breakout steps from script data tokeniser.emit(emptyEnd.reset().name(el.tagName())); // ensure we get out of whatever state we are in. emitted for yielded processing return el; } Element el = new Element(Tag.valueOf(startTag.name(), settings), baseUri, settings.normalizeAttributes(startTag.attributes)); insert(el); return el; }