protected Document parse(InputStream input) throws IOException { return Jsoup.parse(input, "UTF-8", "", new Parser(new Asta4DTagSupportHtmlTreeBuilder())); }
void clearStackToTableRowContext() { clearStackToContext("tr"); }
private boolean exitTableBody(Token t, Asta4DTagSupportHtmlTreeBuilder tb) { if (!(tb.inTableScope("tbody") || tb.inTableScope("thead") || tb.inScope("tfoot"))) { // frag case tb.error(this); return false; } tb.clearStackToTableBodyContext(); tb.process(new Token.EndTag(tb.currentElement().nodeName())); // tbody, tfoot, thead return tb.process(t); }
boolean anythingElse(Token t, Asta4DTagSupportHtmlTreeBuilder tb) { tb.error(this); boolean processed = true; if (StringUtil.in(tb.currentElement().nodeName(), "table", "tbody", "tfoot", "thead", "tr")) { tb.setFosterInserts(true); processed = tb.process(t, InBody); tb.setFosterInserts(false); } else { processed = tb.process(t, InBody); } return processed; } },
tb.error(this); return false; } else if (tb.framesetOk() && isWhitespace(c)) { // don't check if whitespace if frames already closed tb.reconstructFormattingElements(); tb.insert(c); } else { tb.reconstructFormattingElements(); tb.insert(c); tb.framesetOk(false); tb.insert(t.asComment()); break; tb.error(this); return false; String name = startTag.name(); if (name.equals("html")) { tb.error(this); Element html = tb.getStack().getFirst(); for (Attribute attribute : startTag.getAttributes()) { if (!html.hasAttr(attribute.getKey())) return tb.process(t, InHead); } else if (name.equals("body")) { tb.error(this); LinkedList<Element> stack = tb.getStack(); if (stack.size() == 1 || (stack.size() > 2 && !stack.get(1).nodeName().equals("body"))) {
boolean process(Token t, Asta4DTagSupportHtmlTreeBuilder tb) { if (t.isCharacter()) { tb.newPendingTableCharacters(); tb.markInsertionMode(); tb.transition(InTableText); return tb.process(t); } else if (t.isComment()) { tb.insert(t.asComment()); return true; } else if (t.isDoctype()) { tb.error(this); return false; } else if (t.isStartTag()) { String name = startTag.name(); if (name.equals("caption")) { tb.clearStackToTableContext(); tb.insertMarkerToFormattingElements(); tb.insert(startTag); tb.transition(InCaption); } else if (name.equals("colgroup")) { tb.clearStackToTableContext(); tb.insert(startTag); tb.transition(InColumnGroup); } else if (name.equals("col")) { tb.process(new Token.StartTag("colgroup")); return tb.process(t); } else if (StringUtil.in(name, "tbody", "tfoot", "thead")) { tb.clearStackToTableContext(); tb.insert(startTag);
boolean process(Token t, Asta4DTagSupportHtmlTreeBuilder tb) { if (isWhitespace(t)) { tb.insert(t.asCharacter()); return true; tb.insert(t.asComment()); break; case Doctype: tb.error(this); return false; case StartTag: return InBody.process(t, tb); } else if (StringUtil.in(name, "base", "basefont", "bgsound", "command", "link")) { Element el = tb.insertEmpty(start); tb.maybeSetBaseUri(el); } else if (name.equals("meta")) { Element meta = tb.insertEmpty(start); tb.insert(start); tb.transition(InHeadNoscript); } else if (name.equals("script")) { tb.markInsertionMode(); tb.transition(Text); tb.insert(start); } else if (name.equals("head")) { tb.error(this); return false;
if (!tb.inTableScope(name)) { tb.error(this); tb.transition(InRow); // might not be in scope if empty: <td /> and processing fake end tag return false; tb.generateImpliedEndTags(); if (!tb.currentElement().nodeName().equals(name)) tb.error(this); tb.popStackToClose(name); tb.clearFormattingElementsToLastMarker(); tb.transition(InRow); } else if (StringUtil.in(name, "body", "caption", "col", "colgroup", "html")) { tb.error(this); return false; } else if (StringUtil.in(name, "table", "tbody", "tfoot", "thead", "tr")) { if (!tb.inTableScope(name)) { tb.error(this); return false; return tb.process(t); } else { return anythingElse(t, tb); if (!(tb.inTableScope("td") || tb.inTableScope("th"))) { tb.error(this); return false; return tb.process(t); } else {
boolean process(Token t, Asta4DTagSupportHtmlTreeBuilder tb) { if (isWhitespace(t)) { tb.insert(t.asCharacter()); } else if (t.isComment()) { tb.insert(t.asComment()); } else if (t.isDoctype()) { tb.error(this); return false; } else if (t.isStartTag()) { String name = start.name(); if (name.equals("html")) { return tb.process(start, InBody); } else if (name.equals("frameset")) { tb.insert(start); } else if (name.equals("frame")) { tb.insertEmpty(start); } else if (name.equals("noframes")) { return tb.process(start, InHead); } else { tb.error(this); return false; if (tb.currentElement().nodeName().equals("html")) { // frag tb.error(this); return false; } else { tb.pop(); if (!tb.isFragmentParsing() && !tb.currentElement().nodeName().equals("frameset")) { tb.transition(AfterFrameset);
Token.Character c = t.asCharacter(); if (c.getData().equals(nullString)) { tb.error(this); return false; } else { tb.getPendingTableCharacters().add(c); if (tb.getPendingTableCharacters().size() > 0) { for (Token.Character character : tb.getPendingTableCharacters()) { if (!isWhitespace(character)) { tb.error(this); if (StringUtil.in(tb.currentElement().nodeName(), "table", "tbody", "tfoot", "thead", "tr")) { tb.setFosterInserts(true); tb.process(character, InBody); tb.setFosterInserts(false); } else { tb.process(character, InBody); tb.insert(character); tb.newPendingTableCharacters(); tb.transition(tb.originalState()); return tb.process(t);
Token.Character c = t.asCharacter(); if (c.getData().equals(nullString)) { tb.error(this); return false; } else { tb.insert(c); tb.insert(t.asComment()); break; case Doctype: tb.error(this); return false; case StartTag: String name = start.name(); if (name.equals("html")) return tb.process(start, InBody); else if (name.equals("option")) { tb.process(new Token.EndTag("option")); tb.insert(start); } else if (name.equals("optgroup")) { if (tb.currentElement().nodeName().equals("option")) tb.process(new Token.EndTag("option")); else if (tb.currentElement().nodeName().equals("optgroup")) tb.process(new Token.EndTag("optgroup")); tb.insert(start); } else if (name.equals("select")) { tb.error(this); return tb.process(new Token.EndTag("select"));
boolean process(Token t, Asta4DTagSupportHtmlTreeBuilder tb) { if (t.isComment()) { tb.insert(t.asComment()); } else if (t.isDoctype() || isWhitespace(t) || (t.isStartTag() && t.asStartTag().name().equals("html"))) { return tb.process(t, InBody); } else if (t.isEOF()) { // nice work chuck } else { tb.error(this); tb.transition(InBody); return tb.process(t); } return true; } },
boolean process(Token t, Asta4DTagSupportHtmlTreeBuilder tb) { if (isWhitespace(t)) { tb.insert(t.asCharacter()); } else if (t.isComment()) { tb.insert(t.asComment()); } else if (t.isDoctype()) { tb.error(this); } else if (t.isStartTag()) { Token.StartTag startTag = t.asStartTag(); String name = startTag.name(); if (name.equals("html")) { return tb.process(t, InBody); } else if (name.equals("body")) { tb.insert(startTag); tb.framesetOk(false); tb.transition(InBody); } else if (name.equals("frameset")) { tb.insert(startTag); tb.transition(InFrameset); } else if (StringUtil.in(name, "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title")) { tb.error(this); Element head = tb.getHeadElement(); tb.push(head); tb.process(t, InHead); tb.removeFromStack(head); } else if (name.equals("head")) { tb.error(this); return false; } else {
tb.clearStackToTableRowContext(); tb.insert(startTag); tb.transition(InCell); tb.insertMarkerToFormattingElements(); } else if (StringUtil.in(name, "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr")) { return handleMissingTr(t, tb); if (!tb.inTableScope(name)) { tb.error(this); // frag return false; tb.clearStackToTableRowContext(); tb.pop(); // tr tb.transition(InTableBody); } else if (name.equals("table")) { return handleMissingTr(t, tb); } else if (StringUtil.in(name, "tbody", "tfoot", "thead")) { if (!tb.inTableScope(name)) { tb.error(this); return false; tb.process(new Token.EndTag("tr")); return tb.process(t); } else if (StringUtil.in(name, "body", "caption", "col", "colgroup", "html", "td", "th")) { tb.error(this); return false; } else {
String name = startTag.name(); if (name.equals("tr")) { tb.clearStackToTableBodyContext(); tb.insert(startTag); tb.transition(InRow); } else if (StringUtil.in(name, "th", "td")) { tb.error(this); tb.process(new Token.StartTag("tr")); return tb.process(startTag); } else if (StringUtil.in(name, "caption", "col", "colgroup", "tbody", "tfoot", "thead")) { return exitTableBody(t, tb); name = endTag.name(); if (StringUtil.in(name, "tbody", "tfoot", "thead")) { if (!tb.inTableScope(name)) { tb.error(this); return false; } else { tb.clearStackToTableBodyContext(); tb.pop(); tb.transition(InTable); tb.error(this); return false; } else
boolean process(Token t, Asta4DTagSupportHtmlTreeBuilder tb) { if (t.isCharacter()) { tb.insert(t.asCharacter()); } else if (t.isEOF()) { tb.error(this); // if current node is script: already started tb.pop(); tb.transition(tb.originalState()); return tb.process(t); } else if (t.isEndTag()) { // if: An end tag whose tag name is "script" -- scripting nesting level, if evaluating scripts tb.pop(); tb.transition(tb.originalState()); } return true; } },
boolean anyOtherEndTag(Token t, Asta4DTagSupportHtmlTreeBuilder tb) { String name = t.asEndTag().name(); DescendableLinkedList<Element> stack = tb.getStack(); Iterator<Element> it = stack.descendingIterator(); while (it.hasNext()) { Element node = it.next(); if (node.nodeName().equals(name)) { tb.generateImpliedEndTags(name); if (!name.equals(tb.currentElement().nodeName())) tb.error(this); tb.popStackToClose(name); break; } else { if (tb.isSpecial(node)) { tb.error(this); return false; } } } return true; } },
boolean process(Token t, Asta4DTagSupportHtmlTreeBuilder tb) { if (t.isDoctype()) { tb.error(this); } else if (t.isStartTag() && t.asStartTag().name().equals("html")) { return tb.process(t, InBody); } else if (t.isEndTag() && t.asEndTag().name().equals("noscript")) { tb.pop(); tb.transition(InHead); } else if (isWhitespace(t) || t.isComment() || (t.isStartTag() && StringUtil.in(t.asStartTag().name(), "basefont", "bgsound", "link", "meta", "noframes", "style"))) { return tb.process(t, InHead); } else if (t.isEndTag() && t.asEndTag().name().equals("br")) { return anythingElse(t, tb); } else if ((t.isStartTag() && StringUtil.in(t.asStartTag().name(), "head", "noscript")) || t.isEndTag()) { tb.error(this); return false; } else { return anythingElse(t, tb); } return true; }
boolean process(Token t, Asta4DTagSupportHtmlTreeBuilder tb) { if (t.isStartTag() && StringUtil.in(t.asStartTag().name(), "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th")) { tb.error(this); tb.process(new Token.EndTag("select")); return tb.process(t); } else if (t.isEndTag() && StringUtil.in(t.asEndTag().name(), "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th")) { tb.error(this); if (tb.inTableScope(t.asEndTag().name())) { tb.process(new Token.EndTag("select")); return (tb.process(t)); } else return false; } else { return tb.process(t, InSelect); } } },