/**
 * Parses an HTML string into a {@link Document}.
 * <p>
 * A fresh {@code HtmlTreeBuilder} is created for the call; parse errors are not tracked and the
 * builder's default settings are used.
 *
 * @param html    HTML to parse
 * @param baseUri base URI of the document (i.e. original fetch location), for resolving relative URLs.
 *
 * @return parsed Document
 */
public static Document parse(String html, String baseUri) {
    final TreeBuilder builder = new HtmlTreeBuilder();
    final StringReader htmlReader = new StringReader(html);
    return builder.parse(
            htmlReader,
            baseUri,
            ParseErrorList.noTracking(),
            builder.defaultSettings());
}
/**
 * Issues an end tag for {@code "tr"} and, if that end tag was processed, hands the token back to
 * the tree builder.
 *
 * @param t  the token currently being handled
 * @param tb the tree builder driving the parse
 * @return {@code false} when the {@code "tr"} end tag was not processed; otherwise the result of
 *         {@code tb.process(t)}
 */
private boolean handleMissingTr(Token t, TreeBuilder tb) {
    if (!tb.processEndTag("tr")) {
        return false;
    }
    return tb.process(t);
}
}, // closes the enclosing declaration, which begins outside this excerpt
/**
 * Runs a complete parse: initialises the builder with the given arguments, runs the parser, and
 * returns the builder's document.
 *
 * @param input    reader supplying the markup
 * @param baseUri  base URI passed on to {@code initialiseParse}
 * @param errors   parse error list passed on to {@code initialiseParse}
 * @param settings parse settings passed on to {@code initialiseParse}
 * @return the {@code doc} field once {@code runParser()} has returned
 */
Document parse(Reader input, String baseUri, ParseErrorList errors, ParseSettings settings) {
    this.initialiseParse(input, baseUri, errors, settings);
    this.runParser();
    return this.doc;
}
/**
 * Pulls tokens from the tokeniser one at a time, processing and then resetting each, and stops
 * after the EOF token has been handled.
 */
protected void runParser() {
    Token current;
    do {
        current = tokeniser.read();
        process(current);
        current.reset();
        // The type is inspected after reset(), exactly as before.
        // NOTE(review): this relies on reset() leaving the token's type untouched - confirm.
    } while (current.type != Token.TokenType.EOF);
}
/**
 * Parses the given markup string with this parser's tree builder and settings.
 * <p>
 * The {@code errors} field is replaced on every call: a tracking list capped at
 * {@code maxErrors} when error tracking is enabled, otherwise a non-tracking list.
 *
 * @param html    markup to parse
 * @param baseUri base URI handed to the tree builder
 * @return the document produced by the tree builder
 */
public Document parseInput(String html, String baseUri) {
    if (isTrackErrors()) {
        errors = ParseErrorList.tracking(maxErrors);
    } else {
        errors = ParseErrorList.noTracking();
    }
    final StringReader htmlReader = new StringReader(html);
    return treeBuilder.parse(htmlReader, baseUri, errors, settings);
}
@Override protected void initialiseParse(Reader input, String baseUri, ParseErrorList errors, ParseSettings settings) { super.initialiseParse(input, baseUri, errors, settings); // this is a bit mucky. todo - probably just create new parser objects to ensure all reset. state = HtmlTreeBuilderState.Initial; originalState = null; baseUriSetFromDoc = false; headElement = null; formElement = null; contextElement = null; formattingElements = new ArrayList<>(); pendingTableCharacters = new ArrayList<>(); emptyEnd = new Token.EndTag(); framesetOk = true; fosterInserts = false; fragmentParsing = false; }
/**
 * Creates a Parser backed by the given TreeBuilder. The parser's settings start out as that
 * builder's default settings.
 *
 * @param treeBuilder TreeBuilder to use to parse input into Documents.
 */
public Parser(TreeBuilder treeBuilder) {
    this.treeBuilder = treeBuilder;
    this.settings = treeBuilder.defaultSettings();
}
/**
 * Parses markup read from the given reader with this parser's tree builder and settings.
 * <p>
 * The {@code errors} field is replaced on every call: a tracking list capped at
 * {@code maxErrors} when error tracking is enabled, otherwise a non-tracking list.
 *
 * @param inputHtml reader supplying the markup
 * @param baseUri   base URI handed to the tree builder
 * @return the document produced by the tree builder
 */
public Document parseInput(Reader inputHtml, String baseUri) {
    if (isTrackErrors()) {
        errors = ParseErrorList.tracking(maxErrors);
    } else {
        errors = ParseErrorList.noTracking();
    }
    return treeBuilder.parse(inputHtml, baseUri, errors, settings);
}
@Override protected void initialiseParse(Reader input, String baseUri, ParseErrorList errors, ParseSettings settings) { super.initialiseParse(input, baseUri, errors, settings); stack.add(doc); // place the document onto the stack. differs from HtmlTreeBuilder (not on stack) doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml); }
private boolean anythingElse(Token t, TreeBuilder tb) { boolean processed = tb.processEndTag("colgroup"); if (processed) // only ignored in frag case return tb.process(t); return true; } },
/**
 * Resets this builder's state to {@code Asta4DTagSupportHtmlTreeBuilderState.Initial}, clears the
 * {@code baseUriSetFromDoc} flag, and then delegates the parse to the superclass.
 *
 * @param input   markup to parse
 * @param baseUri base URI passed on to the superclass
 * @param errors  parse error list passed on to the superclass
 * @return the document returned by the superclass parse
 */
@Override
Document parse(String input, String baseUri, ParseErrorList errors) {
    this.state = Asta4DTagSupportHtmlTreeBuilderState.Initial;
    this.baseUriSetFromDoc = false;

    return super.parse(input, baseUri, errors);
}
/**
 * Fallback handling: issues an end tag for {@code "head"} and then hands the token back to the
 * tree builder.
 *
 * @param t  the token currently being handled
 * @param tb the tree builder driving the parse
 * @return the result of {@code tb.process(t)}
 */
private boolean anythingElse(Token t, TreeBuilder tb) {
    // The outcome of the end-tag call is not consulted; the token is processed regardless.
    tb.processEndTag("head");

    final boolean handled = tb.process(t);
    return handled;
}
}, // closes the enclosing declaration, which begins outside this excerpt