/**
 * Builds a fresh HTML parser.
 * <p>
 * The returned parser is backed by a newly constructed {@link HtmlTreeBuilder}; it handles input as HTML5 and
 * produces a normalised document, using its knowledge of the semantics of the tags it encounters.
 *
 * @return a new parser instance configured for HTML.
 */
public static Parser htmlParser() {
    final HtmlTreeBuilder htmlBuilder = new HtmlTreeBuilder();
    return new Parser(htmlBuilder);
}
/**
 * Parses the supplied HTML text and returns the resulting Document.
 * <p>
 * A new {@link HtmlTreeBuilder} is used for each call, with its default settings and without parse-error
 * tracking.
 *
 * @param html the HTML source to parse
 * @param baseUri the document's base URI (i.e. the location it was originally fetched from), used to resolve
 *        relative URLs.
 *
 * @return the Document built from the input
 */
public static Document parse(String html, String baseUri) {
    final TreeBuilder htmlBuilder = new HtmlTreeBuilder();
    // Locals are declared in the same order the original evaluated its call arguments.
    final StringReader input = new StringReader(html);
    final ParseErrorList untrackedErrors = ParseErrorList.noTracking();
    return htmlBuilder.parse(input, baseUri, untrackedErrors, htmlBuilder.defaultSettings());
}
/**
 * Parses an HTML fragment into a list of nodes, recording parse errors in the supplied list.
 * <p>
 * When a context element is given, it supplies the parsing context for the fragment.
 *
 * @param fragmentHtml the HTML fragment to parse
 * @param context (optional) the element the fragment is being parsed for (i.e. as inner HTML); it provides
 *        stack context (for implicit element creation).
 * @param baseUri the document's base URI (i.e. the location it was originally fetched from), used to resolve
 *        relative URLs.
 * @param errorList the list that parse errors are added to
 *
 * @return the nodes parsed from the fragment. The context element, if supplied, is not modified.
 */
public static List<Node> parseFragment(String fragmentHtml, Element context, String baseUri, ParseErrorList errorList) {
    final HtmlTreeBuilder fragmentBuilder = new HtmlTreeBuilder();
    final List<Node> parsedNodes = fragmentBuilder.parseFragment(
            fragmentHtml,
            context,
            baseUri,
            errorList,
            fragmentBuilder.defaultSettings());
    return parsedNodes;
}
/**
 * Parses an HTML fragment into a list of nodes, without tracking parse errors.
 * <p>
 * When a context element is given, it supplies the parsing context for the fragment.
 *
 * @param fragmentHtml the HTML fragment to parse
 * @param context (optional) the element the fragment is being parsed for (i.e. as inner HTML); it provides
 *        stack context (for implicit element creation).
 * @param baseUri the document's base URI (i.e. the location it was originally fetched from), used to resolve
 *        relative URLs.
 *
 * @return the nodes parsed from the fragment. The context element, if supplied, is not modified.
 */
public static List<Node> parseFragment(String fragmentHtml, Element context, String baseUri) {
    final HtmlTreeBuilder fragmentBuilder = new HtmlTreeBuilder();
    // Obtained after the builder is constructed, matching the original evaluation order.
    final ParseErrorList untrackedErrors = ParseErrorList.noTracking();
    return fragmentBuilder.parseFragment(
            fragmentHtml,
            context,
            baseUri,
            untrackedErrors,
            fragmentBuilder.defaultSettings());
}