/**
 * Checks whether a token is a character token whose data counts as whitespace.
 *
 * @param t the token to inspect
 * @return {@code false} when {@code t} is not a character token; otherwise the result of the
 *         string-based {@code isWhitespace} check applied to the token's character data
 */
private static boolean isWhitespace(Token t) {
    // Only character tokens carry data that can be tested for whitespace.
    if (!t.isCharacter()) {
        return false;
    }
    String text = t.asCharacter().getData();
    return isWhitespace(text);
}
/**
 * Branches on whether the reader currently matches {@code '/'}.
 * <p>
 * On a match: emits {@code '/'}, calls {@code createTempBuffer()} on the tokeniser and
 * advance-transitions to {@code ScriptDataDoubleEscapeEnd}. Otherwise transitions to
 * {@code ScriptDataDoubleEscaped} without emitting anything.
 *
 * @param t the tokeniser whose state and output are updated
 * @param r the character reader being inspected
 */
void read(Tokeniser t, CharacterReader r) {
    // No slash at the current position: fall back to the double-escaped state.
    if (!r.matches('/')) {
        t.transition(ScriptDataDoubleEscaped);
        return;
    }
    t.emit('/');
    t.createTempBuffer();
    // NOTE(review): advanceTransition presumably also consumes the matched '/' - confirm in Tokeniser.
    t.advanceTransition(ScriptDataDoubleEscapeEnd);
}
},
void error(HtmlTreeBuilderState state) { if (errors.canAddError()) errors.add(new ParseError(reader.pos(), "Unexpected token [%s] when in state [%s]", currentToken.tokenType(), state)); }
/**
 * Branches on whether the reader is positioned at a letter.
 * <p>
 * On a letter: creates a pending tag via {@code createTagPending(false)}, appends the current
 * character to both the pending tag's name and the tokeniser's data buffer, then
 * advance-transitions to {@code ScriptDataEscapedEndTagName}. Otherwise emits the literal
 * {@code "</"} and transitions to {@code ScriptDataEscaped}.
 *
 * @param t the tokeniser whose pending tag, buffer, output and state are updated
 * @param r the character reader being inspected
 */
void read(Tokeniser t, CharacterReader r) {
    // Not a tag name start: the "</" seen so far is emitted as plain output.
    if (!r.matchesLetter()) {
        t.emit("</");
        t.transition(ScriptDataEscaped);
        return;
    }
    // NOTE(review): the 'false' argument presumably marks this as an end tag - confirm in Tokeniser.
    t.createTagPending(false);
    t.tagPending.appendTagName(r.current());
    t.dataBuffer.append(r.current());
    t.advanceTransition(ScriptDataEscapedEndTagName);
}
},
/**
 * Shared end-tag-open handling, parameterised by the two possible follow-up states.
 * <p>
 * If the reader is at a letter, a pending tag is created via {@code createTagPending(false)}
 * and the tokeniser transitions to {@code a}. Otherwise the literal {@code "</"} is emitted
 * and the tokeniser transitions to {@code b}.
 *
 * @param t the tokeniser to update
 * @param r the character reader being inspected
 * @param a the state to enter when a letter follows
 * @param b the state to enter when anything else follows
 */
private static void readEndTag(Tokeniser t, CharacterReader r, TokeniserState a, TokeniserState b) {
    if (!r.matchesLetter()) {
        t.emit("</");
        t.transition(b);
        return;
    }
    t.createTagPending(false);
    t.transition(a);
}
/**
 * Sets the tree builder up to process the content of an element using the
 * {@code Rcdata} tokeniser state, then inserts the start tag.
 * <p>
 * The steps run in a fixed order and must not be rearranged.
 *
 * @param startTag the start tag token to insert
 * @param tb the tree builder being driven
 */
private static void handleRcData(Token.StartTag startTag, HtmlTreeBuilder tb) {
    // 1. Switch the tokeniser to the Rcdata state.
    tb.tokeniser.transition(TokeniserState.Rcdata);

    // 2. Mark the insertion mode, then move the builder to the Text state.
    //    NOTE(review): presumably the mark lets the builder return to the prior mode later - confirm.
    tb.markInsertionMode();
    tb.transition(Text);

    // 3. Finally insert the element for the start tag.
    tb.insert(startTag);
}
/**
 * Creates a parser for XML input.
 * <p>
 * The returned parser makes no assumptions about the incoming tags and does not treat the
 * input as HTML; it builds a simple tree directly from what it reads.
 *
 * @return a new simple XML parser, backed by a fresh {@code XmlTreeBuilder}
 */
public static Parser xmlParser() {
    XmlTreeBuilder xmlBuilder = new XmlTreeBuilder();
    return new Parser(xmlBuilder);
}
}
/**
 * Parses an XML fragment into a list of nodes.
 * <p>
 * A fresh {@code XmlTreeBuilder} is used with its own default settings, and parse errors
 * are not tracked.
 *
 * @param fragmentXml the fragment of XML to parse
 * @param baseUri base URI of the document (i.e. original fetch location), used for resolving
 *        relative URLs
 * @return the list of nodes parsed from the input XML
 */
public static List<Node> parseXmlFragment(String fragmentXml, String baseUri) {
    XmlTreeBuilder xmlBuilder = new XmlTreeBuilder();
    return xmlBuilder.parseFragment(
        fragmentXml,
        baseUri,
        ParseErrorList.noTracking(),
        xmlBuilder.defaultSettings());
}
/**
 * Parses the supplied input into a document using this parser's tree builder and settings.
 * <p>
 * The parser's error list is replaced on every call: a tracking list limited by
 * {@code maxErrors} when error tracking is enabled, otherwise a non-tracking list.
 *
 * @param inputHtml reader supplying the input to parse
 * @param baseUri base URI handed to the tree builder
 * @return the document produced by the tree builder
 */
public Document parseInput(Reader inputHtml, String baseUri) {
    if (isTrackErrors()) {
        errors = ParseErrorList.tracking(maxErrors);
    } else {
        errors = ParseErrorList.noTracking();
    }
    return treeBuilder.parse(inputHtml, baseUri, errors, settings);
}
/**
 * Creates a parser for HTML input.
 * <p>
 * The returned parser treats input as HTML5 and, using its knowledge of the semantics of
 * the incoming tags, enforces the creation of a normalised document.
 *
 * @return a new HTML parser, backed by a fresh {@code HtmlTreeBuilder}
 */
public static Parser htmlParser() {
    HtmlTreeBuilder htmlBuilder = new HtmlTreeBuilder();
    return new Parser(htmlBuilder);
}
/**
 * Utility method that unescapes HTML entities in a string.
 * <p>
 * The string is wrapped in a {@code CharacterReader} and handed to a throwaway
 * {@code Tokeniser} that does not track parse errors.
 *
 * @param string the HTML-escaped string
 * @param inAttribute whether the string should be unescaped in strict mode (as attributes are)
 * @return the unescaped string
 */
public static String unescapeEntities(String string, boolean inAttribute) {
    CharacterReader source = new CharacterReader(string);
    Tokeniser tokeniser = new Tokeniser(source, ParseErrorList.noTracking());
    return tokeniser.unescapeEntities(inAttribute);
}
void error(String errorMsg) { if (errors.canAddError()) errors.add(new ParseError(reader.pos(), errorMsg)); }
/**
 * Branches on whether the reader currently matches {@code '/'}.
 * <p>
 * On a match: calls {@code createTempBuffer()} on the tokeniser and advance-transitions to
 * {@code RawtextEndTagOpen}. Otherwise emits {@code '<'} and transitions to {@code Rawtext}.
 *
 * @param t the tokeniser whose buffer, output and state are updated
 * @param r the character reader being inspected
 */
void read(Tokeniser t, CharacterReader r) {
    // No slash at the current position: emit '<' as ordinary output.
    if (!r.matches('/')) {
        t.emit('<');
        t.transition(Rawtext);
        return;
    }
    t.createTempBuffer();
    // NOTE(review): advanceTransition presumably also consumes the matched '/' - confirm in Tokeniser.
    t.advanceTransition(RawtextEndTagOpen);
}
},
/**
 * Sets the tree builder up to process the content of an element using the
 * {@code Rawtext} tokeniser state, then inserts the start tag.
 * <p>
 * The steps run in a fixed order and must not be rearranged.
 *
 * @param startTag the start tag token to insert
 * @param tb the tree builder being driven
 */
private static void handleRawtext(Token.StartTag startTag, HtmlTreeBuilder tb) {
    // 1. Switch the tokeniser to the Rawtext state.
    tb.tokeniser.transition(TokeniserState.Rawtext);

    // 2. Mark the insertion mode, then move the builder to the Text state.
    //    NOTE(review): presumably the mark lets the builder return to the prior mode later - confirm.
    tb.markInsertionMode();
    tb.transition(Text);

    // 3. Finally insert the element for the start tag.
    tb.insert(startTag);
}
/**
 * Branches on whether the reader currently matches {@code '-'}.
 * <p>
 * On a match: emits {@code '-'} and advance-transitions to {@code ScriptDataEscapedDashDash}.
 * Otherwise transitions to {@code ScriptData} without emitting anything.
 *
 * @param t the tokeniser whose output and state are updated
 * @param r the character reader being inspected
 */
void read(Tokeniser t, CharacterReader r) {
    // No dash at the current position: return to the plain script data state.
    if (!r.matches('-')) {
        t.transition(ScriptData);
        return;
    }
    t.emit('-');
    // NOTE(review): advanceTransition presumably also consumes the matched '-' - confirm in Tokeniser.
    t.advanceTransition(ScriptDataEscapedDashDash);
}
},
/**
 * Branches on whether the reader currently matches {@code '-'}.
 * <p>
 * On a match: emits {@code '-'} and advance-transitions to {@code ScriptDataEscapeStartDash}.
 * Otherwise transitions to {@code ScriptData} without emitting anything.
 *
 * @param t the tokeniser whose output and state are updated
 * @param r the character reader being inspected
 */
void read(Tokeniser t, CharacterReader r) {
    // No dash at the current position: return to the plain script data state.
    if (!r.matches('-')) {
        t.transition(ScriptData);
        return;
    }
    t.emit('-');
    // NOTE(review): advanceTransition presumably also consumes the matched '-' - confirm in Tokeniser.
    t.advanceTransition(ScriptDataEscapeStartDash);
}
},