/**
 * A doctype that arrives in two pieces must first be reported as incomplete and, once the
 * remainder has been appended to the buffer, be parsed as a start tag named "!DOCTYPE".
 */
@Test
public void testIncompleteDoctype() {
    final StringBuilder buffer = new StringBuilder("<!");
    parser = new HtmlTokenParser(buffer, 0);

    // only "<!" is available so far, so the token cannot be finished
    assertThat(parser.parseToken(), equalTo(Result.INCOMPLETE));

    // deliver the rest and let the very same parser instance continue
    buffer.append("DOCTYPE HTML>");
    assertThat(parser.parseToken(), equalTo(Result.SUCCESS));

    final CharSequence expectedTagType = "!DOCTYPE";
    assertThat(parser.getParsedTokenType(), equalTo(Token.START_TAG));
    assertThat(parser.getTagType(), equalTo(expectedTagType));
}
/**
 * Parses a single opening tag with one attribute and checks the reported token type, tag type
 * and argument string (the expected arguments carry no surrounding whitespace).
 */
@Test
public void testOpeningTag() {
    final String html = "<img src = \"cool images source \" >";
    final CharSequence expectedTagType = "img";
    final CharSequence expectedArguments = "src = \"cool images source \"";

    parser = new HtmlTokenParser(html, 0);
    final Result outcome = parser.parseToken();

    assertThat(outcome, equalTo(Result.SUCCESS));
    assertThat(parser.getParsedTokenType(), equalTo(Token.START_TAG));
    assertThat(parser.getTagType(), equalTo(expectedTagType));
    assertThat(parser.getTagArguments(), equalTo(expectedArguments));
}
/**
 * Parses a full XHTML 1.0 Transitional doctype containing quoted identifiers and checks that
 * it is reported as a start tag of type "!DOCTYPE" with everything after the tag name as its
 * arguments.
 */
@Test
public void testComplexDoctypeTag() {
    final StringBuilder buffer = new StringBuilder("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">");
    final CharSequence expectedTagType = "!DOCTYPE";
    final CharSequence expectedArguments = "html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"";

    parser = new HtmlTokenParser(buffer, 0);
    final Result outcome = parser.parseToken();

    assertThat(outcome, equalTo(Result.SUCCESS));
    assertThat(parser.getParsedTokenType(), equalTo(Token.START_TAG));
    assertThat(parser.getTagType(), equalTo(expectedTagType));
    assertThat(parser.getTagArguments(), equalTo(expectedArguments));
}
/**
 * Constructs the parser with a start offset pointing behind a leading text and checks that
 * the stand-alone div tag following that text is the token that gets parsed.
 */
@Test
public void testOffsetSupport() {
    final String html = "This part should be completely ignored. <div />";
    final int startOffset = "This part should be completely ignored.".length();
    final CharSequence expectedTagType = "div";
    final CharSequence expectedArguments = "";

    parser = new HtmlTokenParser(html, startOffset);
    final Result outcome = parser.parseToken();

    assertThat(outcome, equalTo(Result.SUCCESS));
    assertThat(parser.getParsedTokenType(), equalTo(Token.STANDALONE_TAG));
    assertThat(parser.getTagType(), equalTo(expectedTagType));
    assertThat(parser.getTagArguments(), equalTo(expectedArguments));
}
/**
 * Checks that "/", ">" and "/>" inside quoted attribute values do not terminate the tag: the
 * input ending within an open double-quoted value is incomplete, and after the closing quote,
 * a single-quoted attribute and the real ">" are appended the tag parses as one start tag.
 */
@Test
public void testQuotedArguments() {
    final StringBuilder buffer = new StringBuilder("<div arg1 = \"contains weird stuff like / or > or />");
    parser = new HtmlTokenParser(buffer, 0);

    // the double-quoted value is still open, so the tag is not finished
    assertThat(parser.parseToken(), equalTo(Result.INCOMPLETE));

    buffer.append("\" arg2 = \'contains weird stuff like / or > or />\' >");
    assertThat(parser.parseToken(), equalTo(Result.SUCCESS));

    final CharSequence expectedTagType = "div";
    final CharSequence expectedArguments = "arg1 = \"contains weird stuff like / or > or />\" arg2 = \'contains weird stuff like / or > or />\'";
    assertThat(parser.getParsedTokenType(), equalTo(Token.START_TAG));
    assertThat(parser.getTagType(), equalTo(expectedTagType));
    assertThat(parser.getTagArguments(), equalTo(expectedArguments));
}
/**
 * Parses two consecutive tags from one input, calling {@code resetState()} between the two
 * {@code parseToken()} invocations, and checks type and name of each token.
 */
@Test
public void testMultipleTagsParsing() {
    parser = new HtmlTokenParser("<html> <head/>", 0);

    // first token: the opening html tag
    final CharSequence firstTagType = "html";
    assertThat(parser.parseToken(), equalTo(Result.SUCCESS));
    assertThat(parser.getParsedTokenType(), equalTo(Token.START_TAG));
    assertThat(parser.getTagType(), equalTo(firstTagType));

    parser.resetState();

    // second token: the stand-alone head tag
    final CharSequence secondTagType = "head";
    assertThat(parser.parseToken(), equalTo(Result.SUCCESS));
    assertThat(parser.getParsedTokenType(), equalTo(Token.STANDALONE_TAG));
    assertThat(parser.getTagType(), equalTo(secondTagType));
}
/**
 * Parses the first of two directly adjacent comments and checks that the caret stops right
 * behind the first comment instead of running into the second one.
 */
@Test
public void testDoubleComment() {
    final String html = "<!--commentA--><!--commentB-->";
    final int endOfFirstComment = "<!--commentA-->".length();

    parser = new HtmlTokenParser(html, 0);
    final Result outcome = parser.parseToken();

    assertThat(outcome, equalTo(Result.SUCCESS));
    assertThat(parser.getParsedTokenType(), equalTo(Token.COMMENT));
    assertThat(parser.getCaret().getOffset(), equalTo(endOfFirstComment));
}
/**
 * A lone "&lt;" must be reported as incomplete; after "div&gt;" has been appended the same
 * parser must deliver a start tag of type "div" with empty arguments.
 */
@Test
public void testIncompleteOpeningTag() {
    final StringBuilder buffer = new StringBuilder("<");
    parser = new HtmlTokenParser(buffer, 0);

    assertThat(parser.parseToken(), equalTo(Result.INCOMPLETE));

    buffer.append("div>");
    assertThat(parser.parseToken(), equalTo(Result.SUCCESS));

    final CharSequence expectedTagType = "div";
    final CharSequence expectedArguments = "";
    assertThat(parser.getParsedTokenType(), equalTo(Token.START_TAG));
    assertThat(parser.getTagType(), equalTo(expectedTagType));
    assertThat(parser.getTagArguments(), equalTo(expectedArguments));
}
/**
 * A lone "&lt;" must be reported as incomplete; after "/div&gt;" has been appended the same
 * parser must deliver an end tag of type "div" with empty arguments.
 */
@Test
public void testIncompleteClosingTag() {
    final StringBuilder buffer = new StringBuilder("<");
    parser = new HtmlTokenParser(buffer, 0);

    assertThat(parser.parseToken(), equalTo(Result.INCOMPLETE));

    buffer.append("/div>");
    assertThat(parser.parseToken(), equalTo(Result.SUCCESS));

    final CharSequence expectedTagType = "div";
    final CharSequence expectedArguments = "";
    assertThat(parser.getParsedTokenType(), equalTo(Token.END_TAG));
    assertThat(parser.getTagType(), equalTo(expectedTagType));
    assertThat(parser.getTagArguments(), equalTo(expectedArguments));
}
/**
 * Starts the parser at offset zero, then moves its caret forward by the length of the leading
 * text via {@code goN} and checks that the stand-alone div tag behind that text is parsed.
 */
@Test
public void testCaretMoving() {
    final String html = "This part should be completely ignored. <div />";
    final int charsToSkip = "This part should be completely ignored.".length();
    final CharSequence expectedTagType = "div";
    final CharSequence expectedArguments = "";

    parser = new HtmlTokenParser(html, 0);
    parser.getCaret().goN(charsToSkip);
    final Result outcome = parser.parseToken();

    assertThat(outcome, equalTo(Result.SUCCESS));
    assertThat(parser.getParsedTokenType(), equalTo(Token.STANDALONE_TAG));
    assertThat(parser.getTagType(), equalTo(expectedTagType));
    assertThat(parser.getTagArguments(), equalTo(expectedArguments));
}
}
@Test public void testIncompleteComment() { StringBuilder src = new StringBuilder("<!"); parser = new HtmlTokenParser(src, 0); Result resultIncompleteA = parser.parseToken(); assertThat(resultIncompleteA, equalTo(Result.INCOMPLETE)); src.append("-"); // NOPMD Result resultIncompleteB = parser.parseToken(); assertThat(resultIncompleteB, equalTo(Result.INCOMPLETE)); src.append("- tricked! this actually is a comment with random -> and -- and > ! "); // NOPMD Result resultIncompleteC = parser.parseToken(); assertThat(resultIncompleteC, equalTo(Result.INCOMPLETE)); src.append("-->"); Result result = parser.parseToken(); assertThat(result, equalTo(Result.SUCCESS)); assertThat(parser.getParsedTokenType(), equalTo(Token.COMMENT)); }
/** * Tries to find an opening html tag. */ private void scanForHtmlTag() { if (CharSequenceUtils.checkEqualIgnoreCase(tokenParser.getTagType(), "html")) { if (tokenParser.getParsedTokenType() != Token.START_TAG) { abortInjectionPointSearch(); return; } status = Status.SCAN_FOR_HEAD_TAG; } else { // current token is not the html tag, we assume the document starts immediately with the // head status = Status.SCAN_FOR_HEAD_TAG; processToken(); } }
/** * Tries to find an opening body tag. Omitting both the head and the body tag is currently not * supported. */ private void scanForBodyTag() { if (CharSequenceUtils.checkEqualIgnoreCase(tokenParser.getTagType(), "body")) { if (tokenParser.getParsedTokenType() != Token.START_TAG) { abortInjectionPointSearch(); return; } // Perform injection after start of the head tag status = Status.INJECTION_POINT_FOUND; } else { // current token is not the head tag, we assume it must be the body tag (head is empty) abortInjectionPointSearch(); } }
/** * Scans for an opening <?xml .. ?> declaration in case the html is delivered as XML. */ private void scanXmlDeclaration() { // Preamble checking based on the information on this page if (CharSequenceUtils.checkEqualIgnoreCase(tokenParser.getTagType(), "?xml")) { // ?> is treated by the parser as a stand-alone tag if (tokenParser.getParsedTokenType() != Token.STANDALONE_TAG) { abortInjectionPointSearch(); return; } // Xml header detected, now we require an html doctype to continue status = Status.SCAN_REQUIRED_XHTML_PREAMBLE; } else { // no xml tag found, therefore we are non-strict in requiring a doctype declaration status = Status.SCAN_HTML_PREAMBLE; processToken(); } }
@Test public void testXMLTag() { StringBuilder src = new StringBuilder("<?"); parser = new HtmlTokenParser(src, 0); Result incompleteResultA = parser.parseToken(); assertThat(incompleteResultA, equalTo(Result.INCOMPLETE)); src.append("xml version=\"1.0\" encoding=\"UTF-8\" "); // NOPMD Result incompleteResultB = parser.parseToken(); assertThat(incompleteResultB, equalTo(Result.INCOMPLETE)); src.append("?"); // NOPMD Result incompleteResultC = parser.parseToken(); assertThat(incompleteResultC, equalTo(Result.INCOMPLETE)); src.append(">"); // NOPMD Result result = parser.parseToken(); assertThat(result, equalTo(Result.SUCCESS)); assertThat(parser.getParsedTokenType(), equalTo(Token.STANDALONE_TAG)); assertThat(parser.getTagType(), equalTo((CharSequence) "?xml")); assertThat(parser.getTagArguments(), equalTo((CharSequence) "version=\"1.0\" encoding=\"UTF-8\"")); }
/** * Tries to find an opening head tag. Omitting both the head and the body tag is currently not * supported. */ private void scanForHeadTag() { if (CharSequenceUtils.checkEqualIgnoreCase(tokenParser.getTagType(), "head")) { if (tokenParser.getParsedTokenType() != Token.START_TAG) { abortInjectionPointSearch(); return; } // Perform injection after start of the head tag status = Status.INJECTION_POINT_FOUND; } else { // current token is not the head tag, we assume it must be the body tag (head is empty) status = Status.SCAN_FOR_BODY_TAG; processToken(); } }
if (tokenParser.getParsedTokenType() == Token.COMMENT) { return;
/** * Scans for an opening html tag, skipping preamble tags like or !DOCTYPE. */ private void scanHtmlPreamble() { // Preamble checking based on the information on this page // http://wiki.selfhtml.org/wiki/HTML/Dokumentstruktur_und_Aufbau#HTML5 // we also allow html without preamble, directly starting with the <html> tag if (CharSequenceUtils.checkEqualIgnoreCase(tokenParser.getTagType(), "!DOCTYPE")) { // Doctypes are formated as opening tags if (tokenParser.getParsedTokenType() != Token.START_TAG) { abortInjectionPointSearch(); return; } // we accept any doctype starting with "html" if (!CharSequenceUtils.checkEqualIgnoreCase(tokenParser.getTagArguments(), 0, 4, "html", 0, 4)) { abortInjectionPointSearch(); return; } // DOCTYPE okay, proceed with the next tag scanning for the html tag status = Status.SCAN_FOR_HTML_TAG; } else { // no preamble tag found, we assume the html is starting immediately status = Status.SCAN_FOR_HTML_TAG; processToken(); } }
/** * Same as {@link #scanHtmlPreamble()}, however does not allow to ommit the preamble. */ private void scanRequiredXHtmlPreamble() { // Preamble checking based on the information on this page // http://wiki.selfhtml.org/wiki/HTML/Dokumentstruktur_und_Aufbau#HTML5 // we also allow html without preamble, directly starting with the <html> tag if (CharSequenceUtils.checkEqualIgnoreCase(tokenParser.getTagType(), "!DOCTYPE")) { // Doctypes are formated as opening tags if (tokenParser.getParsedTokenType() != Token.START_TAG) { abortInjectionPointSearch(); return; } // we accept any doctype starting with "html" if (!CharSequenceUtils.checkEqualIgnoreCase(tokenParser.getTagArguments(), 0, 4, "html", 0, 4)) { abortInjectionPointSearch(); return; } // DOCTYPE okay, proceed with the next tag scanning for the html tag status = Status.SCAN_FOR_HTML_TAG; } else { // no preamble tag found, it however is required for xhtml abortInjectionPointSearch(); return; } }