/**
 * Renders this node as HTML by forwarding the call to the wrapped
 * {@code m_decorated} instance.
 *
 * @param value flag passed through unchanged to {@code m_decorated.toHtml(boolean)}
 * @return the HTML text returned by {@code m_decorated}
 * @see org.htmlparser.Node#toHtml(boolean)
 */
public String toHtml(boolean value) {
    final String html = m_decorated.toHtml(value);
    return html;
}
/**
 * Renders this node as HTML by forwarding the call to the wrapped
 * {@code m_decorated} instance.
 *
 * @return the HTML text returned by {@code m_decorated}
 * @see org.htmlparser.Node#toHtml()
 */
public String toHtml() {
    final String html = m_decorated.toHtml();
    return html;
}
/**
 * Produces the HTML form of this node; the work is done entirely by
 * {@code m_decorated}.
 *
 * @param value flag handed on as-is to {@code m_decorated.toHtml(boolean)}
 * @return whatever {@code m_decorated.toHtml(value)} returns
 * @see org.htmlparser.Node#toHtml(boolean)
 */
public String toHtml(boolean value) {
    final String delegatedHtml = m_decorated.toHtml(value);
    return delegatedHtml;
}
/**
 * Produces the HTML form of this node; the work is done entirely by
 * {@code m_decorated}.
 *
 * @return whatever {@code m_decorated.toHtml()} returns
 * @see org.htmlparser.Node#toHtml()
 */
public String toHtml() {
    final String delegatedHtml = m_decorated.toHtml();
    return delegatedHtml;
}
/**
 * Returns the HTML text of this node, obtained from {@code m_decorated}.
 *
 * @return the result of {@code m_decorated.toHtml()}
 * @see org.htmlparser.Node#toHtml()
 */
public String toHtml() {
    final String result = m_decorated.toHtml();
    return result;
}
/** * Add the textual contents of the end tag of this node to the buffer. * @param sb The buffer to append to. */ protected void putEndTagInto(StringBuilder sb) { // eliminate virtual tags // if (!(endTag.getStartPosition () == endTag.getEndPosition ())) sb.append(getEndTag ().toHtml()); }
/**
 * Handles a tag while the visitor is in {@code VisitMode.NONE} or
 * {@code VisitMode.INLINE}.
 * <p>In either mode a tag that is not an empty XML tag is recorded on
 * {@code visitorPath}; in {@code INLINE} mode the tag's HTML
 * ({@code toHtml(true)}) is additionally appended to {@code currentSB}.</p>
 *
 * @param tag the tag being visited
 * @return {@code true} if the visit mode was NONE or INLINE (the tag was
 *         handled here), {@code false} otherwise
 */
private boolean handleTageInNoneAndInlineMode(final Tag tag) {
    // Any other mode is not this method's business.
    if ((visitMode != VisitMode.NONE) && (visitMode != VisitMode.INLINE)) {
        return false;
    }

    final boolean emptyXmlTag = tag.isEmptyXmlTag();
    if (!emptyXmlTag) {
        final VisitorPathElement pathElement = new VisitorPathElement().withTag(tag);
        visitorPath.add(pathElement);
    }

    if (visitMode == VisitMode.INLINE) {
        final String inlineHtml = tag.toHtml(true);
        currentSB.append(inlineHtml);
    }
    return true;
}
public void visitEndTag(Tag tag) { Node parent; parent = tag.getParent (); // process only those nodes not processed by a parent if (null == parent) // an orphan end tag modifiedResult.append(tag.toHtml()); else if (null == parent.getParent ()) // a top level tag with no parents modifiedResult.append(parent.toHtml()); }
public void visitTag(Tag tag) { if (tag instanceof LinkTag) ((LinkTag)tag).setLink(linkPrefix + ((LinkTag)tag).getLink()); else if (tag instanceof ImageTag) ((ImageTag)tag).setImageURL(linkPrefix + ((ImageTag)tag).getImageURL()); // process only those nodes that won't be processed by an end tag, // nodes without parents or parents without an end tag, since // the complete processing of all children should happen before // we turn this node back into html text if (null == tag.getParent () && (!(tag instanceof CompositeTag) || null == ((CompositeTag)tag).getEndTag ())) modifiedResult.append(tag.toHtml()); }
/**
 * Converts a single parsed htmlparser tag into the corresponding item(s).
 * <ul>
 *   <li>A {@code !doctype} tag is not added to {@code items}; its HTML is
 *       stored in {@code this.doctype}.</li>
 *   <li>A closing tag is added as a {@code SimpleTag} with no attributes,
 *       unless it is a known Brix tag, in which case it is dropped.</li>
 *   <li>Any other tag is either handed to {@code processBrixTag} (known Brix
 *       tag) or added as a {@code SimpleTag} with its attributes.</li>
 * </ul>
 *
 * @param nodes   passed through to {@code processBrixTag}
 * @param current passed through to {@code processBrixTag}
 * @param items   list that receives the created items
 * @param tag     the parsed tag to process
 */
private void processTag(List<AbstractContainer> nodes, int current, List<Item> items,
        org.htmlparser.Tag tag) {
    final Tag.Type type;
    final String rawName = tag.getRawTagName();
    if (rawName.startsWith("/")) {
        type = Tag.Type.CLOSE;
    } else if (isOpenClose(tag)) {
        type = Tag.Type.OPEN_CLOSE;
    } else {
        type = Tag.Type.OPEN;
    }

    // Locale-independent lower-casing: with the default locale, e.g. Turkish,
    // "DIV" becomes "dıv" (dotless i) and tag-name matching would fail.
    final String tagName = tag.getTagName().toLowerCase(java.util.Locale.ROOT);

    if ("!doctype".equals(tagName)) {
        this.doctype = tag.toHtml();
    } else if (type == Tag.Type.CLOSE) {
        if (!isKnownBrixTag(tagName)) {
            Map<String, String> attributes = Collections.emptyMap();
            items.add(new SimpleTag(tagName, type, attributes));
        }
    } else {
        // Extract the attributes once and use them for whichever branch applies.
        Map<String, String> attributes = getAttributes(tag);
        if (isKnownBrixTag(tagName)) {
            processBrixTag(nodes, current, items, tagName, attributes, type);
        } else {
            items.add(new SimpleTag(tagName, type, attributes));
        }
    }
}
/** * test expected behavior of htmlparser. * <p>htmlparser does neither unescape HTML entities found in text, nor * escape special characters in Node.toHtml(). We have a workaround based on this * behavior. If this expectation breaks, we need to modify our code.</p> * @throws Exception */ public void testHtmlParser_attributeValueEscaping() throws Exception { final String html = "<html>" + "<body>" + "<a href=\"http://example.com/api?a=1&b=2&c=3"\">anchor</a>" + "</body>" + "</html>"; byte[] bytes = html.getBytes(); ByteArrayInputStream bais = new ByteArrayInputStream(bytes); Page page = new Page(bais, "UTF-8"); Lexer lexer = new Lexer(page); Node node; while ((node = lexer.nextNode()) != null) { if (node instanceof Tag) { Tag tag = (Tag)node; if (tag.getTagName().equalsIgnoreCase("A") && !tag.isEndTag()) { assertEquals("href", "http://example.com/api?a=1&b=2&c=3"", tag.getAttribute("HREF")); String htmlout = tag.toHtml(); assertEquals("toHtml output", "<a href=\"http://example.com/api?a=1&b=2&c=3"\">", htmlout); } } } }
flushbefors(); writeout(tag.toHtml()); if (tag.getEndTag() != null) { recurse = false;