/**
 * Returns the tag name reported by the decorated tag.
 *
 * @return whatever {@code getTagName()} yields on the wrapped tag
 * @see org.htmlparser.Tag#getTagName()
 */
public String getTagName() {
    final String decoratedName = m_decorated.getTagName();
    return decoratedName;
}
/**
 * Returns the attribute vector reported by the decorated tag.
 *
 * @return whatever {@code getAttributesEx()} yields on the wrapped tag
 * @see org.htmlparser.Tag#getAttributesEx()
 */
@SuppressWarnings("unchecked")
public Vector<Attribute> getAttributesEx() {
    final Vector<Attribute> decoratedAttributes = m_decorated.getAttributesEx();
    return decoratedAttributes;
}
/**
 * Decides whether the contents of the given tag should be kept intact.
 *
 * @param tag the tag to inspect
 * @return {@code true} for !DOCTYPE, STYLE, SCRIPT, OBJECT and EMBED tags, and for
 *         META tags whose {@code http-equiv} attribute is "content-type"
 *         (compared case-insensitively); {@code false} otherwise
 */
private boolean isIntactTag(Tag tag) {
    final String name = tag.getTagName();

    // META is only kept intact when it declares the content type.
    if (name.equals("META")) {
        final String httpEquiv = tag.getAttribute("http-equiv");
        return "content-type".equalsIgnoreCase(httpEquiv);
    }

    if (name.equals("!DOCTYPE")) {
        return true;
    }
    if (name.equals("STYLE")) {
        return true;
    }
    if (name.equals("SCRIPT")) {
        return true;
    }
    if (name.equals("OBJECT")) {
        return true;
    }
    return name.equals("EMBED");
}
/**
 * Accepts only non-end tags whose tag name equals the configured name.
 * Non-tag nodes and end tags are rejected.
 * The end tags are available on the enclosing non-end tag.
 * @param node The node to check.
 * @return <code>true</code> if the node is a non-end tag with a matching name,
 * <code>false</code> otherwise.
 */
public boolean accept (Node node)
{
    if (!(node instanceof Tag))
        return (false);

    Tag candidate = (Tag)node;
    if (candidate.isEndTag ())
        return (false);

    return (candidate.getTagName ().equals (mName));
}
}
/**
 * Tracks the nesting depth of {@code div} tags as they are visited.
 * Each visited div tag increments {@code divDepth}; when the depth reaches 1
 * the tag's start position is stored in {@code elementStart}.
 *
 * @param tag the tag being visited
 */
public void visitTag(Tag tag) {
    // Lower-case with a fixed locale: the default-locale variant maps "DIV" to
    // "dıv" (dotless i) under a Turkish locale, which would never match "div".
    String name = tag.getTagName().toLowerCase(java.util.Locale.ROOT);
    if ("div".equals(name)) {
        divDepth++;
        if (divDepth == 1) {
            elementStart = tag.getStartPosition();
        }
    }
}
/**
 * Captures the start position and the source text of a tag.
 * The text between the tag's start and end positions is copied from the
 * tag's page into {@code chars}.
 *
 * @param tag
 *            The tag that we want to store information about.
 */
public TagInfo(final Tag tag) {
    final Page source = tag.getPage();
    startPosition = tag.getStartPosition();
    final int stop = tag.getEndPosition();
    final int length = stop - startPosition;
    chars = new char[length];
    source.getText(chars, 0, startPosition, stop);
}
}
/** * test expected behavior of htmlparser. * <p>htmlparser does neither unescape HTML entities found in text, nor * escape special characters in Node.toHtml(). We have a workaround based on this * behavior. If this expectation breaks, we need to modify our code.</p> * @throws Exception */ public void testHtmlParser_attributeValueEscaping() throws Exception { final String html = "<html>" + "<body>" + "<a href=\"http://example.com/api?a=1&b=2&c=3"\">anchor</a>" + "</body>" + "</html>"; byte[] bytes = html.getBytes(); ByteArrayInputStream bais = new ByteArrayInputStream(bytes); Page page = new Page(bais, "UTF-8"); Lexer lexer = new Lexer(page); Node node; while ((node = lexer.nextNode()) != null) { if (node instanceof Tag) { Tag tag = (Tag)node; if (tag.getTagName().equalsIgnoreCase("A") && !tag.isEndTag()) { assertEquals("href", "http://example.com/api?a=1&b=2&c=3"", tag.getAttribute("HREF")); String htmlout = tag.toHtml(); assertEquals("toHtml output", "<a href=\"http://example.com/api?a=1&b=2&c=3"\">", htmlout); } } } }
name = tag.getTagName (); if (tag.isEndTag ()) ends = current.getEndTagEnders (); else ends = current.getEnders (); for (int i = 0; i < ends.length; i++) if (name.equalsIgnoreCase (ends[i]))
if (ret.isEmptyXmlTag ()) ret.setEndTag (ret); else do name = next.getTagName (); if (next.isEndTag () && name.equals (ret.getTagName ())) ret.setEndTag (next); node = null; else if (!next.isEndTag ()) scanner = next.getThisScanner (); if (null != scanner) if (next.isEmptyXmlTag ()) next.setEndTag (next); finishTag (next, lexer); addChild (ret, next); scanner = opener.getThisScanner (); if ((null != scanner) && (scanner == this)) if (name.equals (boffo.getTagName ())) index = i; else if (isTagToBeEndedFor (boffo, next)) // check DTD
/**
 * Converts a parsed tag into an item, or hands it to the Brix tag processing.
 * <ul>
 * <li>A {@code !doctype} tag only has its HTML stored in {@code doctype}.</li>
 * <li>A closing tag is added as a {@link SimpleTag} without attributes, unless
 * it is a known Brix tag, in which case nothing is added.</li>
 * <li>Any other tag is either passed to {@code processBrixTag} (known Brix tag)
 * or added as a {@link SimpleTag} with its attributes.</li>
 * </ul>
 *
 * @param nodes   passed through to {@code processBrixTag}
 * @param current passed through to {@code processBrixTag}
 * @param items   list that receives the created items
 * @param tag     the parsed tag to process
 */
private void processTag(List<AbstractContainer> nodes, int current, List<Item> items,
        org.htmlparser.Tag tag) {
    final Tag.Type type;
    final String rawName = tag.getRawTagName();
    if (rawName.startsWith("/")) {
        type = Tag.Type.CLOSE;
    } else if (isOpenClose(tag)) {
        type = Tag.Type.OPEN_CLOSE;
    } else {
        type = Tag.Type.OPEN;
    }

    // Fixed locale: default-locale lower-casing turns 'I' into a dotless 'ı'
    // under a Turkish locale, so names such as "TITLE" would not be recognised.
    final String tagName = tag.getTagName().toLowerCase(java.util.Locale.ROOT);

    if ("!doctype".equals(tagName)) {
        this.doctype = tag.toHtml();
    } else if (type == Tag.Type.CLOSE) {
        if (!isKnownBrixTag(tagName)) {
            Map<String, String> attributes = Collections.emptyMap();
            items.add(new SimpleTag(tagName, type, attributes));
        }
    } else {
        // Extract the attributes once and use them on whichever path is taken.
        Map<String, String> attributes = getAttributes(tag);
        if (isKnownBrixTag(tagName)) {
            processBrixTag(nodes, current, items, tagName, attributes, type);
        } else {
            items.add(new SimpleTag(tagName, type, attributes));
        }
    }
}
/**
 * Replaces an inline base64 image source with a content-id reference.
 * For an {@code img} tag whose {@code src} contains "base64", the attribute is
 * rewritten to {@code cid:<contentId>} and the text following the "base64"
 * marker and its one-character separator is stored in {@code base64ImagesMap}
 * under that content id. Tags without a {@code src}, or whose value has nothing
 * after the marker, are left untouched.
 *
 * @param tag the tag being visited
 */
public void visitTag(Tag tag) {
    if (!tag.getRawTagName().equalsIgnoreCase("img")) {
        return;
    }

    String imageValue = tag.getAttribute("src");
    // An <img> without a src attribute has nothing to convert.
    if (imageValue == null || !imageValue.contains("base64")) {
        return;
    }

    // Skip the 6-character marker plus the separator that follows it.
    int payloadStart = imageValue.indexOf("base64") + 7;
    if (payloadStart > imageValue.length()) {
        // Value ends right at the marker: no payload, so do not touch the tag.
        return;
    }

    String contentId = getContentId();
    tag.setAttribute("src", "cid:" + contentId);
    base64ImagesMap.put(contentId, imageValue.substring(payloadStart));
}
mSupport.processName (tag.getTagName (), mParts, false); else mParts[2] = tag.getTagName (); else if (mNameSpaces) mParts[2] = ""; else mParts[2] = tag.getTagName (); mParts[2], // raw new Attributes (tag, mSupport, mParts)); NodeList children = tag.getChildren (); if (null != children) for (int i = 0; i < children.size (); i++) doSAX (children.elementAt (i)); end = tag.getEndTag (); if (null != end) mSupport.processName (end.getTagName (), mParts, false); else mParts[2] = end.getTagName (); else if (mNameSpaces) mParts[2] = ""; else mParts[2] = end.getTagName (); mContentHandler.endElement ( mParts[0], // uri
result.append('<'); int n = -1; if (tag.isEndTag()) { result.append('/'); for (int i = sTags.size() - 1; i >= 0; i--) { Tag othertag = sTags.get(i); if (othertag.getTagName().equals(tag.getTagName())) { if (othertag.isEndTag()) { recursion++; } else { if ("BR".equals(tag.getTagName())) { result.append("br"); } else { result.appendCodePoint(Character.toLowerCase(tag.getTagName().codePointAt(0))); if (tag.isEmptyXmlTag()) { // This only detects tags that already have a
/**
 * Looks up an attribute on the decorated tag.
 *
 * @param arg0 the attribute name, passed through unchanged
 * @return whatever {@code getAttribute(arg0)} yields on the wrapped tag
 * @see org.htmlparser.Tag#getAttribute(java.lang.String)
 */
public String getAttribute(String arg0) {
    final String decoratedValue = m_decorated.getAttribute(arg0);
    return decoratedValue;
}
/**
 * Finish off a tag.
 * Adds a virtual end tag when the tag has no end tag yet,
 * sets this tag as the parent of its end tag,
 * and performs the semantic action.
 * @param tag The tag to finish off.
 * @param lexer A lexer positioned at the end of the tag.
 * @throws ParserException declared for the calls made while finishing the tag.
 */
protected void finishTag (Tag tag, Lexer lexer)
    throws
        ParserException
{
    final boolean lacksEndTag = (tag.getEndTag () == null);
    if (lacksEndTag)
    {
        // Synthesize an end tag at the lexer's current cursor position.
        tag.setEndTag (
            createVirtualEndTag (
                tag,
                lexer,
                lexer.getPage (),
                lexer.getCursor ().getPosition ()));
    }
    tag.getEndTag ().setParent (tag);
    tag.doSemanticAction ();
}
/**
 * Returns the raw tag name reported by the decorated tag.
 *
 * @return whatever {@code getRawTagName()} yields on the wrapped tag
 * @see org.htmlparser.Tag#getRawTagName()
 */
public String getRawTagName() {
    final String decoratedRawName = m_decorated.getRawTagName();
    return decoratedRawName;
}
/**
 * Tells whether the tag is written in self-closing form.
 * This is the case when the raw tag name ends with "/", or when the last
 * entry of the tag's attribute list is named "/".
 *
 * @param tag the tag to inspect
 * @return {@code true} if the tag is self-closing, {@code false} otherwise
 *         (including when the tag exposes no attributes at all)
 */
private boolean isOpenClose(org.htmlparser.Tag tag) {
    if (tag.getRawTagName().endsWith("/")) {
        return true;
    }

    List<?> atts = tag.getAttributesEx();
    // Without any attribute entries there is no trailing "/" to find;
    // indexing size() - 1 would throw on an empty list.
    if (atts == null || atts.isEmpty()) {
        return false;
    }

    Attribute last = (Attribute) atts.get(atts.size() - 1);
    return "/".equals(last.getName());
}
/**
 * Builds a node definition from a tag.
 * Records whether the tag is an empty XML tag, its start position, and a
 * copy of the page text between the tag's start and end positions.
 *
 * @param tag
 *            The tag that this node definition belongs to.
 * @param templateFileName
 *            the name of the template file.
 */
public TagNode(final Tag tag, final String templateFileName) {
    emptyTag = tag.isEmptyXmlTag();

    final Page source = tag.getPage();
    startPositionInTemplate = tag.getStartPosition();
    final int stop = tag.getEndPosition();

    textRepresentation = new char[stop - startPositionInTemplate];
    source.getText(textRepresentation, 0, startPositionInTemplate, stop);

    this.templateFileName = templateFileName;
}
mSupport.processName (tag.getTagName (), mParts, false); else mParts[2] = tag.getTagName (); else if (mNameSpaces) mParts[2] = ""; else mParts[2] = tag.getTagName (); for (int i = 0; i < children.size (); i++) doSAX (children.elementAt (i)); end = tag.getEndTag (); if (null != end) mSupport.processName (end.getTagName (), mParts, false); else mParts[2] = end.getTagName (); else if (mNameSpaces) mParts[2] = ""; else mParts[2] = end.getTagName (); mContentHandler.endElement ( mParts[0], // uri
tagNode.setTagName(tag.getRawTagName()); Iterator<PageAttribute> iterator = attributes.iterator(); if (!tag.isEmptyXmlTag()) { visitorPath.add(new VisitorPathElement().withEwtNode(tagNode).withTag(tag)); parentNode = tagNode; visitMode = VisitMode.INLINE; int tagEndPosition = tag.getEndPosition() + 1; Page page = tag.getPage();