org.htmlparser.Tag Java code examples

/**
 * Returns the tag name reported by the decorated tag.
 *
 * @return whatever {@code m_decorated.getTagName()} returns
 * @see org.htmlparser.Tag#getTagName()
 */
public String getTagName() {
  final String decoratedName = m_decorated.getTagName();
  return decoratedName;
}

/**
 * Returns the attribute list reported by the decorated tag.
 *
 * @return whatever {@code m_decorated.getAttributesEx()} returns
 * @see org.htmlparser.Tag#getAttributesEx()
 */
@SuppressWarnings("unchecked")
public Vector<Attribute> getAttributesEx() {
  final Vector<Attribute> decoratedAttributes = m_decorated.getAttributesEx();
  return decoratedAttributes;
}

/**
 * Tells whether the contents of the given tag should be kept intact.
 * <p>This is the case for {@code !DOCTYPE}, {@code STYLE}, {@code SCRIPT},
 * {@code OBJECT} and {@code EMBED} tags, and for a {@code META} tag whose
 * {@code http-equiv} attribute equals {@code content-type} (ignoring case).
 * Tag names are compared case-sensitively against the upper-case literals.</p>
 *
 * @param tag the tag to inspect
 * @return {@code true} if the tag is one of the kinds listed above
 */
private boolean isIntactTag(Tag tag) {
  final String name = tag.getTagName();
  if (name.equals("!DOCTYPE") || name.equals("STYLE") || name.equals("SCRIPT")) {
    return true;
  }
  if (name.equals("OBJECT") || name.equals("EMBED")) {
    return true;
  }
  if (!name.equals("META")) {
    return false;
  }
  // Only a META tag gets its http-equiv attribute looked up.
  final String httpEquiv = tag.getAttribute("http-equiv");
  return "content-type".equalsIgnoreCase(httpEquiv);
}

  /**
   * Accept only start tags whose name equals <code>mName</code>.
   * Nodes that are not tags are rejected, and so are end tags.
   * The end tags are available on the enclosing non-end tag.
   * @param node The node to check.
   * @return <code>true</code> if the node is a non-end tag whose name
   * equals <code>mName</code>, <code>false</code> otherwise.
   */
  public boolean accept (Node node)
  {
    if (!(node instanceof Tag))
      return (false);

    Tag candidate = (Tag)node;
    if (candidate.isEndTag ())
      return (false);

    return (candidate.getTagName ().equals (mName));
  }
}

/**
 * Tracks the nesting depth of {@code div} tags as they are visited.
 * <p>Every visited tag named {@code div} (in any letter case) increments
 * {@code divDepth}; when the depth becomes 1 the tag's start position is stored
 * in {@code elementStart}.</p>
 *
 * @param tag the tag being visited
 */
public void visitTag(Tag tag) {
 // Lower-case with Locale.ROOT: the no-argument toLowerCase() uses the JVM default
 // locale, and under a Turkish locale "DIV" becomes "dıv" (dotless i), which would
 // never equal "div". Fully qualified because the file's import block is not visible here.
 String name = tag.getTagName().toLowerCase(java.util.Locale.ROOT);
 if ("div".equals(name)) {
  divDepth++;
  if (divDepth == 1) {
   elementStart = tag.getStartPosition();
  }
 }
}

 /**
  * Creates a snapshot of a tag: its start position and the page text it spans.
  *
  * @param tag
  *          The tag whose start position and text are captured. The text is
  *          read from the tag's page between the tag's start and end positions.
  */
 public TagInfo(final Tag tag) {
  final Page sourcePage = tag.getPage();
  final int begin = tag.getStartPosition();
  final int end = tag.getEndPosition();
  startPosition = begin;
  // The buffer is sized to exactly the span covered by the tag.
  chars = new char[end - begin];
  sourcePage.getText(chars, 0, begin, end);
 }
}

/**
 * Tests the expected behavior of htmlparser.
 * <p>htmlparser neither unescapes HTML entities found in attribute values nor
 * escapes special characters in Node.toHtml().  We have a workaround based on this
 * behavior.  If this expectation breaks, we need to modify our code.</p>
 * <p>The test lexes a small document and, for the opening {@code A} tag, checks that
 * both the {@code HREF} attribute value and the {@code toHtml()} output are
 * returned exactly as written in the input.</p>
 * @throws Exception if encoding the input or lexing the page fails
 */
public void testHtmlParser_attributeValueEscaping() throws Exception {
  final String html = "<html>" +
      "<body>" +
      "<a href=\"http://example.com/api?a=1&amp;b=2&c=3&#34;\">anchor</a>" +
      "</body>" +
      "</html>";
  // Encode with the same charset the Page is told to decode with; the no-argument
  // getBytes() would use the platform default charset instead.
  byte[] bytes = html.getBytes("UTF-8");
  ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
  Page page = new Page(bais, "UTF-8");
  Lexer lexer = new Lexer(page);
  Node node;
  while ((node = lexer.nextNode()) != null) {
    if (node instanceof Tag) {
      Tag tag = (Tag)node;
      if (tag.getTagName().equalsIgnoreCase("A") && !tag.isEndTag()) {
        assertEquals("href", "http://example.com/api?a=1&amp;b=2&c=3&#34;", tag.getAttribute("HREF"));
        String htmlout = tag.toHtml();
        assertEquals("toHtml output", "<a href=\"http://example.com/api?a=1&amp;b=2&c=3&#34;\">", htmlout);
      }
    }
  }
}

name = tag.getTagName ();
if (tag.isEndTag ())
  ends = current.getEndTagEnders ();
else
  ends = current.getEnders ();
for (int i = 0; i < ends.length; i++)
  if (name.equalsIgnoreCase (ends[i]))

if (ret.isEmptyXmlTag ())
  ret.setEndTag (ret);
else
  do
        name = next.getTagName ();
        if (next.isEndTag () && name.equals (ret.getTagName ()))
          ret.setEndTag (next);
          node = null;
        else if (!next.isEndTag ())
          scanner = next.getThisScanner ();
          if (null != scanner)
                if (next.isEmptyXmlTag ())
                  next.setEndTag (next);
                  finishTag (next, lexer);
                  addChild (ret, next);
            scanner = opener.getThisScanner ();
            if ((null != scanner) && (scanner == this))
                if (name.equals (boffo.getTagName ()))
                  index = i;
                else if (isTagToBeEndedFor (boffo, next)) // check DTD

/**
 * Turns one parsed tag into an item, or records it as the document's doctype.
 * <ul>
 * <li>The tag type is CLOSE when the raw name starts with "/", OPEN_CLOSE when
 * {@code isOpenClose(tag)} is true, and OPEN otherwise.</li>
 * <li>A {@code !doctype} tag is not added to {@code items}; its HTML is stored in
 * {@code doctype}.</li>
 * <li>A closing tag is added as a {@code SimpleTag} with no attributes, unless it is a
 * known brix tag, in which case nothing is added.</li>
 * <li>Any other tag is handed to {@code processBrixTag} if it is a known brix tag, or
 * added as a {@code SimpleTag} with its attributes otherwise.</li>
 * </ul>
 *
 * @param nodes   passed through to {@code processBrixTag}
 * @param current passed through to {@code processBrixTag}
 * @param items   the list that receives the created items
 * @param tag     the parsed tag to process
 */
private void processTag(List<AbstractContainer> nodes, int current, List<Item> items,
            org.htmlparser.Tag tag) {
  final Tag.Type type;
  final String rawName = tag.getRawTagName();
  if (rawName.startsWith("/")) {
    type = Tag.Type.CLOSE;
  } else if (isOpenClose(tag)) {
    type = Tag.Type.OPEN_CLOSE;
  } else {
    type = Tag.Type.OPEN;
  }
  // Locale.ROOT keeps the lower-casing independent of the JVM default locale (a Turkish
  // default would map 'I' to a dotless 'ı' and break the name comparisons below).
  // Fully qualified because the file's import block is not visible here.
  final String tagName = tag.getTagName().toLowerCase(java.util.Locale.ROOT);
  if ("!doctype".equals(tagName)) {
    this.doctype = tag.toHtml();
  } else if (type == Tag.Type.CLOSE) {
    if (!isKnownBrixTag(tagName)) {
      Map<String, String> attributes = Collections.emptyMap();
      items.add(new SimpleTag(tagName, type, attributes));
    }
  } else {
    // Computed once and shared by both branches below.
    final Map<String, String> attributes = getAttributes(tag);
    if (isKnownBrixTag(tagName)) {
      processBrixTag(nodes, current, items, tagName, attributes, type);
    } else {
      items.add(new SimpleTag(tagName, type, attributes));
    }
  }
}

/**
 * Replaces inline base64 image sources with {@code cid:} references.
 * <p>For an {@code img} tag whose {@code src} attribute contains the text
 * {@code base64}, a content id is obtained from {@code getContentId()}, the
 * {@code src} attribute is rewritten to {@code "cid:" + contentId}, and the part of
 * the original value that follows the marker is stored in {@code base64ImagesMap}
 * under that content id.</p>
 * <p>Tags that are not {@code img}, have no {@code src} attribute, or whose
 * {@code src} does not contain {@code base64} are left untouched.</p>
 *
 * @param tag the tag being visited
 */
public void visitTag(Tag tag) {
  if (!tag.getRawTagName().equalsIgnoreCase("img")) {
    return;
  }
  String imageValue = tag.getAttribute("src");
  if (imageValue == null) {
    // <img> without a src attribute: nothing to rewrite.
    return;
  }
  int markerIndex = imageValue.indexOf("base64");
  if (markerIndex < 0) {
    return;
  }
  // Skip the 6 characters of "base64" plus the one character after it (presumably
  // the comma of a data URI -- TODO confirm). Clamp to the string length so a value
  // that ends right after the marker yields an empty payload instead of throwing.
  int payloadStart = Math.min(markerIndex + 7, imageValue.length());
  String contentId = getContentId();
  tag.setAttribute("src", "cid:" + contentId);
  base64ImagesMap.put(contentId, imageValue.substring(payloadStart));
}

  mSupport.processName (tag.getTagName (), mParts, false);
else
  mParts[2] = tag.getTagName ();
else if (mNameSpaces)
  mParts[2] = "";
else
  mParts[2] = tag.getTagName ();
  mParts[2], // raw
  new Attributes (tag, mSupport, mParts));
NodeList children = tag.getChildren ();
if (null != children)
  for (int i = 0; i < children.size (); i++)
    doSAX (children.elementAt (i));
end = tag.getEndTag ();
if (null != end)
    mSupport.processName (end.getTagName (), mParts, false);
  else
    mParts[2] = end.getTagName ();
  else if (mNameSpaces)
    mParts[2] = "";
  else
    mParts[2] = end.getTagName ();
  mContentHandler.endElement (
    mParts[0], // uri

result.append('<');
int n = -1;
if (tag.isEndTag()) {
  result.append('/');
  for (int i = sTags.size() - 1; i >= 0; i--) {
    Tag othertag = sTags.get(i);
    if (othertag.getTagName().equals(tag.getTagName())) {
      if (othertag.isEndTag()) {
        recursion++;
      } else {
if ("BR".equals(tag.getTagName())) {
  result.append("br");
} else {
  result.appendCodePoint(Character.toLowerCase(tag.getTagName().codePointAt(0)));
if (tag.isEmptyXmlTag()) { // This only detects tags that already have a

/**
 * Returns the value the decorated tag reports for the named attribute.
 *
 * @param arg0 the attribute name, passed unchanged to the decorated tag
 * @return whatever {@code m_decorated.getAttribute(arg0)} returns
 * @see org.htmlparser.Tag#getAttribute(java.lang.String)
 */
public String getAttribute(String arg0) {
  final String decoratedValue = m_decorated.getAttribute(arg0);
  return decoratedValue;
}

/**
 * Finish off a tag.
 * If the tag has no end tag yet, create a virtual end tag at the lexer's
 * current cursor position and attach it.
 * Set the end tag's parent to this tag.
 * Perform the semantic action.
 * @param tag The tag to finish off.
 * @param lexer A lexer positioned at the end of the tag.
 * @throws ParserException Declared by this method; not thrown directly here.
 */
protected void finishTag (Tag tag, Lexer lexer)
  throws
    ParserException
{
  if (tag.getEndTag () == null)
  {
    tag.setEndTag (
      createVirtualEndTag (
        tag,
        lexer,
        lexer.getPage (),
        lexer.getCursor ().getPosition ()));
  }
  tag.getEndTag ().setParent (tag);
  tag.doSemanticAction ();
}

/**
 * Returns the raw tag name reported by the decorated tag.
 *
 * @return whatever {@code m_decorated.getRawTagName()} returns
 * @see org.htmlparser.Tag#getRawTagName()
 */
public String getRawTagName() {
  final String decoratedRawName = m_decorated.getRawTagName();
  return decoratedRawName;
}

/**
 * Tells whether the given tag is written in self-closing ("open-close") form.
 * <p>A tag counts as open-close when its raw name ends with "/", or when the last
 * entry of its attribute list is an attribute named "/".</p>
 * <p>A tag with no raw name, or with a missing or empty attribute list, is reported
 * as not open-close instead of causing an exception.</p>
 *
 * @param tag the parsed tag to inspect
 * @return {@code true} if the tag is in open-close form, {@code false} otherwise
 */
private boolean isOpenClose(org.htmlparser.Tag tag) {
  final String rawName = tag.getRawTagName();
  if (rawName != null && rawName.endsWith("/")) {
    return true;
  }
  final List<?> atts = tag.getAttributesEx();
  if (atts == null || atts.isEmpty()) {
    // No last attribute to look at; indexing size() - 1 would throw here.
    return false;
  }
  final Attribute last = (Attribute) atts.get(atts.size() - 1);
  return last != null && "/".equals(last.getName());
}

/**
 * Creates a node definition from a parsed tag.
 * <p>Records whether the tag is an empty XML tag, the tag's start position, the
 * page text between the tag's start and end positions, and the template file name.</p>
 *
 * @param tag
 *          The tag that this node definition belongs to.
 * @param templateFileName
 *          the name of the template file.
 */
public TagNode(final Tag tag, final String templateFileName) {
 this.templateFileName = templateFileName;
 emptyTag = tag.isEmptyXmlTag();

 final Page sourcePage = tag.getPage();
 final int begin = tag.getStartPosition();
 final int end = tag.getEndPosition();

 startPositionInTemplate = begin;
 // The buffer is sized to exactly the span covered by the tag.
 textRepresentation = new char[end - begin];
 sourcePage.getText(textRepresentation, 0, begin, end);
}

  mSupport.processName (tag.getTagName (), mParts, false);
else
  mParts[2] = tag.getTagName ();
else if (mNameSpaces)
  mParts[2] = "";
else
  mParts[2] = tag.getTagName ();
  for (int i = 0; i < children.size (); i++)
    doSAX (children.elementAt (i));
end = tag.getEndTag ();
if (null != end)
    mSupport.processName (end.getTagName (), mParts, false);
  else
    mParts[2] = end.getTagName ();
  else if (mNameSpaces)
    mParts[2] = "";
  else
    mParts[2] = end.getTagName ();
  mContentHandler.endElement (
    mParts[0], // uri

tagNode.setTagName(tag.getRawTagName());
Iterator<PageAttribute> iterator = attributes.iterator();
if (!tag.isEmptyXmlTag()) {
 visitorPath.add(new VisitorPathElement().withEwtNode(tagNode).withTag(tag));
 parentNode = tagNode;
  visitMode = VisitMode.INLINE;
  int tagEndPosition = tag.getEndPosition() + 1;
  Page page = tag.getPage();

Javadoc

This interface represents a tag (<xxx yyy="zzz">) in the HTML document. Adds capabilities to a Node that are specific to a tag.

Most used methods

getTagName
Return the name of this tag. Note: This value is converted to uppercase and does not begin with "/"
getAttributesEx
Gets the attributes in the tag.
getAttribute
Returns the value of an attribute.
getEndTag
Get the end tag for this (composite) tag. For a non-composite tag this always returns null.
getRawTagName
Return the name of this tag.
toHtml
getAttributeEx
Returns the attribute with the given name.
isEndTag
Predicate to determine if this tag is an end tag (i.e. </HTML>).
getStartPosition
isEmptyXmlTag
Is this an empty xml tag of the form <tag/>.
setAttribute
Set attribute with given key/value pair, the value is quoted by quote.
setAttributesEx
Sets the attributes. NOTE: Values of the extended hashtable are two element arrays of String, with t

Popular in Java

Making http requests using okhttp
setContentView (Activity)
compareTo (BigDecimal)
getApplicationContext (Context)
BufferedWriter (java.io)
Wraps an existing Writer and buffers the output. Expensive interaction with the underlying reader is
Hashtable (java.util)
A plug-in replacement for JDK1.5 java.util.Hashtable. This version is based on org.cliffc.high_scale
Servlet (javax.servlet)
Defines methods that all servlets must implement. A servlet is a small Java program that runs within
HttpServletRequest (javax.servlet.http)
Extends the javax.servlet.ServletRequest interface to provide request information for HTTP servlets.
XPath (javax.xml.xpath)
XPath provides access to the XPath evaluation environment and expressions. Evaluation of XPath Expr
Window (java.awt)
A Window object is a top-level window with no borders and no menubar. The default layout for a windo
CodeWhisperer alternatives

How to use Tag in org.htmlparser

Best Java code snippets using org.htmlparser.Tag (Showing top 20 results out of 315)

How to use
Tag
in
org.htmlparser