org.htmlparser.util.SimpleNodeIterator java code examples

/**
 * Locates one of the nodes returned by <code>children()</code> by
 * object identity.
 * Only that flat sequence is inspected - nothing is searched
 * recursively - and candidates are compared with <code>==</code>,
 * not <code>equals()</code>.
 * @param searchNode The child node to look for.
 * @return The zero-based position of the node within the children,
 * or -1 if none of the children is that node.
 */
public int findPositionOf(Node searchNode) {
  int index = 0;
  SimpleNodeIterator iterator = children();
  while (iterator.hasMoreNodes()) {
    if (iterator.nextNode() == searchNode) {
      return index;
    }
    index++;
  }
  // Ran off the end without meeting the requested node.
  return -1;
}

/**
 * Finds the index of a given node among the direct children.
 * The match is by reference (the very same object), and the scan is
 * linear over the nodes returned by <code>children()</code> with no
 * descent into grandchildren.
 * @param searchNode The node whose position is wanted.
 * @return The offset of the node counting from zero, or -1 when the
 * node is not one of the children.
 */
public int findPositionOf(Node searchNode) {
  SimpleNodeIterator it = children();
  for (int position = 0; it.hasMoreNodes(); position++) {
    Node candidate = it.nextNode();
    if (candidate == searchNode) {
      return position;
    }
  }
  return -1;
}

 // Combine a tag-name filter ("h3") with an attribute filter (id = "h3_" + num).
TagNameFilter headingFilter = new TagNameFilter("h3");
HasAttributeFilter idFilter = new HasAttributeFilter("id", "h3_"+num);
NodeList nodes = parser.extractAllNodesThatMatch(new AndFilter(headingFilter, idFilter));

// Print the string text of every match; each match is cast to HeadingTag.
SimpleNodeIterator nodeIterator = nodes.elements();
while (nodeIterator.hasMoreNodes()) {
  HeadingTag heading = (HeadingTag) nodeIterator.nextNode();
  String text = heading.getStringText();
  System.out.println(text);
}

/**
 * Return the HTML code for the children of this tag.
 * The <code>toHtml()</code> output of every node returned by
 * <code>children()</code> is concatenated in iteration order.
 * @return A string with the HTML code for the contents of this tag;
 * empty when there are no children to iterate.
 */
public String getChildrenHTML() {
  // The buffer never leaves this method, so the unsynchronized
  // StringBuilder is sufficient (StringBuffer would lock on every append).
  StringBuilder buff = new StringBuilder();
  for (SimpleNodeIterator e = children();e.hasMoreNodes();) {
    // The cast is kept from the original: toHtml() is invoked through AbstractNode.
    AbstractNode node = (AbstractNode)e.nextNode();
    buff.append(node.toHtml());
  }
  return buff.toString();
}

/**
 * Build the HTML for everything contained in this tag.
 * Each child is cast to <code>AbstractNode</code> and its
 * <code>toHtml()</code> result is appended, in iteration order.
 * @return The concatenated HTML of the children.
 */
public String getChildrenHTML() {
  StringBuilder html = new StringBuilder();
  SimpleNodeIterator iterator = children();
  while (iterator.hasMoreNodes()) {
    html.append(((AbstractNode) iterator.nextNode()).toHtml());
  }
  return html.toString();
}

/**
 * Return the textual contents of this tag and its children.
 * The result is the concatenation of <code>toPlainTextString()</code>
 * for every node returned by <code>children()</code>, in iteration order.
 * @return The 'browser' text contents of this tag.
 */
public String toPlainTextString() {
  // Method-local accumulator: StringBuilder avoids the per-append
  // synchronization of StringBuffer without changing the result.
  StringBuilder stringRepresentation = new StringBuilder();
  for (SimpleNodeIterator e=children();e.hasMoreNodes();) {
    stringRepresentation.append(e.nextNode().toPlainTextString());
  }
  return stringRepresentation.toString();
}

/**
 * Collect the plain text of every child of this tag.
 * Children are visited in iteration order and the
 * <code>toPlainTextString()</code> of each one is appended to the result.
 * @return The 'browser' text contents of this tag.
 */
public String toPlainTextString() {
  StringBuilder text = new StringBuilder();
  SimpleNodeIterator iterator = children();
  while (iterator.hasMoreNodes()) {
    Node child = iterator.nextNode();
    text.append(child.toPlainTextString());
  }
  return text.toString();
}

  /**
   * Append the HTML of each child of this node to the buffer.
   * Every node returned by <code>children()</code> contributes its
   * <code>toHtml()</code> output, in iteration order.
   * @param sb The buffer to append to.
   */
  protected void putChildrenInto(StringBuilder sb)
  {
    SimpleNodeIterator iterator = children ();
    while (iterator.hasMoreNodes ())
    {
      // No filtering is applied here: a check that would have skipped
      // "virtual" tags (start position equal to end position) is disabled,
      // so every child is written.
      sb.append (iterator.nextNode ().toHtml ());
    }
  }

/**
 * Forwards the visitor to each child of the decorated node; this node
 * itself is not presented to the visitor. Does nothing when the
 * decorated node reports no child list.
 * @param visitor The visitor handed to every child's <code>accept</code>.
 * @see org.htmlparser.nodes.TagNode#accept(org.htmlparser.visitors.NodeVisitor)
 */
public void accept(NodeVisitor visitor) {
  NodeList children = m_decorated.getChildren();
  if (children != null) {
    for (SimpleNodeIterator it = children.elements(); it.hasMoreNodes();) {
      it.nextNode().accept(visitor);
    }
  }
}

/**
 * Finds the first child whose plain text contains the given text.
 * Both the search text and each child's plain text are converted to
 * uppercase with the supplied locale before comparing, so the match
 * ignores case.
 * @param text The text to search for.
 * @param locale The locale to use in converting to uppercase.
 * @return int The zero-based index, within the children, of the first
 * node containing the text, or -1 if no child contains it.
 */
public int findPositionOf (String text, Locale locale)
{
  String needle = text.toUpperCase (locale);
  int index = 0;
  SimpleNodeIterator iterator = children ();
  while (iterator.hasMoreNodes ())
  {
    String haystack = iterator.nextNode ().toPlainTextString ().toUpperCase (locale);
    if (haystack.contains (needle))
      return index;
    index++;
  }
  return -1;
}

/**
 * Returns the position of the first child node that contains the text.
 * The comparison is case-insensitive: the search text and the plain
 * text of each child are uppercased using the given locale.
 * The position can be used to index into the children.
 * @param text The text to look for.
 * @param locale The locale applied when uppercasing.
 * @return int The index of the first matching child counting from
 * zero, or -1 when the text occurs in none of the children.
 */
public int findPositionOf (String text, Locale locale)
{
  final String upperText = text.toUpperCase (locale);
  SimpleNodeIterator e = children ();
  for (int position = 0; e.hasMoreNodes (); position++)
  {
    Node child = e.nextNode ();
    String childText = child.toPlainTextString ();
    if (childText.toUpperCase (locale).indexOf (upperText) >= 0)
      return position;
  }
  return -1;
}

/**
 * Searches the children for a tag whose "NAME" attribute equals the
 * given name and returns the first one found. Children that are not
 * <code>Tag</code> instances, and tags without that attribute, are skipped.
 * @param name The attribute value to match.
 * @return Tag The first matching tag, or <code>null</code> if there is none.
 */
public Tag searchByName(String name) {
  for (SimpleNodeIterator e = children(); e.hasMoreNodes();) {
    Node child = e.nextNode();
    if (!(child instanceof Tag)) {
      continue;
    }
    Tag candidate = (Tag) child;
    String nameAttribute = candidate.getAttribute("NAME");
    if (nameAttribute != null && nameAttribute.equals(name)) {
      return candidate;
    }
  }
  return null;
}

/**
 * Passes the visitor on to the children of the decorated node without
 * visiting this node itself. Returns immediately if the decorated node
 * has no child list.
 * @param visitor The visitor each child is asked to accept.
 * @see org.htmlparser.Tag#accept(org.htmlparser.visitors.NodeVisitor)
 */
public void accept(NodeVisitor visitor) {
  NodeList decoratedChildren = m_decorated.getChildren();
  if (null == decoratedChildren) {
    return;
  }
  for (SimpleNodeIterator iterator = decoratedChildren.elements(); iterator.hasMoreNodes();) {
    Node child = iterator.nextNode();
    child.accept(visitor);
  }
}

/**
 * Looks through the children for the first <code>Tag</code> whose
 * "NAME" attribute is equal to the supplied name.
 * @param name The value the "NAME" attribute must have.
 * @return Tag The first tag with a matching attribute, or
 * <code>null</code> when no child qualifies.
 */
public Tag searchByName(String name) {
  Tag match = null;
  SimpleNodeIterator iterator = children();
  while (match == null && iterator.hasMoreNodes()) {
    Node child = iterator.nextNode();
    if (child instanceof Tag) {
      Tag candidate = (Tag) child;
      String attributeValue = candidate.getAttribute("NAME");
      // Tags lacking the attribute yield null and can never match.
      if (attributeValue != null && attributeValue.equals(name)) {
        match = candidate;
      }
    }
  }
  return match;
}

/**
 * Lets the visitor see the children of the decorated node while this
 * node stays invisible to it. When the decorated node has no child
 * list the call is a no-op.
 * @param visitor The visitor to relay to each child.
 * @see org.htmlparser.Tag#accept(org.htmlparser.visitors.NodeVisitor)
 */
public void accept(NodeVisitor visitor) {
  NodeList kids = m_decorated.getChildren();
  if (kids != null) {
    SimpleNodeIterator walker = kids.elements();
    while (walker.hasMoreNodes()) {
      Node next = walker.nextNode();
      next.accept(visitor);
    }
  }
}

/**
 * Collects the href values of the link tags found in a page.
 * Each value is trimmed and has every literal <code>&amp;amp;</code>
 * sequence replaced by <code>&amp;</code>.
 * Matched tags that carry no href attribute are skipped instead of
 * causing a NullPointerException.
 * @param pageData The page whose parsed nodes are searched.
 * @return The cleaned href values in match order; an empty list when
 * the page has no nodes.
 */
public List<String> extractLinks(PageData pageData) {
  NodeList nodes = pageData.getNodes();
  if (nodes == null) {
    return Collections.emptyList();
  }
  // NOTE(review): the 'true' argument presumably requests a recursive
  // search through nested nodes - confirm against the NodeList API.
  NodeList linkNodes = nodes.extractAllNodesThatMatch(nodeFilter, true);
  List<String> links = new ArrayList<String>(linkNodes.size());
  for (SimpleNodeIterator it = linkNodes.elements(); it.hasMoreNodes();) {
    Tag tag = (Tag) it.nextNode();
    String href = tag.getAttribute("href");
    if (href == null) {
      // No href on this tag: nothing to extract.
      continue;
    }
    // Literal replacement; no regular expression is needed here.
    links.add(href.trim().replace("&amp;", "&"));
  }
  return links;
}

/**
 * Look up a textarea of this form by its "NAME" attribute.
 * The nodes returned by <code>getFormTextareas()</code> are examined in
 * order, each one being cast to <code>TextareaTag</code>, and the
 * first whose attribute equals the given name is returned.
 * @param name Name of the textarea tag to be found within the form.
 * @return The <code>TEXTAREA</code> tag with the matching name, or
 * <code>null</code> if no textarea has that name.
 */
public TextareaTag getTextAreaTag(String name)
{
  SimpleNodeIterator areas = getFormTextareas ().elements();
  while (areas.hasMoreNodes())
  {
    TextareaTag candidate = (TextareaTag)areas.nextNode();
    String candidateName = candidate.getAttribute("NAME");
    if (candidateName != null && candidateName.equals(name))
      return candidate;
  }
  return null;
}

 // Create a parser on the URL string and parse with a tag-name filter for "IMG".
Parser parser = new Parser("http://www.yahoo.com/");
TagNameFilter imageFilter = new TagNameFilter("IMG");
NodeList list = parser.parse(imageFilter);

// Print the src attribute of every node in the result, one per line.
SimpleNodeIterator images = list.elements();
while (images.hasMoreNodes()) {
  Tag image = (Tag) images.nextNode();
  String source = image.getAttribute("src");
  System.out.println(source);
}

/**
 * Gets a frame by name.
 * Names are checked without case sensitivity and conversion to uppercase
 * is performed with the locale provided. Nodes that are not
 * <code>FrameTag</code> instances, and frames that report no name, are
 * skipped.
 * @param name The name of the frame to retrieve.
 * @param locale The locale to use when converting to uppercase.
 * @return The first frame with the specified name or <code>null</code>
 * if it wasn't found.
 */
public FrameTag getFrame (String name, Locale locale)
{
  String wanted = name.toUpperCase (locale);
  for (SimpleNodeIterator e = getFrames ().elements (); e.hasMoreNodes (); )
  {
    Node node = e.nextNode ();
    if (node instanceof FrameTag)
    {
      FrameTag frame = (FrameTag)node;
      String frameName = frame.getFrameName ();
      // NOTE(review): getFrameName() appears able to return null for a
      // frame without a name - confirm. Such a frame can never match, so
      // skip it rather than dereferencing null.
      if ((null != frameName) && frameName.toUpperCase (locale).equals (wanted))
        return (frame);
    }
  }
  return (null);
}

/**
 * Extracts the title from the given HTML.
 * All nodes matching <code>TITLE_FILTER</code> are examined; when there
 * are several, the trimmed title of the last one is returned.
 *
 * @param html The input handed to the <code>Parser</code> constructor.
 * @return never null, just an empty string if no title node matched.
 * @throws ParserException declared by this method; presumably raised
 * by the parser calls - confirm against the Parser API.
 */
public static String extractTitle(String html) throws ParserException {
  NodeList matches = new Parser(html).extractAllNodesThatMatch(TITLE_FILTER);
  String lastTitle = "";
  for (SimpleNodeIterator it = matches.elements(); it.hasMoreNodes();) {
    // Later matches overwrite earlier ones.
    lastTitle = ((TitleTag) it.nextNode()).getTitle().trim();
  }
  return lastTitle;
}

Javadoc

The HTMLSimpleEnumeration interface is similar to NodeIterator, except that it does not throw exceptions. It is useful with HTMLVector for enumerating the elements in a simple manner, without needing to cast each element to Node.

Most used methods

hasMoreNodes
Check if more nodes are available.
nextNode
Get the next node.

Popular in Java

Start an intent from android
scheduleAtFixedRate (Timer)
compareTo (BigDecimal)
orElseThrow (Optional)
Return the contained value, if present, otherwise throw an exception to be created by the provided s
InputStream (java.io)
A readable source of bytes. Most clients will use input streams that read data from the file system (
UnknownHostException (java.net)
Thrown when a hostname cannot be resolved.
LinkedHashMap (java.util)
LinkedHashMap is an implementation of Map that guarantees iteration order. All optional operations a
PriorityQueue (java.util)
A PriorityQueue holds elements on a priority heap, which orders the elements according to their natu
TreeSet (java.util)
TreeSet is an implementation of SortedSet. All optional operations (adding and removing) are support
Modifier (javassist)
The Modifier class provides static methods and constants to decode class and member access modifiers
Top Vim plugins

How to use SimpleNodeIterator in org.htmlparser.util

Best Java code snippets using org.htmlparser.util.SimpleNodeIterator (Showing top 20 results out of 315)

How to use
SimpleNodeIterator
in
org.htmlparser.util