/**
 * Collects the nodes of this list that the supplied filter accepts,
 * without recursing into child nodes.
 * @param filter The filter each node is tested against.
 * @return Whatever the two-argument overload returns when called with
 * the recursive flag set to <code>false</code>.
 */
public NodeList extractAllNodesThatMatch (NodeFilter filter)
{
    // Non-recursive variant: delegate with recursion switched off.
    final boolean recursive = false;
    return extractAllNodesThatMatch (filter, recursive);
}
/**
 * Collects the nodes of this list that the supplied filter accepts,
 * without recursing into child nodes.
 * @param filter The filter each node is tested against.
 * @return Whatever the two-argument overload returns when called with
 * the recursive flag set to <code>false</code>.
 */
public NodeList extractAllNodesThatMatch (NodeFilter filter)
{
    // Non-recursive variant: delegate with recursion switched off.
    final boolean recursive = false;
    return extractAllNodesThatMatch (filter, recursive);
}
/**
 * Gathers the nodes of the given list that the filter accepts (searching
 * recursively) and converts them to text via {@code toText}.
 *
 * @param nodeList the list to search
 * @param filter the filter deciding which nodes are kept
 * @return the result of {@code toText} applied to the accepted nodes
 */
public static String extractText(NodeList nodeList, NodeFilter filter) {
    NodeList accepted = nodeList.extractAllNodesThatMatch(filter, true);
    return toText(accepted);
}
/**
 * Convenience method to search for nodes of the given type.
 * @param classType The class to search for.
 * @param recursive If <code>true</code> digs into the children recursively.
 * @return The nodes accepted by a {@link NodeClassFilter} built from
 * <code>classType</code>.
 */
public NodeList searchFor (Class classType, boolean recursive)
{
    NodeClassFilter byClass = new NodeClassFilter (classType);
    return extractAllNodesThatMatch (byClass, recursive);
}
/**
 * Recursively extracts the nodes of {@code nodes} accepted by the given
 * filters and converts the result with {@code list}.
 *
 * <p>A single filter is used as-is; any other number of filters is combined
 * with an {@link AndFilter}. If a {@link ParserException} is raised, an
 * empty list is returned instead.
 *
 * @param nodeType the node class associated with {@code T}; it is not read
 *     in this method body (presumably it only drives type inference; confirm)
 * @param filters the filters a node has to satisfy
 * @return the converted matches, or an empty list on {@link ParserException}
 */
private <T extends Node> List<T> filter(final Class<T> nodeType, NodeFilter... filters) {
    final NodeFilter combined = (filters.length == 1) ? filters[0] : new AndFilter(filters);
    try {
        return list(nodes.extractAllNodesThatMatch(combined, true));
    } catch (ParserException e) {
        return Collections.emptyList();
    }
}
/**
 * Stores the "table" nodes extracted from {@code nodes} (non-recursive
 * overload) in the {@code tables} field and reports whether exactly one
 * was found.
 *
 * @return {@code true} when exactly one table node matched
 */
private boolean hasOneTable() {
    tables = nodes.extractAllNodesThatMatch(new TagNameFilter("table"));
    return 1 == tables.size();
}
/**
 * Stores the "table" nodes extracted from {@code nodes} (non-recursive
 * overload) in the {@code tables} field and reports whether exactly one
 * was found.
 *
 * @return {@code true} when exactly one table node matched
 */
private boolean hasOneTable() {
    tables = nodes.extractAllNodesThatMatch(new TagNameFilter("table"));
    return 1 == tables.size();
}
/**
 * Looks up an attribute on the first node accepted by the filter.
 * The list is searched recursively and the first match is cast to
 * {@link TagNode}.
 *
 * @param nodeList the list to search
 * @param filter the filter selecting candidate nodes
 * @param attributeName the name of the attribute to read
 * @return the attribute value of the first match, or {@code null} when
 *     nothing matches
 * @throws ParserException declared by the signature
 */
public static String extractAttribute(NodeList nodeList, NodeFilter filter, String attributeName) throws ParserException {
    NodeList matches = nodeList.extractAllNodesThatMatch(filter, true);
    if (matches.size() <= 0) {
        return null;
    }
    TagNode first = (TagNode) matches.elementAt(0);
    return first.getAttribute(attributeName);
}
// Build a parser for the resource named by this URL string and set its encoding.
Parser parser = new Parser("http://stackoverflow.com/questions/7293729/parsing-using-htmlparser/");
parser.setEncoding("UTF-8");

// Parse with a null filter (presumably "no filtering"; confirm against the Parser docs).
NodeList nl = parser.parse(null);

// Keep the "div" nodes, searching child nodes recursively, and print them.
TagNameFilter divFilter = new TagNameFilter("div");
NodeList div = nl.extractAllNodesThatMatch(divFilter, true);
String rendered = div.toString();
System.out.println(rendered);
/**
 * Returns the number of columns in the given row, taking column spans
 * into account.
 *
 * <p>Each child accepted by {@code HTML_ROW_FILTER} contributes the value of
 * its "colspan" attribute, or 1 when {@code MathUtils.parseInt} yields
 * {@code null} for that attribute. A row without a child list counts as
 * zero columns.
 *
 * @param row the table row whose cells are counted
 * @return the sum of the column spans of the row's matching children
 */
private static int getColumnCount( TableRow row )
{
    // getChildren() can be null for a tag without children; the sibling
    // helpers guard against that, so do the same here instead of throwing.
    if ( row.getChildren() == null )
    {
        return 0;
    }

    Node[] cells = row.getChildren().extractAllNodesThatMatch( HTML_ROW_FILTER ).toNodeArray();
    int cols = 0;
    for ( Node cell : cells )
    {
        Integer colSpan = MathUtils.parseInt( ((TagNode) cell).getAttribute( "colspan" ) );
        cols += colSpan != null ? colSpan : 1;
    }
    return cols;
}
/**
 * Returns the "tr" children of the first node in the given list.
 *
 * @param tables the list whose first element is inspected
 * @return the matching direct children (non-recursive overload), or an
 *     empty {@link NodeList} when that node has no child list
 */
private NodeList getRows(NodeList tables) {
    Node table = tables.elementAt(0);
    if (table.getChildren() == null) {
        return new NodeList();
    }
    return table.getChildren().extractAllNodesThatMatch(new TagNameFilter("tr"));
}
/**
 * Passes the "td" children of the given row to {@code addColsToMap}, but
 * only when the row has exactly two of them.
 *
 * @param map the map handed on to {@code addColsToMap}
 * @param row the row node whose children are examined; a row without a
 *     child list is ignored
 */
private void extractColumns(Map<String, String> map, Node row) {
    if (row.getChildren() == null) {
        return;
    }
    NodeList cells = row.getChildren().extractAllNodesThatMatch(new TagNameFilter("td"));
    if (cells.size() == 2) {
        addColsToMap(map, cells);
    }
}
/**
 * Returns the "tr" children of the first node in the given list.
 *
 * @param tables the list whose first element is inspected
 * @return the matching direct children (non-recursive overload), or an
 *     empty {@link NodeList} when that node has no child list
 */
private NodeList getRows(NodeList tables) {
    Node table = tables.elementAt(0);
    if (table.getChildren() == null) {
        return new NodeList();
    }
    return table.getChildren().extractAllNodesThatMatch(new TagNameFilter("tr"));
}
/**
 * Passes the "td" children of the given row to {@code addColsToMap}, but
 * only when the row has exactly two of them.
 *
 * @param map the map handed on to {@code addColsToMap}
 * @param row the row node whose children are examined; a row without a
 *     child list is ignored
 */
private void extractColumns(Map<String, String> map, Node row) {
    if (row.getChildren() == null) {
        return;
    }
    NodeList cells = row.getChildren().extractAllNodesThatMatch(new TagNameFilter("td"));
    if (cells.size() == 2) {
        addColsToMap(map, cells);
    }
}
// Feed the HTML held in MyHTML to a fresh parser and set its encoding.
Parser parser = new Parser();
parser.setInputHTML(MyHTML);
parser.setEncoding("UTF-8");

// Parse with a null filter (presumably "no filtering"; confirm against the Parser docs).
NodeList nl = parser.parse(null);

// Keep the nodes whose tag name is "MyTag", searching child nodes recursively.
TagNameFilter tagFilter = new TagNameFilter("MyTag");
NodeList node_list = nl.extractAllNodesThatMatch(tagFilter, true);
/**
 * Returns the "content" attribute of the first {@link MetaTag} whose
 * "http-equiv" or "name" attribute matches the given value.
 *
 * @param nodeList the list to search recursively
 * @param name the value passed to the attribute filter
 * @param httpEquiv {@code true} to filter on "http-equiv", {@code false}
 *     to filter on "name"
 * @return the "content" attribute of the first match, or {@code null} when
 *     nothing matches
 */
private static String getMeta(NodeList nodeList, String name, boolean httpEquiv) {
    final String keyAttribute;
    if (httpEquiv) {
        keyAttribute = "http-equiv";
    } else {
        keyAttribute = "name";
    }

    NodeFilter metaFilter = new AndFilter(
            new NodeClassFilter(MetaTag.class),
            new AttributeNodeFilter(keyAttribute, name));
    NodeList found = nodeList.extractAllNodesThatMatch(metaFilter, true);
    if (found.size() <= 0) {
        return null;
    }
    Tag first = (Tag) found.elementAt(0);
    return first.getAttribute("content");
}
/**
 * Parses the HTML returned by {@code examiner.html()} and returns the
 * nodes accepted by the given filter, searching recursively.
 *
 * @param filter the filter selecting nodes
 * @return the accepted nodes
 * @throws Exception declared by the signature; thrown by the calls made here
 */
private NodeList getMatchingTags(NodeFilter filter) throws Exception {
    Page page = new Page(examiner.html());
    Lexer lexer = new Lexer(page);
    NodeList parsed = new Parser(lexer).parse(null);
    return parsed.extractAllNodesThatMatch(filter, true);
}
/**
 * Parses the HTML returned by {@code examiner.html()} and returns the
 * nodes accepted by the given filter, searching recursively.
 *
 * @param filter the filter selecting nodes
 * @return the accepted nodes
 * @throws Exception declared by the signature; thrown by the calls made here
 */
private NodeList getMatchingTags(NodeFilter filter) throws Exception {
    Page page = new Page(examiner.html());
    Lexer lexer = new Lexer(page);
    NodeList parsed = new Parser(lexer).parse(null);
    return parsed.extractAllNodesThatMatch(filter, true);
}
/**
 * Counts the tags with a given name and "class" attribute found by a
 * recursive search of the parent tags selected by name and "id" prefix.
 *
 * @param childTag the tag name the counted nodes must have
 * @param tagClass the "class" attribute value the counted nodes must have
 * @param parentTag the tag name of the parent nodes
 * @param parentIdPrefix the prefix passed to the "id" attribute prefix filter
 * @return the number of nodes accepted by the child filter
 * @throws Exception propagated from {@code getMatchingTags}
 */
public int countOfTagWithClassBelowTagWithIdPrefix(String childTag, String tagClass, String parentTag, String parentIdPrefix) throws Exception {
    // Step 1: select the parent tags by name and id prefix.
    NodeFilter parentFilter = new AndFilter(
            new TagNameFilter(parentTag),
            new HasAttributePrefixFilter("id", parentIdPrefix));
    NodeList parents = getMatchingTags(parentFilter);

    // Step 2: count the nodes among them (searched recursively) that match name and class.
    NodeFilter childFilter = new AndFilter(new NodeFilter[] {
            new TagNameFilter(childTag),
            new HasAttributeFilter("class", tagClass)
    });
    return parents.extractAllNodesThatMatch(childFilter, true).size();
}
/**
 * Counts the tags with a given name and "class" attribute found by a
 * recursive search of the parent tags selected by name and "id" prefix.
 *
 * @param childTag the tag name the counted nodes must have
 * @param tagClass the "class" attribute value the counted nodes must have
 * @param parentTag the tag name of the parent nodes
 * @param parentIdPrefix the prefix passed to the "id" attribute prefix filter
 * @return the number of nodes accepted by the child filter
 * @throws Exception propagated from {@code getMatchingTags}
 */
public int countOfTagWithClassBelowTagWithIdPrefix(String childTag, String tagClass, String parentTag, String parentIdPrefix) throws Exception {
    // Step 1: select the parent tags by name and id prefix.
    NodeFilter parentFilter = new AndFilter(
            new TagNameFilter(parentTag),
            new HasAttributePrefixFilter("id", parentIdPrefix));
    NodeList parents = getMatchingTags(parentFilter);

    // Step 2: count the nodes among them (searched recursively) that match name and class.
    NodeFilter childFilter = new AndFilter(new NodeFilter[] {
            new TagNameFilter(childTag),
            new HasAttributeFilter("class", tagClass)
    });
    return parents.extractAllNodesThatMatch(childFilter, true).size();
}