/**
 * Reads the target tag name from the job configuration and selects the node filter to use.
 *
 * <p>The value of the {@code Cloud9.TargetTag} property is stored in {@code tag}. When it equals
 * {@code "heading"} (ignoring case) a {@code HeadingTagFilter} is installed; otherwise a
 * {@code TagNameFilter} for that tag name is installed.
 *
 * @param context the mapper context whose configuration supplies {@code Cloud9.TargetTag}
 * @throws IOException declared by the overridden method
 * @throws IllegalStateException if {@code Cloud9.TargetTag} is not set in the configuration
 */
@Override
public void setup(Mapper<LongWritable, Indexable, LongWritable, TextDocument>.Context context)
    throws IOException {
  Configuration conf = context.getConfiguration();
  tag = conf.get("Cloud9.TargetTag");

  // An unset property comes back as null; fail with a clear message instead of an
  // anonymous NullPointerException on the comparison below.
  if (tag == null) {
    throw new IllegalStateException(
        "Required configuration property \"Cloud9.TargetTag\" is not set");
  }

  if (tag.equalsIgnoreCase("heading")) {
    filter = new HeadingTagFilter();
  } else {
    filter = new TagNameFilter(tag);
  }
}
/** * Create a wrapper over a new TagNameFilter. */ public TagNameFilterWrapper () { mFilter = new TagNameFilter (); // add the tag name choice mName = new JComboBox (); mName.setEditable (true); add (mName); mName.addItem (mFilter.getName ()); mName.addActionListener (this); }
// Parse the page at url and collect the nodes that satisfy both filters:
// tag name "div" and attribute class with value "movie".
Parser parser = new Parser(url);
AndFilter movieDivFilter =
    new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "movie"));
NodeList movies = parser.extractAllNodesThatMatch(movieDivFilter);
// Collect the nodes matching tag name "h3" with id "h3_" + num, then print
// the string text of each one.
AndFilter headingFilter =
    new AndFilter(new TagNameFilter("h3"), new HasAttributeFilter("id", "h3_" + num));
NodeList nodes = parser.extractAllNodesThatMatch(headingFilter);
SimpleNodeIterator nodeIterator = nodes.elements();
while (nodeIterator.hasMoreNodes()) {
    // Every match is cast to HeadingTag, exactly as before.
    HeadingTag heading = (HeadingTag) nodeIterator.nextNode();
    System.out.println(heading.getStringText());
}
/**
 * Stores the nodes of {@code nodes} that match tag name "table" in
 * {@code tables} and reports whether exactly one was found.
 *
 * @return true when the extracted list holds exactly one node
 */
private boolean hasOneTable() {
    tables = nodes.extractAllNodesThatMatch(new TagNameFilter("table"));
    final int tableCount = tables.size();
    return tableCount == 1;
}
/**
 * Extracts the "table" nodes from {@code nodes} into the {@code tables}
 * field.
 *
 * @return whether the extraction produced exactly one node
 */
private boolean hasOneTable() {
    final TagNameFilter byTableTag = new TagNameFilter("table");
    tables = nodes.extractAllNodesThatMatch(byTableTag);
    if (tables.size() == 1) {
        return true;
    }
    return false;
}
/**
 * Produce a node filter equivalent to the wrapped one.
 * A new TagNameFilter is created and given the wrapped filter's name.
 * @return The node filter object suitable for serialization.
 */
public NodeFilter getNodeFilter ()
{
    final TagNameFilter copy = new TagNameFilter ();
    copy.setName (mFilter.getName ());
    return copy;
}
/**
 * Counts the tags returned by {@code getMatchingTags} for a filter that
 * requires both the given tag name and an "id" attribute prefix.
 *
 * @param tag the tag name passed to the TagNameFilter
 * @param idPrefix the prefix passed to the HasAttributePrefixFilter for "id"
 * @return the size of the matching tag list
 * @throws Exception propagated from {@code getMatchingTags}
 */
public int countOfTagWithIdPrefix(String tag, String idPrefix) throws Exception {
    TagNameFilter byTagName = new TagNameFilter(tag);
    HasAttributePrefixFilter byIdPrefix = new HasAttributePrefixFilter("id", idPrefix);
    NodeFilter combined = new AndFilter(byTagName, byIdPrefix);
    return getMatchingTags(combined).size();
}
/**
 * Returns how many tags {@code getMatchingTags} yields for the conjunction
 * of a tag-name filter and an "id" attribute-prefix filter.
 *
 * @param tag tag name to filter on
 * @param idPrefix prefix given to the "id" attribute-prefix filter
 * @return number of entries in the matching list
 * @throws Exception propagated from {@code getMatchingTags}
 */
public int countOfTagWithIdPrefix(String tag, String idPrefix) throws Exception {
    NodeFilter tagAndIdPrefix =
        new AndFilter(new TagNameFilter(tag), new HasAttributePrefixFilter("id", idPrefix));
    final int matchCount = getMatchingTags(tagAndIdPrefix).size();
    return matchCount;
}
/**
 * Counts nodes matching a child tag name and class within the parents that
 * match a parent tag name and "id" prefix.
 *
 * <p>Parents are obtained from {@code getMatchingTags}; the child filter is
 * then applied to that list with the recursive flag set to {@code true}.
 *
 * @param childTag tag name for the child TagNameFilter
 * @param tagClass value given to the child "class" HasAttributeFilter
 * @param parentTag tag name for the parent TagNameFilter
 * @param parentIdPrefix prefix given to the parent "id" HasAttributePrefixFilter
 * @return the size of the list extracted from the parents
 * @throws Exception propagated from {@code getMatchingTags}
 */
public int countOfTagWithClassBelowTagWithIdPrefix(String childTag, String tagClass, String parentTag, String parentIdPrefix) throws Exception {
    AndFilter parentFilter =
        new AndFilter(
            new TagNameFilter(parentTag),
            new HasAttributePrefixFilter("id", parentIdPrefix));
    NodeList parents = getMatchingTags(parentFilter);

    NodeFilter childFilter =
        new AndFilter(
            new NodeFilter[] {
                new TagNameFilter(childTag),
                new HasAttributeFilter("class", tagClass)
            });
    return parents.extractAllNodesThatMatch(childFilter, true).size();
}
// Extract every node with tag name "a" from the comment text and record how many were found.
Parser parser = Parser.createParser(comment.getText(), "UTF-8");
NodeList htmlAnchorNodes;
try {
    htmlAnchorNodes = parser.extractAllNodesThatMatch(new TagNameFilter("a"));
} catch (ParserException e) {
    e.printStackTrace();
    // Previously the list stayed null here and the size() call below threw a
    // NullPointerException; fall back to an empty list so a parse failure
    // simply yields zero anchors.
    htmlAnchorNodes = new NodeList();
}
int size = htmlAnchorNodes.size();
// Number of nodes with tag name "a" found in the comment text; zero when parsing fails.
int size;
{
    Parser parser = Parser.createParser(comment.getText(), "UTF-8");
    NodeList htmlAnchorNodes;
    try {
        htmlAnchorNodes = parser.extractAllNodesThatMatch(new TagNameFilter("a"));
    } catch (ParserException e) {
        e.printStackTrace();
        // Previously the list stayed null here and the size() call below threw a
        // NullPointerException; use an empty list so the failure yields a size of zero.
        htmlAnchorNodes = new NodeList();
    }
    size = htmlAnchorNodes.size();
}
// make some sort of constants for all the positions const int OPEN_PRICE = 0; const int HIGH_PRICE = 1; const int LOW_PRICE = 2; // .... NodeList nl = parser.parse(null); // you can also filter here NodeList values = nl.extractAllNodesThatMatch( new AndFilter(new TagNameFilter("TD"), new HasAttributeFilter("class", "t1"))); if( values.size() > 0 ) { Tag openPrice = values.elementAt(OPEN_PRICE); String openPriceValue = openPrice.getText(); // this is the text of the div }
NodeList nl = parser.parse(null); // you can also filter here NodeList divs = nl.extractAllNodesThatMatch( new AndFilter(new TagNameFilter("DIV"), new HasAttributeFilter("id", "OBJ123"))); if( divs.size() > 0 ) { Tag div = divs.elementAt(0); String text = div.getText(); // this is the text of the div }
// Parse the page with a filter on tag name "IMG" and print the "src"
// attribute of each resulting tag.
Parser parser = new Parser("http://www.yahoo.com/");
TagNameFilter imgFilter = new TagNameFilter("IMG");
NodeList list = parser.parse(imgFilter);
for (SimpleNodeIterator iterator = list.elements(); iterator.hasMoreNodes(); ) {
    String src = ((Tag) iterator.nextNode()).getAttribute("src");
    System.out.println(src);
}
/**
 * Returns the children of the first node in {@code tables} that match tag
 * name "tr".
 *
 * @param tables list whose element at index 0 is inspected
 * @return the matching children, or a new empty NodeList when that node has
 *         no children list
 */
private NodeList getRows(NodeList tables) {
    NodeList children = tables.elementAt(0).getChildren();
    if (children == null) {
        return new NodeList();
    }
    return children.extractAllNodesThatMatch(new TagNameFilter("tr"));
}
/**
 * Extracts the "tr" children of the node at index 0 of {@code tables}.
 *
 * @param tables list supplying the node whose children are filtered
 * @return the "tr" children, or a new empty NodeList if the node's children
 *         list is null
 */
private NodeList getRows(NodeList tables) {
    final Node firstTable = tables.elementAt(0);
    final NodeList tableChildren = firstTable.getChildren();
    return (tableChildren != null)
        ? tableChildren.extractAllNodesThatMatch(new TagNameFilter("tr"))
        : new NodeList();
}
/**
 * Passes the "td" children of {@code row} to {@code addColsToMap} when there
 * are exactly two of them.
 *
 * @param map the map handed on to {@code addColsToMap}
 * @param row the node whose children are filtered by tag name "td"
 */
private void extractColumns(Map<String, String> map, Node row) {
    NodeList children = row.getChildren();
    if (children == null) {
        return; // nothing to extract from a row without a children list
    }

    NodeList cols = children.extractAllNodesThatMatch(new TagNameFilter("td"));
    if (cols.size() == 2) {
        addColsToMap(map, cols);
    }
}
/**
 * Filters the children of {@code row} by tag name "td" and, if the result
 * has size two, forwards it with {@code map} to {@code addColsToMap}.
 *
 * @param map destination map given to {@code addColsToMap}
 * @param row node whose children list is examined; a null list is ignored
 */
private void extractColumns(Map<String, String> map, Node row) {
    final NodeList rowChildren = row.getChildren();
    final boolean hasChildren = rowChildren != null;
    if (hasChildren) {
        final NodeList cells = rowChildren.extractAllNodesThatMatch(new TagNameFilter("td"));
        final boolean isPair = cells.size() == 2;
        if (isPair) {
            addColsToMap(map, cells);
        }
    }
}
// Parse MyHTML without a filter, then extract the nodes with tag name
// "MyTag", passing true as the second (recursive) argument.
Parser parser = new Parser();
parser.setInputHTML(MyHTML);
parser.setEncoding("UTF-8");
NodeList nl = parser.parse(null);
TagNameFilter myTagFilter = new TagNameFilter("MyTag");
NodeList node_list = nl.extractAllNodesThatMatch(myTagFilter, true);