/**
 * Reads the target tag from the job configuration and selects the matching node filter.
 * The tag is taken from the "Cloud9.TargetTag" property. When it equals "heading"
 * (ignoring case) a HeadingTagFilter is used; otherwise a TagNameFilter for that tag.
 *
 * @param context the mapper context supplying the job configuration
 * @throws IOException if the "Cloud9.TargetTag" property is not set
 */
@Override
public void setup(Mapper<LongWritable, Indexable, LongWritable, TextDocument>.Context context) throws IOException {
    Configuration conf = context.getConfiguration();
    tag = conf.get("Cloud9.TargetTag");
    // A missing property would otherwise surface as an unexplained NullPointerException below.
    if (tag == null) {
        throw new IOException("Required configuration property Cloud9.TargetTag is not set");
    }
    if (tag.equalsIgnoreCase("heading")) {
        filter = new HeadingTagFilter();
    } else {
        filter = new TagNameFilter(tag);
    }
}
/**
 * Decide whether the given node should be kept.
 * The decision is delegated to the wrapped filter; what is done with a kept
 * or discarded node is up to the calling routine.
 * @param node The node to test.
 * @return <code>true</code> if the wrapped filter accepts the node,
 * <code>false</code> if it rejects it.
 */
public boolean accept(Node node) {
    final boolean keep = mFilter.accept(node);
    return keep;
}
/**
 * Get the underlying node filter object.
 * A new TagNameFilter is created and given the wrapped filter's current name,
 * so the wrapped instance itself is not returned.
 * @return The node filter object suitable for serialization.
 */
public NodeFilter getNodeFilter() {
    final TagNameFilter copy = new TagNameFilter();
    copy.setName(mFilter.getName());
    return copy;
}
/**
 * Create a wrapper over a new TagNameFilter.
 * Builds an editable combo box for the tag name, adds it to this component
 * and seeds it with the new filter's name.
 */
public TagNameFilterWrapper() {
    this.mFilter = new TagNameFilter();

    // The tag name chooser: editable so names not in the list can be typed.
    this.mName = new JComboBox();
    this.mName.setEditable(true);
    add(this.mName);

    // The listener is registered only after the initial item has been added.
    this.mName.addItem(this.mFilter.getName());
    this.mName.addActionListener(this);
}
/**
 * Convert this filter into Java code.
 * Output whatever text necessary and return the variable name.
 * Backslashes and double quotes in the filter name are escaped so that the
 * emitted string literal is valid Java.
 * @param out The output buffer.
 * @param context Three integers as follows:
 * <ul>
 * <li>indent level - the number of spaces to insert at the beginning of each line</li>
 * <li>filter number - the next available filter number</li>
 * <li>filter array number - the next available array of filters number</li>
 * </ul>
 * @return The variable name to use when referencing this filter (usually "filter" + context[1]++)
 */
public String toJavaCode(StringBuilder out, int[] context) {
    final String ret = "filter" + context[1]++;

    // Declaration line: TagNameFilter filterN = new TagNameFilter ();
    spaces(out, context[0]);
    out.append("TagNameFilter ");
    out.append(ret);
    out.append(" = new TagNameFilter ();");
    newline(out);

    // String.valueOf keeps the previous output ("null") should the name be null;
    // the backslash must be escaped first so the quote escapes are not doubled.
    final String escapedName = String.valueOf(mFilter.getName())
        .replace("\\", "\\\\")
        .replace("\"", "\\\"");

    // Setter line: filterN.setName ("...");
    spaces(out, context[0]);
    out.append(ret);
    out.append(".setName (\"");
    out.append(escapedName);
    out.append("\");");
    newline(out);

    return ret;
}
/**
 * Invoked when an action occurs on the combo box.
 * If the event comes from the tag name combo box and it has a selection,
 * the first selected object becomes the wrapped filter's name.
 * @param event Details about the action event.
 */
public void actionPerformed(ActionEvent event) {
    // Only the tag name combo box is of interest.
    if (event.getSource() != mName) {
        return;
    }
    final Object[] chosen = mName.getSelectedObjects();
    if (chosen == null || chosen.length == 0) {
        return;
    }
    mFilter.setName((String) chosen[0]);
}
}
/** * Assign the underlying node filter for this wrapper. * @param filter The filter to wrap. * @param context The parser to use for conditioning this filter. * Some filters need contextual information to provide to the user, * i.e. for tag names or attribute names or values, * so the Parser context is provided. */ public void setNodeFilter (NodeFilter filter, Parser context) { Set set; mFilter = (TagNameFilter)filter; set = new HashSet (); context.reset (); try { for (NodeIterator iterator = context.elements (); iterator.hasMoreNodes (); ) addName (set, iterator.nextNode ()); } catch (ParserException pe) { // oh well, we tried } for (Iterator iterator = set.iterator (); iterator.hasNext (); ) mName.addItem (iterator.next ()); mName.setSelectedItem (mFilter.getName ()); }
// Extract the nodes matching both a "div" tag name filter and a class="Level1" attribute filter.
TagNameFilter divTagFilter = new TagNameFilter("div");
HasAttributeFilter levelOneClassFilter = new HasAttributeFilter("class", "Level1");
NodeList nl = parser.extractAllNodesThatMatch(new AndFilter(divTagFilter, levelOneClassFilter));
// Extract the nodes matching both an "img" tag name filter and an align="right" attribute filter.
TagNameFilter imgTagFilter = new TagNameFilter("img");
HasAttributeFilter rightAlignFilter = new HasAttributeFilter("align", "right");
NodeList img = parser.extractAllNodesThatMatch(new AndFilter(imgTagFilter, rightAlignFilter));
// Parse the resource at url and extract the nodes matching both a "div" tag
// name filter and a class="movie" attribute filter.
Parser parser = new Parser(url);
TagNameFilter divTagFilter = new TagNameFilter("div");
HasAttributeFilter movieClassFilter = new HasAttributeFilter("class", "movie");
NodeList movies = parser.extractAllNodesThatMatch(new AndFilter(divTagFilter, movieClassFilter));
// Template: "WEBADDRESS" and "<insert html tag>" are placeholders to be filled in.
Parser fullWebpage = new Parser("WEBADDRESS");
TagNameFilter outerTagFilter = new TagNameFilter("<insert html tag>");
NodeList nl = fullWebpage.extractAllNodesThatMatch(outerTagFilter);

// NOTE(review): 'nodes' is not declared in this snippet while 'nl' is never read -
// possibly 'nl' was intended here; confirm against the surrounding code.
TagNameFilter anchorTagFilter = new TagNameFilter("a");
NodeList tds = nodes.extractAllNodesThatMatch(anchorTagFilter, true);
String data = tds.toHtml();
// Extract the nodes matching both an "h3" tag name filter and an id attribute
// filter for "h3_" + num, then print each one's string text.
TagNameFilter h3TagFilter = new TagNameFilter("h3");
HasAttributeFilter h3IdFilter = new HasAttributeFilter("id", "h3_" + num);
NodeList nodes = parser.extractAllNodesThatMatch(new AndFilter(h3TagFilter, h3IdFilter));
SimpleNodeIterator nodeIterator = nodes.elements();
while (nodeIterator.hasMoreNodes()) {
    // Every match is cast to HeadingTag.
    HeadingTag heading = (HeadingTag) nodeIterator.nextNode();
    System.out.println(heading.getStringText());
}
/**
 * Extracts the nodes matching a "table" tag name filter from {@code nodes},
 * stores them in {@code tables}, and reports whether exactly one was found.
 *
 * @return true if exactly one node matched, false otherwise
 */
private boolean hasOneTable() {
    tables = nodes.extractAllNodesThatMatch(new TagNameFilter("table"));
    return 1 == tables.size();
}
/**
 * Extracts the nodes matching a "table" tag name filter from {@code nodes},
 * stores them in {@code tables}, and reports whether exactly one was found.
 *
 * @return true if exactly one node matched, false otherwise
 */
private boolean hasOneTable() {
    tables = nodes.extractAllNodesThatMatch(new TagNameFilter("table"));
    return 1 == tables.size();
}
// Parse with a filter combining an "IMG" tag name filter and a parent filter
// on id="featured_story_1", then print the image URL of every match.
TagNameFilter imageTagFilter = new TagNameFilter("IMG");
HasParentFilter featuredStoryParentFilter =
    new HasParentFilter(new HasAttributeFilter("id", "featured_story_1"), true);
NodeFilter filter1 = new AndFilter(imageTagFilter, featuredStoryParentFilter);
NodeList list = parser.parse(filter1);
for (int i = 0; i < list.size(); i++) {
    // Every match is cast to ImageTag.
    ImageTag image = (ImageTag) list.elementAt(i);
    System.out.println(image.getImageURL());
}
/**
 * Counts the tags returned by getMatchingTags for a filter combining a tag
 * name filter with an "id" attribute prefix filter.
 *
 * @param tag the tag name given to the tag name filter
 * @param idPrefix the prefix given to the "id" attribute prefix filter
 * @return the size of the matching tag list
 * @throws Exception if getMatchingTags fails
 */
public int countOfTagWithIdPrefix(String tag, String idPrefix) throws Exception {
    TagNameFilter tagMatch = new TagNameFilter(tag);
    HasAttributePrefixFilter idMatch = new HasAttributePrefixFilter("id", idPrefix);
    NodeFilter combined = new AndFilter(tagMatch, idMatch);
    return getMatchingTags(combined).size();
}
/**
 * Counts the tags returned by getMatchingTags for a filter combining a tag
 * name filter with an "id" attribute prefix filter.
 *
 * @param tag the tag name given to the tag name filter
 * @param idPrefix the prefix given to the "id" attribute prefix filter
 * @return the size of the matching tag list
 * @throws Exception if getMatchingTags fails
 */
public int countOfTagWithIdPrefix(String tag, String idPrefix) throws Exception {
    TagNameFilter tagMatch = new TagNameFilter(tag);
    HasAttributePrefixFilter idMatch = new HasAttributePrefixFilter("id", idPrefix);
    NodeFilter combined = new AndFilter(tagMatch, idMatch);
    return getMatchingTags(combined).size();
}
/**
 * Counts child matches extracted from a list of parent tags.
 * The parents come from getMatchingTags with a filter combining the parent tag
 * name and an "id" attribute prefix; the children are extracted from those
 * parents (recursive flag set) with a filter combining the child tag name and
 * a "class" attribute value.
 *
 * @param childTag the tag name given to the child tag name filter
 * @param tagClass the value given to the child "class" attribute filter
 * @param parentTag the tag name given to the parent tag name filter
 * @param parentIdPrefix the prefix given to the parent "id" attribute prefix filter
 * @return the size of the extracted child list
 * @throws Exception if getMatchingTags fails
 */
public int countOfTagWithClassBelowTagWithIdPrefix(String childTag, String tagClass, String parentTag, String parentIdPrefix) throws Exception {
    NodeFilter parentFilter = new AndFilter(
        new TagNameFilter(parentTag),
        new HasAttributePrefixFilter("id", parentIdPrefix));
    NodeList parents = getMatchingTags(parentFilter);

    NodeFilter childFilter = new AndFilter(new NodeFilter[] {
        new TagNameFilter(childTag),
        new HasAttributeFilter("class", tagClass)
    });
    return parents.extractAllNodesThatMatch(childFilter, true).size();
}
// Parse the comment text and extract the nodes matching an "a" tag name filter.
Parser parser = Parser.createParser(comment.getText(), "UTF-8");
NodeList htmlAnchorNodes;
try {
    htmlAnchorNodes = parser.extractAllNodesThatMatch(new TagNameFilter("a"));
} catch (ParserException e) {
    e.printStackTrace();
    // Fall back to an empty list so the size() call below (and any later use)
    // does not hit a NullPointerException after a parse failure.
    htmlAnchorNodes = new NodeList();
}
int size = htmlAnchorNodes.size();
/**
 * Counts child matches extracted from a list of parent tags.
 * The parents come from getMatchingTags with a filter combining the parent tag
 * name and an "id" attribute prefix; the children are extracted from those
 * parents (recursive flag set) with a filter combining the child tag name and
 * a "class" attribute value.
 *
 * @param childTag the tag name given to the child tag name filter
 * @param tagClass the value given to the child "class" attribute filter
 * @param parentTag the tag name given to the parent tag name filter
 * @param parentIdPrefix the prefix given to the parent "id" attribute prefix filter
 * @return the size of the extracted child list
 * @throws Exception if getMatchingTags fails
 */
public int countOfTagWithClassBelowTagWithIdPrefix(String childTag, String tagClass, String parentTag, String parentIdPrefix) throws Exception {
    NodeFilter parentFilter = new AndFilter(
        new TagNameFilter(parentTag),
        new HasAttributePrefixFilter("id", parentIdPrefix));
    NodeList parents = getMatchingTags(parentFilter);

    NodeFilter childFilter = new AndFilter(new NodeFilter[] {
        new TagNameFilter(childTag),
        new HasAttributeFilter("class", tagClass)
    });
    return parents.extractAllNodesThatMatch(childFilter, true).size();
}