/**
 * Wraps a jsoup element collection and, when it is not empty, pre-loads its first entry.
 *
 * @param env           environment handed to the superclass and to the wrapper of the first element
 * @param wrappedObject the element collection being wrapped
 */
public WrapElements(Environment env, Elements wrappedObject) {
    super(env, wrappedObject);

    iterator = wrappedObject.iterator();
    if (!iterator.hasNext()) {
        // Nothing to pre-load: iteratorValue is left as it was.
        return;
    }
    iteratorValue = new WrapElement(env, iterator.next());
}
/**
 * Restarts iteration: obtains a fresh iterator from the wrapped object, pre-loads the first
 * element when one exists, and resets the index to zero.
 *
 * @param env  environment handed to the wrapper of the first element
 * @param args not read by this method
 * @return always {@code Memory.NULL}
 */
@Override
@Signature
public Memory rewind(Environment env, Memory... args) {
    iterator = getWrappedObject().iterator();

    boolean hasFirst = iterator.hasNext();
    if (hasFirst) {
        iteratorValue = new WrapElement(env, iterator.next());
    }
    // When the collection is empty, iteratorValue keeps whatever it held before.

    iteratorIndex = 0;
    return Memory.NULL;
}
private void replace(Elements elements, String attrName) { Iterator<Element> iterator = elements.iterator(); Template template = TemplateManager.me().getCurrentTemplate(); while (iterator.hasNext()) {
// Destination list for URLs; nothing is added to it in this fragment.
List<URL> urlList = new ArrayList<>();

// Parse the input stream (charset argument null, empty base URI).
org.jsoup.nodes.Document doc = Jsoup.parse(in, null, "");

// Select every <a> element and expose an iterator over the matches.
org.jsoup.select.Elements anchorElements = doc.select("a");
Iterator<org.jsoup.nodes.Element> iterator = anchorElements.iterator();
// Visit every <eta> element and pick out its <arrT> and <prdt> children.
Elements elem = doc.select("eta");
Iterator<Element> iterator = elem.iterator();
while (iterator.hasNext()) {
    Element etaElement = iterator.next();

    // select(...) returns an Elements collection, not a single Element;
    // first() yields the first match, or null when there is none.
    Element arrT = etaElement.select("arrT").first();
    Element prdt = etaElement.select("prdt").first();

    // 1. parse the arrT value
    // 2. parse the prdt value
    // 3. subtract them.
}
// Scan every <input>; for each one whose id is "small-input", record its name and value.
// The scan does not stop at the first hit, so the last matching input wins.
Elements inputElems = doc.select("input");
for (Element inputElem : inputElems) {
    if (!inputElem.attr("id").equals("small-input")) {
        continue;
    }
    name = inputElem.attr("name");
    value = inputElem.attr("value");
}
// Concatenate the text of every <div> whose class attribute is exactly "article-content",
// separating consecutive matches with ", ".
Elements divElements = jsDoc.getElementsByTag("div");
StringBuilder builder = new StringBuilder();

// The separator is written before every match except the first. Deciding on
// "is there any later <div>" instead would leave a dangling ", " whenever the
// remaining divs do not match.
boolean firstMatch = true;
for (Element divElement : divElements) {
    if (!divElement.attr("class").equals("article-content")) {
        continue;
    }
    if (!firstMatch) {
        builder.append(", ");
    }
    builder.append(divElement.text());
    firstMatch = false;
}
text = builder.toString();
/**
 * Creates a parser that reads a single embedded vCard.
 *
 * @param embeddedVCard the HTML element of the embedded vCard
 * @param pageUrl the original URL of the HTML page
 */
private HCardParser(Element embeddedVCard, String pageUrl) {
    this.pageUrl = pageUrl;

    // The collection to iterate holds only the embedded vCard element.
    this.vcardElements = new Elements(embeddedVCard);
    this.vcardElementsIt = this.vcardElements.iterator();
}
// For every <input> matched by "ul li input", print its value attribute followed by the
// own text of the element that directly follows it.
Elements inputElements = document.select("ul li input");
for (Element inputElement : inputElements) {
    Element labelElement = inputElement.nextElementSibling();

    System.out.println(inputElement.attr("value"));
    // nextElementSibling() returns null when the input is the last element of its parent;
    // print an empty line in that case instead of failing with a NullPointerException.
    System.out.println(labelElement != null ? labelElement.ownText() : "");
}
/**
 * Fetches the page at a fixed URL and prints, for every {@code td.i} cell, the text of its
 * links together with the {@code alt} text of the image found in the following sibling element.
 *
 * @param args not used
 * @throws Exception if fetching or parsing the page fails
 */
public static void main(String[] args) throws Exception {
    System.out.println("Started");

    String url = "http://www.4icu.org/reviews/index2.htm";
    Document doc = Jsoup.connect(url).userAgent("Mozilla").get();

    for (Element cell : doc.select("td.i")) {
        String university = cell.select("a").text();

        // nextElementSibling() returns null when the cell is the last element of its row;
        // report an empty country instead of failing with a NullPointerException.
        Element siblingCell = cell.nextElementSibling();
        String country = siblingCell == null ? "" : siblingCell.select("img").attr("alt");

        System.out.printf("country : %s, university : %s %n", country, university);
    }
}
public static void main(String[] args) throws IOException { Document doc = Jsoup.connect("http://www.housetime.fm/tracklist/").get(); Elements links = doc.getElementsByClass("rc_release_list_item_picture");//Get all the divs Elements imgs = links.select("img[alt]");//get all images with alt attribute Iterator<Element> iterator = imgs.iterator(); while (iterator.hasNext()) { Element element = (Element) iterator.next(); String altString = element.attr("alt");//get the value of the alt attribute System.out.println(altString); } }
/**
 * Reads the rows of the table with id {@code conf} into a map, using the text of each row's
 * first child as the key and the text of its second child as the value.
 *
 * @param doc the document containing the table
 * @return the collected key/value pairs; a later row overwrites an earlier one with the same key
 */
public Map<String, String> parse(Document doc) {
    Elements elements = doc.select("table[id=conf]").select("tbody").select("tr");
    Map<String, String> configs = new HashMap<String, String>();

    for (Element element : elements) {
        Elements tds = element.children();
        // Rows with fewer than two children cannot supply a key/value pair;
        // skip them instead of failing with an IndexOutOfBoundsException.
        if (tds.size() < 2) {
            continue;
        }
        String key = tds.get(0).text();
        String value = tds.get(1).text();
        configs.put(key, value);
    }
    return configs;
}
}
// Look through the headings matched by ".blueTab header h2" for the one reading "Energy"
// and take the element that follows that heading's parent.
Document doc = Jsoup.parse(input, "UTF-8");
Elements elem = doc.select(".blueTab header h2");
for (Element element : elem) {
    // "Energy" is the comparison text; headings with any other text are ignored.
    if (!element.text().equals("Energy")) {
        continue;
    }
    // Next sibling element of the heading's parent: the table being looked for.
    Element tableElement = element.parent().nextElementSibling();
}
/**
 * Builds a map of counters from every link whose {@code href} contains
 * {@code singlejobcounter}. The key is the text of the link's parent element; the value is
 * the text of that parent's third following sibling element, with commas removed and
 * surrounding whitespace trimmed, parsed as a {@code long}.
 *
 * @param doc the document to scan
 * @return the collected counters; a later entry overwrites an earlier one with the same name
 */
@Override
public Map<String, Long> parse(Document doc) {
    Map<String, Long> counters = new HashMap<String, Long>();

    for (Element link : doc.select("a[href*=singlejobcounter]")) {
        Element labelNode = link.parent();
        String metricName = labelNode.text();

        // Step across three sibling elements to reach the one holding the number.
        Element valueNode = labelNode.nextElementSibling()
                .nextElementSibling()
                .nextElementSibling();
        String digits = valueNode.text().replace(",", "").trim();

        counters.put(metricName, Long.parseLong(digits));
    }
    return counters;
}
}
// Execute a GET request without following redirects, so the raw response stays available
// for the header dump at the end.
Connection.Response response = Jsoup.connect("http://www.4icu.org/reviews/index2.htm")
        .userAgent("Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0; H010818)")
        .method(Connection.Method.GET)
        .followRedirects(false)
        .execute();
Document doc = response.parse();

// For every td.i cell print the text of its links and the alt text of the image found in
// the following sibling element.
Elements cells = doc.select("td.i");
for (Element cell : cells) {
    String university = cell.select("a").text();

    // nextElementSibling() returns null when the cell is the last element of its row;
    // report an empty country instead of failing with a NullPointerException.
    Element siblingCell = cell.nextElementSibling();
    String country = siblingCell == null ? "" : siblingCell.select("img").attr("alt");

    System.out.printf("country : %s, university : %s %n", country, university);
}

System.out.println(response.headers());
/**
 * Prefixes the given attribute of each element with the configured CDN domain.
 * An element is left unchanged when it has a {@code cdn-exclude} attribute, or when the
 * attribute value is blank, does not start with "/", or starts with "//".
 *
 * @param elements the elements to rewrite in place
 * @param attrName the name of the attribute holding the URL
 */
private static void replace(Elements elements, String attrName) {
    String cdnDomain = Jboot.config(JbootRenderConfig.class).getCdn();

    for (Element element : elements) {
        if (element.hasAttr("cdn-exclude")) {
            continue;
        }

        String url = element.attr(attrName);
        // Only non-blank values with exactly one leading slash are rewritten.
        boolean rewritable = !StrUtils.isBlank(url)
                && url.startsWith("/")
                && !url.startsWith("//");
        if (rewritable) {
            element.attr(attrName, cdnDomain + url);
        }
    }
}
}
/**
 * Removes every element under {@code target} that matches {@code selector}.
 * {@code target} itself is never removed, even if it matches.
 *
 * @param target         the element whose matches are removed
 * @param selector       the selector passed to {@code target.select(...)}
 * @param pullupChildren when true, {@code pullupChildren(Element)} is called on each match
 *                       before it is removed
 */
public final static void removeNodesBySelector(Element target, String selector, boolean pullupChildren) {
    for (Element candidate : target.select(selector)) {
        boolean isTarget = candidate == target;
        // A match without an owner document is skipped -- presumably it was already
        // detached when one of its ancestors was removed; confirm against callers.
        if (isTarget || candidate.ownerDocument() == null) {
            continue;
        }

        if (pullupChildren) {
            pullupChildren(candidate);
        }
        candidate.remove();
    }
}
/** * Returns a document whose {@code <body>} element contains the given HTML fragment. */ private Document getFragmentAsDocument(CharSequence value) { // using the XML parser ensures that all elements in the input are retained, also if they actually are not allowed at the given // location; E.g. a <td> element isn't allowed directly within the <body> element, so it would be used by the default HTML parser. // we need to retain it though to apply the given white list properly; See HV-873 Document fragment = Jsoup.parse( value.toString(), baseURI, Parser.xmlParser() ); Document document = Document.createShell( baseURI ); // add the fragment's nodes to the body of resulting document Iterator<Element> nodes = fragment.children().iterator(); while ( nodes.hasNext() ) { document.body().appendChild( nodes.next() ); } return document; } }
/** * Returns a document whose {@code <body>} element contains the given HTML fragment. */ private Document getFragmentAsDocument(CharSequence value) { // using the XML parser ensures that all elements in the input are retained, also if they actually are not allowed at the given // location; E.g. a <td> element isn't allowed directly within the <body> element, so it would be used by the default HTML parser. // we need to retain it though to apply the given white list properly; See HV-873 Document fragment = Jsoup.parse( value.toString(), "", Parser.xmlParser() ); Document document = Document.createShell( "" ); // add the fragment's nodes to the body of resulting document Iterator<Element> nodes = fragment.children().iterator(); while ( nodes.hasNext() ) { document.body().appendChild( nodes.next() ); } return document; }
/** * Returns a document whose {@code <body>} element contains the given HTML fragment. */ private Document getFragmentAsDocument(CharSequence value) { // using the XML parser ensures that all elements in the input are retained, also if they actually are not allowed at the given // location; E.g. a <td> element isn't allowed directly within the <body> element, so it would be used by the default HTML parser. // we need to retain it though to apply the given white list properly; See HV-873 Document fragment = Jsoup.parse( value.toString(), "", Parser.xmlParser() ); Document document = Document.createShell( "" ); // add the fragment's nodes to the body of resulting document Iterator<Element> nodes = fragment.children().iterator(); while ( nodes.hasNext() ) { document.body().appendChild( nodes.next() ); } return document; } }