/**
 * Parses a small inline HTML fragment and prints the node that directly follows
 * its first {@code <a>} element.
 *
 * @param args command-line arguments (unused)
 * @throws IOException declared by the signature
 */
public static void main(String... args) throws IOException {
    String markup = "<div>"
            + "<a href=\"#\"> I don't want this text </a>"
            + "**I want to retrieve this text**"
            + "</div>";
    Document document = Jsoup.parse(markup);

    // The wanted text is not inside any tag of its own; it is the sibling node after the link.
    Element anchor = document.select("a").first();
    Node following = anchor.nextSibling();
    System.out.println(following.toString());
}
/**
 * Delegates to {@code originElement.nextSibling()}.
 *
 * @return whatever the wrapped {@code originElement} reports as its next sibling
 */
public Node nextSibling() {
    final Node sibling = originElement.nextSibling();
    return sibling;
}
// Print the node that follows each <strong> found under the div whose id is "center-text".
Elements elements = doc.select("div[id=\"center-text\"] strong");
for (int i = 0; i < elements.size(); i++) {
    Element strong = elements.get(i);
    System.out.println("nextSibling: " + strong.nextSibling());
}
// Collect, for every <b> nested in a <p> of the body, the string form of the node that follows it.
Elements elements = doc.body().select("p").select("b");
for (Element bold : elements) {
    // nextSibling() yields null when the <b> is the last child of its parent;
    // skip those instead of failing with a NullPointerException.
    if (bold.nextSibling() != null) {
        versesList.add(bold.nextSibling().toString());
    }
}
// Download the page and print the node that follows each <span class="legenda">.
Document doc = Jsoup.connect("http://www.eleicoes2012.info/adeilza-psb-40656/").get();
Elements spans = doc.select("span.legenda");
for (int i = 0; i < spans.size(); i++) {
    System.out.println(spans.get(i).nextSibling());
}
/**
 * Returns the string form of the node that directly follows {@code element}.
 *
 * @return {@code toString()} of the next sibling, or {@code null} when {@code element}
 *         is {@code null} or has no next sibling
 */
@Override
public String nextNodeText() {
    // A missing sibling is reported the same way as a missing element (null)
    // rather than surfacing as a NullPointerException.
    if (element == null || element.nextSibling() == null) {
        return null;
    }
    return element.nextSibling().toString();
}
/**
 * Passes {@code element.nextSibling()} through {@code NodeAdaptors.getNode} and
 * returns the result.
 *
 * @return the value produced by {@code NodeAdaptors.getNode} for the wrapped element's next sibling
 */
@Override
public Node getNextSibling() {
    return NodeAdaptors.getNode(
            element.nextSibling());
}
// Fetch the recall page and print every <strong> label under div.archived as KEY,
// with the node that follows it as VALUE.
String url = "http://www.cpsc.gov/en/Recalls/2004/CPSC-NETGEAR-Inc-Announce-Recall-of-Wall-Plug-Ethernet-Bridges-/";
Document doc = Jsoup.connect(url).get();
Elements archived = doc.select("div.archived strong");
for (int i = 0; i < archived.size(); i++) {
    Element label = archived.get(i);
    System.out.println("KEY: " + label.text());
    System.out.println("VALUE: " + label.nextSibling());
}
// Print whatever directly follows each <span class="lead">; in this markup that is the date text.
final String html = "<span class=\"lead\">Written on</span> 05.01.2013 at 12:16 <br />";
Document doc = Jsoup.parse(html);

for (Element lead : doc.select("span.lead")) {
    // toString() on the sibling node gives its value.
    String dateText = lead.nextSibling().toString();
    System.out.println(dateText);
}
// Fetch the page and print every regex match found in the node that follows each <hr>.
Document document = Jsoup.connect(url).get();
Elements hrs = document.select("hr");
Pattern pattern = Pattern.compile("(\\d+%%\\d+)");

for (Element hr : hrs) {
    // An <hr> that is the last child of its parent has no next sibling (null);
    // skip it instead of failing with a NullPointerException.
    if (hr.nextSibling() == null) {
        continue;
    }
    String textAfterHr = hr.nextSibling().toString();
    Matcher matcher = pattern.matcher(textAfterHr);
    while (matcher.find()) {
        System.out.println(matcher.group(1)); // the captured data
    }
}
// Incomplete template: the initializer of `doc` is left as a placeholder comment to be filled in.
// NOTE(review): with the snippet collapsed onto one line, the first `//` turns everything after it
// into a comment — restore the original line breaks before using it.
// Intended flow, per the statements on this line: iterate the elements matched by "strong ~ *",
// store each element's nextSibling() in `value`, and print it.
Document doc = // ... eg. parse File / String here or connect to a website Node value; for( Element element : doc.select("strong ~ *") ) { // element.previousSibling() is possible too value = element.nextSibling(); System.out.println(value); }
final String html = "<p> ... "; // your HTML here Document doc = Jsoup.parse(html); List<String> list = new ArrayList<>(); for( Element element : doc.select("b") ) { list.add(element.nextSibling().toString()); }
/**
 * Hands the wrapped element's {@code nextSibling()} to {@code NodeAdaptors.getNode}
 * and returns what that call produces.
 *
 * @return the result of {@code NodeAdaptors.getNode(element.nextSibling())}
 */
@Override
public Node getNextSibling() {
    return NodeAdaptors.getNode(
            element.nextSibling());
}
// Read the text that follows the <b>Price:</b> label in the fragment.
Document doc = Jsoup.parse("<br><b>Price:</b> Rs. 24,900.00 <br>");
Element el = doc.select("b").first(); // the element which contains "Price:"
TextNode priceNode = (TextNode) el.nextSibling();
String text = priceNode.text();
// Extract the text node that comes right after the first <b> element.
Document doc = Jsoup.parse("<br><b>Price:</b> Rs. 24,900.00 <br>");
Element el = doc.select("b").first();
TextNode afterLabel = (TextNode) el.nextSibling();
String text = afterLabel.text();
// Print the text node that sits right after each <div class=example>.
final String html = "<div class=example>Text #1</div> \"Another Text 1\"\n"
        + "<div class=example>Text #2</div> \"Another Text 2\" ";
Document doc = Jsoup.parse(html);

for (Element div : doc.select("div.example")) {
    // The node after each div is treated as a TextNode; print its text.
    System.out.println(((TextNode) div.nextSibling()).text());
}
/**
 * Demo: prints every {@code div.label} inside {@code div.content} together with the
 * outer HTML of the node that follows it, formatted as {@code label:value}.
 */
public class StackOverflow20973268 {

    /** Sample markup: label divs, each followed by a bare text value and a line break. */
    private static final String INPUT =
            "<div class=\"content\">"
            + "<div class=\"label\">Company Name: </div>"
            + "Cartell Chemical Co., Ltd."
            + "<br/>"
            + "<div class=\"label\">Business Owner: </div>"
            + "Michael Chen"
            + "<br/>"
            + "<div class=\"label\">Employees: </div>"
            + "210"
            + "<br/>"
            + "<div class=\"label\">Main markets: </div>"
            + "North America, Europe, China, South Asia"
            + "<br/>"
            + "<div class=\"label\">Business Type: </div>"
            + "Manufacturer"
            + "<br/>"
            + "</div>";

    /**
     * Parses the sample markup and prints one {@code label:value} line per label.
     *
     * @param args command-line arguments (unused)
     * @throws IOException declared by the signature
     */
    public static void main(String[] args) throws IOException {
        Document doc = Jsoup.parse(INPUT);
        for (Element label : doc.select("div.content div.label")) {
            String name = label.text().trim();
            String value = label.nextSibling().outerHtml();
            System.out.println(name + ":" + value);
        }
    }
}
/**
 * Returns the markup that follows the first {@code <b>} element selected by
 * {@code b:matches(^<quoted fixedStart>$)}, up to and including the next sibling
 * {@code <b>} whose trimmed text is non-empty (or up to the last sibling if there is none).
 *
 * @param html       the HTML to parse
 * @param fixedStart the literal text of the {@code <b>} element that marks the start;
 *                   it is regex-quoted before being placed in the selector
 * @return the concatenated string forms of the following siblings, or an empty string
 *         when no {@code <b>} element matches
 */
public static String findFragment(String html, String fixedStart) {
    Document doc = Jsoup.parse(html);
    Element myBTag = doc
            .select("b:matches(^" + Pattern.quote(fixedStart) + "$)")
            .first();
    // first() yields null when nothing matched; report "no fragment" instead of
    // failing with a NullPointerException.
    if (myBTag == null) {
        return "";
    }

    StringBuilder sb = new StringBuilder();
    for (Node currentSibling = myBTag.nextSibling();
            currentSibling != null;
            currentSibling = currentSibling.nextSibling()) {
        // The terminating <b> is itself part of the fragment, so append before testing it.
        sb.append(currentSibling.toString());
        if (currentSibling.nodeName().equals("b")
                && !((Element) currentSibling).text().trim().isEmpty()) {
            break;
        }
    }
    return sb.toString();
}
// Walk every element of a sample paragraph of links and print sibling information
// for anchors and for the element whose own text mentions "Jill Martin rescues Savannah".
String html = "<p>"
        + " <a href=\"http://www.today.com/video/jill-martin-rescues-savannah-guthrie-from-her-guest-room-mess-604921923959\" rel=\"nofollow\"> Jill Martin rescues Savannah Guthrie from her guest room mess </a> "
        + " <a href=\"http://www.today.com/video/4-simple-ways-to-clear-your-clutter-this-year-596741699678\" rel=\"nofollow\"> 4 simple ways to clear your clutter this year </a> "
        + " <a href=\"http://www.today.com/video/staying-home-on-new-years-eve-great-ideas-to-celebrate-at-home-594027587814\" rel=\"nofollow\"> Staying home on New Year's Eve? Great ideas to celebrate at home </a> "
        + " <a href=\"http://www.today.com/video/heres-how-to-set-a-functional-christmas-table-591622211749\" rel=\"nofollow\"> Here's how to set a functional Christmas table </a> "
        + "</p>";
Document doc = Jsoup.parse(html);
String tag = null;

for (Element current : doc.select("*")) {
    tag = current.tagName();
    String ownText = current.ownText();
    boolean isAnchor = "a".equalsIgnoreCase(tag);
    boolean mentionsJill = StringUtils.containsIgnoreCase(ownText, "Jill Martin rescues Savannah");

    if (isAnchor) {
        System.out.println("element : " + ownText + "; nextElementSibling: " + current.nextElementSibling());
    }

    if (mentionsJill) {
        // Element siblings skip text nodes; nextSibling() does not.
        System.out.println("element : " + ownText + "; nextElementSibling: " + current.nextElementSibling());
        System.out.println("tag : " + tag + "; nextNodeSibling: " + current.nextSibling());
        System.out.println("element : " + ownText + "; previousElementSibling: " + current.previousElementSibling());
    }
}
void parseTopDoc() throws IOException { String source = "http://www.nco.ncep.noaa.gov/pmb/docs/grib2/grib2_doc.shtml"; Document doc = Jsoup.parse(new URL(source), 5 * 1000); // 5 sec timeout //System.out.printf("%s%n", doc); Elements links = doc.select("a[href]"); for (Element link : links) { //System.out.printf("%s", link); Node sib = link.nextSibling(); String title = null; if (sib != null) { String sibt = sib.toString(); title = StringUtil2.remove(sibt, "-").trim(); //System.out.printf(" == '%s'", title); } if (link.text().equals("Table 4.2")) { //System.out.printf(" == "); parseTable42(link.attr("abs:href"), link.text(), title); } else { if (link.text().startsWith("Table 4")) { //System.out.printf(" == "); parseCodeTable(link.attr("abs:href"), link.text(), title); } } //System.out.printf("%n"); } }