/**
 * Looks up the first element in {@code doc} that carries the CSS class {@code className}.
 *
 * @param doc       the parsed document to search
 * @param className the class name to look for
 * @return the first matching element, or {@code null} when nothing matches
 */
@Nullable
public static Element getElementByClass(Document doc, String className) {
    Elements matches = doc.getElementsByClass(className);
    // Guard clause: no result list, or an empty one, means there is nothing to return.
    if (matches == null || matches.size() <= 0) {
        return null;
    }
    return matches.get(0);
}
/**
 * Extracts the video id and poster URL from the first element with class
 * {@code tt-video-box} in the given markup.
 * <p>
 * Despite the method name, {@code content} is parsed as HTML. The result holds the
 * key {@code "id"} (from the {@code tt-videoid} attribute) and the key
 * {@code "imageUrl"} (from the {@code tt-poster} attribute); a key is omitted when
 * its attribute is missing or empty.
 *
 * @param content HTML markup to inspect; may be null or empty
 * @return a map with zero, one or two entries; empty when {@code content} is empty
 *         or contains no {@code tt-video-box} element
 */
private Map<String, String> parseJson(String content) {
    Map<String, String> map = new HashMap<>();
    if (TextUtils.isEmpty(content)) {
        return map;
    }

    Document doc = Jsoup.parse(content);
    Elements elements = doc.getElementsByClass("tt-video-box");
    // Without this guard, get(0) throws IndexOutOfBoundsException on pages lacking a video box.
    if (elements.isEmpty()) {
        return map;
    }

    String id = elements.get(0).attr("tt-videoid");
    String imageUrl = elements.get(0).attr("tt-poster");
    if (!TextUtils.isEmpty(id)) {
        map.put("id", id);
    }
    if (!TextUtils.isEmpty(imageUrl)) {
        map.put("imageUrl", imageUrl);
    }
    return map;
}
/**
 * Reads the page count of a gallery list from its pager.
 * <p>
 * The pager is the first element with class {@code ptt}; the number is the trimmed
 * text of the second-to-last child of that element's first grandchild.
 *
 * @param d    the parsed gallery list page
 * @param body the raw page body, handed to the {@link ParseException} on failure
 * @return the parsed page count
 * @throws ParseException if the pager is missing, malformed or not numeric
 */
private static int parsePages(Document d, String body) throws ParseException {
    try {
        Elements pagerCells = d.getElementsByClass("ptt").first().child(0).child(0).children();
        int secondToLast = pagerCells.size() - 2;
        String label = pagerCells.get(secondToLast).text().trim();
        return Integer.parseInt(label);
    } catch (Throwable e) {
        // Presumably rethrows errors that must not be swallowed -- see ExceptionUtils.
        ExceptionUtils.throwIfFatal(e);
        throw new ParseException("Can't parse gallery list pages", body);
    }
}
/**
 * Parses the number of preview pages using the HTML parser.
 * <p>
 * The pager is the first element with class {@code ptt}; the number is the text of
 * the second-to-last child of that element's first grandchild.
 *
 * @param document the parsed page
 * @param body     the raw page body, handed to the {@link ParseException} on failure
 * @return the parsed preview page count
 * @throws ParseException if the pager is missing, malformed or not numeric
 */
public static int parsePreviewPages(Document document, String body) throws ParseException {
    try {
        Elements pagerCells = document.getElementsByClass("ptt").first().child(0).child(0).children();
        int secondToLast = pagerCells.size() - 2;
        String label = pagerCells.get(secondToLast).text();
        return Integer.parseInt(label);
    } catch (Throwable e) {
        // Presumably rethrows errors that must not be swallowed -- see ExceptionUtils.
        ExceptionUtils.throwIfFatal(e);
        e.printStackTrace();
        throw new ParseException("Can't parse preview pages", body);
    }
}
/**
 * Rebuilds {@code industryMap} from the links found on the comprehensive page.
 * <p>
 * The page at {@code URLMapper.COMPREHENSIVE_PAGE} is downloaded through
 * {@code request}. Anchors are taken from a fixed position in the markup: the second
 * element with class {@code second-nav}, its fourth child, and that child's fourth
 * child. Anchors lacking a {@code title} or {@code href} attribute are ignored; every
 * other anchor is stored as {@code Industry(title, href)} under its title.
 *
 * @throws Exception if the download fails or the page does not have the expected
 *                   structure (the positional lookups then fail)
 */
private void initMap() throws Exception {
    industryMap = new HashMap<>();
    String target = URLMapper.COMPREHENSIVE_PAGE.toString();
    String content = request(new URL(target));
    Document doc = Jsoup.parse(content);
    Elements anchors = doc.getElementsByClass("second-nav")
            .get(1).children()
            .get(3).children()
            .get(3).children()
            .select("a");
    for (Element anchor : anchors) {
        if (!anchor.hasAttr("title") || !anchor.hasAttr("href")) {
            continue;
        }
        // The href is used as-is; no scratch buffer is needed to copy a single string.
        String title = anchor.attr("title");
        String href = anchor.attr("href");
        industryMap.put(title, new Industry(title, href));
    }
}
/**
 * Collects the industries linked from the comprehensive page.
 * <p>
 * The page at {@code URLMapper.COMPREHENSIVE_PAGE} is downloaded through
 * {@code request}. Anchors are taken from a fixed position in the markup: the second
 * element with class {@code second-nav}, its fourth child, and that child's fourth
 * child. Anchors lacking a {@code title} or {@code href} attribute are ignored.
 *
 * @return one {@code Industry(title, href)} per usable anchor, in document order
 * @throws Exception if the download fails or the page does not have the expected
 *                   structure (the positional lookups then fail)
 */
@Override
public List<Industry> collectLogic() throws Exception {
    List<Industry> res = new ArrayList<>();
    String target = URLMapper.COMPREHENSIVE_PAGE.toString();
    String content = request(new URL(target));
    Document doc = Jsoup.parse(content);
    Elements anchors = doc.getElementsByClass("second-nav")
            .get(1).children()
            .get(3).children()
            .get(3).children()
            .select("a");
    for (Element anchor : anchors) {
        if (!anchor.hasAttr("title") || !anchor.hasAttr("href")) {
            continue;
        }
        // The href is used as-is; no scratch buffer is needed to copy a single string.
        res.add(new Industry(anchor.attr("title"), anchor.attr("href")));
    }
    return res;
}
// Parses `response` as HTML and starts iterating over the elements with class "con-words";
// `content` starts out as null. The loop body and the rest of the method are outside this view.
// NOTE(review): Jsoup.parse(String, String) takes a base URI as its second argument, so "UTF-8"
// is used here as the base URI rather than as a charset -- confirm this is intended.
private String getHTML(String response) { Document doc = Jsoup.parse(response, "UTF-8"); Elements elements = doc.getElementsByClass("con-words"); String content = null; for (Element element : elements) {
// Takes the children of the first child of the first "itg" element, sizes `list` to one less
// than that count, and starts an index loop at 1. The loop body is outside this view.
// NOTE(review): as laid out on this single physical line, everything after `//` (including the
// second `Elements es` / `List<GalleryInfo> list` / `for` sequence) is comment text, not code.
Elements es = d.getElementsByClass("itg").first().child(0).children(); List<GalleryInfo> list = new ArrayList<>(es.size() - 1); for (int i = 1; i < es.size(); i++) { // First one is table header, skip it Elements es = d.getElementsByClass("itg").first().children(); List<GalleryInfo> list = new ArrayList<>(es.size() - 1); for (int i = 0; i < es.size(); i++) {
// Initialises `x` to 1, collects every element of `page` with class "c-tile", and starts
// iterating over them. The loop body is outside this view.
int x = 1; Elements pageImages = page.getElementsByClass("c-tile"); for (Element thumb : pageImages) {
// eventName: text of the element(s) with class "listing-hero-body".
// eventDescription: text of the <div> element(s) carrying both classes
// "js-xd-read-more-toggle-view" and "read-more__toggle-view".
// Both lookups run against `htmlPage`; if several elements match, text() combines their text.
eventName = htmlPage.getElementsByClass("listing-hero-body").text(); eventDescription = htmlPage.select("div.js-xd-read-more-toggle-view.read-more__toggle-view").text();
// Downloads http://www.maib.md/ with an HTTP GET and keeps the elements with class
// "bg_block_info", excluding any that also match ".pad_10" or ".pad_20".
Document doc = Jsoup.connect("http://www.maib.md/").get(); Elements els = doc.getElementsByClass("bg_block_info").not(".pad_10").not(".pad_20");
/**
 * Fetches the search page for {@code keyword} (3 second timeout) and prints the first
 * anchor of every element with class {@code g} to standard output.
 * <p>
 * An {@link IOException} during the fetch is printed and otherwise ignored.
 * NOTE(review): the keyword is appended to {@code SEARCH_URL} without URL encoding,
 * and nothing is ever added to the returned set -- confirm both are intended.
 *
 * @param keyword the search term
 * @return the result set, which this implementation leaves empty
 */
@NotNull
@Override
public Set<SearchEntity> search(@NotNull String keyword) {
    Set<SearchEntity> result = new HashSet<>();
    Document page;
    try {
        page = Jsoup.connect(SEARCH_URL + keyword).timeout(3000).get();
    } catch (IOException e) {
        e.printStackTrace();
        return result;
    }
    // Iterating an empty match list is a no-op, so no emptiness check is required.
    for (Element hit : page.getElementsByClass("g")) {
        System.out.println(hit.getElementsByTag("a").first());
    }
    return result;
}
/**
 * Fetches the search page for {@code keyword} and prints the {@code href} of the first
 * grandchild element of every element with class {@code b_title} to standard output.
 * <p>
 * Title elements without a child element, or whose first child has no child element,
 * are skipped, as are empty {@code href} values. An {@link IOException} during the
 * fetch is printed and otherwise ignored.
 * NOTE(review): the keyword is appended to {@code SEARCH_URL} without URL encoding,
 * and nothing is ever added to the returned set -- confirm both are intended.
 *
 * @param keyword the search term
 * @return the result set, which this implementation leaves empty
 */
@Override
@NotNull
public Set<SearchEntity> search(@NotNull String keyword) {
    Set<SearchEntity> result = new HashSet<>();
    try {
        Document doc = Jsoup.connect(SEARCH_URL + keyword).get();
        for (Element element : doc.getElementsByClass("b_title")) {
            // child(int) indexes element children only, whereas childNodeSize() also counts
            // text nodes; checking children() avoids IndexOutOfBoundsException on text-only nodes.
            Elements children = element.children();
            if (children.isEmpty()) {
                continue;
            }
            Elements grandChildren = children.get(0).children();
            if (grandChildren.isEmpty()) {
                continue;
            }
            String url = grandChildren.get(0).attr("href");
            if (!Utils.isEmpty(url)) {
                System.out.println(url);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return result;
}
/**
 * Appends the link target of every thumbnail (elements with class {@code gdtm}) in
 * {@code doc} to {@code pageUrls}.
 * <p>
 * For each thumbnail the first {@code div} (the thumbnail itself or a descendant) is
 * located, then the first anchor inside it. Thumbnails without such a {@code div} or
 * anchor are skipped instead of failing with a {@link NullPointerException}.
 *
 * @param doc      the parsed page holding the thumbnails
 * @param pageUrls the list that receives the {@code href} values, in document order
 */
private void fetchPageUrls(Document doc, List<String> pageUrls) {
    Elements imageLinks = doc.getElementsByClass("gdtm");
    for (Element thumb : imageLinks) {
        Element container = thumb.select("div").first();
        if (container == null) {
            continue;
        }
        Element anchor = container.select("a").first();
        if (anchor == null) {
            continue;
        }
        pageUrls.add(anchor.attr("href"));
    }
}
} // closes the enclosing type, whose header is outside this view
public static void main(String[] args) throws IOException { Document doc = Jsoup.connect("http://www.housetime.fm/tracklist/").get(); Elements links = doc.getElementsByClass("rc_release_list_item_picture");//Get all the divs Elements imgs = links.select("img[alt]");//get all images with alt attribute Iterator<Element> iterator = imgs.iterator(); while (iterator.hasNext()) { Element element = (Element) iterator.next(); String altString = element.attr("alt");//get the value of the alt attribute System.out.println(altString); } }
// Download the article and take the first element with class "sty_txt" as the story container.
Document document = Jsoup.connect(newsLink).get();
Element element = document.getElementsByClass("sty_txt").first();
Elements children = element.children();
// Strip share boxes, tag blocks and "read more" wrappers before extracting the text.
for (Element block : children) {
    Elements nested = block.children();
    boolean shareBox = block.hasClass("share-icons-box");
    boolean tagBlock = block.hasClass("story_tag_smo");
    boolean readMore = nested.size() > 0 && nested.first().hasClass("st_readmore_sp");
    if (shareBox || tagBlock || readMore) {
        block.remove();
    }
}
String story = element.text();
System.out.println("Story: " + story);
/**
 * Registers the "yell" block processor by class name, converts
 * {@code sample-with-yell-block.ad} to a string, and checks that the output contains
 * exactly one element with class {@code paragraph} holding the expected text.
 */
@Test
public void a_block_processor_as_string_should_be_executed_when_registered_block_is_found_in_document() {
    JavaExtensionRegistry javaExtensionRegistry = this.asciidoctor.javaExtensionRegistry();
    javaExtensionRegistry.block("yell", "org.asciidoctor.extension.YellStaticBlock");

    String content = asciidoctor.convertFile(
            classpath.getResource("sample-with-yell-block.ad"),
            options().toFile(false).get());

    // The two-argument Jsoup.parse takes a base URI, not a charset, so the
    // single-argument overload is the correct call for an in-memory string.
    org.jsoup.nodes.Document doc = Jsoup.parse(content);
    Elements elements = doc.getElementsByClass("paragraph");
    assertThat(elements.size(), is(1));
    assertThat(elements.get(0).text(), is("THE TIME IS NOW. GET A MOVE ON."));
}
/**
 * Registers the "yell" block processor by class, converts
 * {@code sample-with-yell-block.ad} to a string, and checks that the output contains
 * exactly one element with class {@code paragraph} holding the expected text.
 */
@Test
public void a_block_processor_should_be_executed_when_registered_block_is_found_in_document() {
    JavaExtensionRegistry javaExtensionRegistry = this.asciidoctor.javaExtensionRegistry();
    javaExtensionRegistry.block("yell", YellStaticBlock.class);

    String content = asciidoctor.convertFile(
            classpath.getResource("sample-with-yell-block.ad"),
            options().toFile(false).get());

    // The two-argument Jsoup.parse takes a base URI, not a charset, so the
    // single-argument overload is the correct call for an in-memory string.
    org.jsoup.nodes.Document doc = Jsoup.parse(content);
    Elements elements = doc.getElementsByClass("paragraph");
    assertThat(elements.size(), is(1));
    assertThat(elements.get(0).text(), is("THE TIME IS NOW. GET A MOVE ON."));
}
/**
 * Registers the "yell" listing block processor through the extension registry, converts
 * {@code sample-with-yell-listing-block.ad} to a string, and checks that the output
 * contains exactly one element with class {@code paragraph} holding the expected text.
 */
@Test
public void a_block_processor_should_be_executed_when_registered_listing_block_is_found_in_document() {
    JavaExtensionRegistry javaExtensionRegistry = this.asciidoctor.javaExtensionRegistry();
    javaExtensionRegistry.block("yell", YellStaticListingBlock.class);

    String content = asciidoctor.convertFile(
            classpath.getResource("sample-with-yell-listing-block.ad"),
            options().toFile(false).get());

    // The two-argument Jsoup.parse takes a base URI, not a charset, so the
    // single-argument overload is the correct call for an in-memory string.
    org.jsoup.nodes.Document doc = Jsoup.parse(content);
    Elements elements = doc.getElementsByClass("paragraph");
    assertThat(elements.size(), is(1));
    assertThat(elements.get(0).text(), is("THE TIME IS NOW. GET A MOVE ON."));
}
/**
 * Registers the "yell" listing block processor through an extension group, converts
 * {@code sample-with-yell-listing-block.ad} to a string, and checks that the output
 * contains exactly one element with class {@code paragraph} holding the expected text.
 *
 * @throws IOException declared by the original test signature
 */
@Test
public void a_block_processor_should_be_executed_when_registered_listing_block_is_found_in_document() throws IOException {
    this.asciidoctor.createGroup()
            .block("yell", YellStaticListingBlock.class)
            .register();

    String content = asciidoctor.convertFile(
            classpath.getResource("sample-with-yell-listing-block.ad"),
            options().toFile(false).get());

    // The two-argument Jsoup.parse takes a base URI, not a charset, so the
    // single-argument overload is the correct call for an in-memory string.
    org.jsoup.nodes.Document doc = Jsoup.parse(content);
    Elements elements = doc.getElementsByClass("paragraph");
    assertThat(elements.size(), is(1));
    assertThat(elements.get(0).text(), is("THE TIME IS NOW. GET A MOVE ON."));
}