/**
 * Collects the link targets of the thumbnail anchors found on a page.
 *
 * <p>Anchors are taken from the selector {@code .shm-thumb.thumb>a}; any anchor
 * that also matches {@code .shm-thumb-link} is left out. Each remaining
 * anchor contributes the absolute form of its {@code href} attribute.
 *
 * @param page the parsed page to scan
 * @return a new list with one entry per selected anchor, in document order
 */
@Override
public List<String> getURLsFromPage(Document page) {
    Elements thumbAnchors = page.select(".shm-thumb.thumb>a").not(".shm-thumb-link");
    int count = thumbAnchors.size();
    List<String> urls = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
        urls.add(thumbAnchors.get(i).absUrl("href"));
    }
    return urls;
}
/**
 * Removes every element under {@code document} that matches the
 * {@code :empty} selector, except elements matching {@code body}.
 *
 * @param document the element whose subtree is searched
 * @return {@code true} when no matching element was found (nothing was
 *         removed); {@code false} when at least one element was removed
 */
private boolean removeEmpty(Element document) {
    Elements candidates = document.select(":empty").not("body");
    boolean nothingToRemove = candidates.isEmpty();
    if (!nothingToRemove) {
        candidates.remove();
    }
    return nothingToRemove;
}
}
/**
 * Deletes all elements selected by {@code :empty} below the given element,
 * skipping any that match {@code body}.
 *
 * @param document root of the subtree to clean
 * @return {@code false} if one or more elements were deleted by this call,
 *         {@code true} if there was nothing to delete
 */
private boolean removeEmpty(Element document) {
    Elements hollow = document.select(":empty").not("body");
    if (!hollow.isEmpty()) {
        // Something matched: drop it and report that the tree changed.
        hollow.remove();
        return false;
    }
    return true;
}
}
/**
 * Fills in this {@code Book}'s information from its online reading page.
 *
 * <p>Only {@link #name}, {@link #id}, {@link #author} and
 * {@link #publishDate} can be filled in this way.
 * NOTE(review): the statements in this method assign {@code name},
 * {@code author} and {@code publishDate}; {@code id} is not assigned here
 * directly - confirm whether {@code BookDownloader} sets it.
 *
 * <p>A {@code BookDLException} is caught and its stack trace printed; the
 * fields assigned before the failure keep their new values.
 *
 * @param url the book's online reading page
 */
public void fillBookInfoByUrl(String url) {
    try {
        // Comment openers become "<" and comment closers are dropped before parsing;
        // presumably the wanted markup sits inside HTML comments - verify against a real page.
        String markup = new BookDownloader(this).getBookViewPageHtml(url)
                .replace("<!--", "<")
                .replace("-->", "");
        Document doc = Jsoup.parse(markup);

        this.name = doc.getElementsByTag("title").text();

        // Only <span> elements without a style attribute are inspected.
        for (Element span : doc.getElementsByTag("span").not("[style]")) {
            String text = span.text();
            if (text.startsWith("作者:")) {
                // Skip the 3-character label.
                this.author = text.substring(3);
            }
            if (text.startsWith("出版日期:")) {
                // Skip the 5-character label.
                this.publishDate = text.substring(5);
            }
        }
    } catch (BookDLException e) {
        e.printStackTrace();
    }
}
// status_and_barcode: matches a whole string of the form "<text> <token>", where <token> is a
//   trailing run of ASCII letters/digits after the last space; group 1 is the leading text and
//   group 2 the token (by the variable name, presumably a status and a barcode - confirm).
// exemplartrs: every <tr> under `table` except those with id "bg2"; an empty Elements when
//   `table` is null, so later code can use it without a null check.
// fmt: formatter for dates written as dd.MM.yyyy, using the German locale.
Pattern status_and_barcode = Pattern.compile("^(.*) ([0-9A-Za-z]+)$"); Elements exemplartrs = table != null ? table.select("tr").not("#bg2") : new Elements(); DateTimeFormatter fmt = DateTimeFormat.forPattern("dd.MM.yyyy").withLocale(Locale.GERMAN);
// exceptionName: combined text of the descendants of `element` matching ".exception-name".
// failelements: starts with the ".fail" matches that do not also match ".label", then has the
//   ".error" matches and the ".fatal" matches appended, in that order.
String exceptionName = element.select(".exception-name").text(); Elements failelements = element.select(".fail").not(".label"); failelements.addAll(element.select(".error")); failelements.addAll(element.select(".fatal"));
/**
 * Two sibling elements with no content: after {@code m.manipulate}, the body
 * must contain no elements other than the body itself.
 */
@Test
public void testTwoEmpty() {
    String fragment = "<p></p><div></div>";
    Document doc = Jsoup.parseBodyFragment(fragment);

    m.manipulate(doc);

    boolean onlyBodyRemains = doc.body().select("*").not("body").isEmpty();
    assertTrue(onlyBodyRemains);
}
/**
 * An element whose only child has no content: after {@code m.manipulate},
 * the body must contain no elements other than the body itself.
 */
@Test
public void testHierarchyOfEmpty() {
    String fragment = "<div><p></p></div>";
    Document doc = Jsoup.parseBodyFragment(fragment);

    m.manipulate(doc);

    boolean onlyBodyRemains = doc.body().select("*").not("body").isEmpty();
    assertTrue(onlyBodyRemains);
}
}