/**
 * Rewrites root-relative image sources in an HTML fragment to absolute URLs.
 * This is convenient when article HTML is returned from an API and rendered by
 * a client that has no base URL to resolve relative paths against.
 *
 * <p>Only {@code src} values that begin with a single {@code /} are prefixed
 * with {@code domain}. Protocol-relative values ({@code //host/path}) already
 * name a host and are left untouched.
 *
 * @param html   the HTML to process; returned as-is when {@code null}
 * @param domain the prefix to prepend to root-relative sources; when
 *               {@code StrUtils.isBlank(domain)} holds, {@code html} is
 *               returned unchanged
 * @return the serialized child elements of the parsed body with image sources
 *         rewritten, or {@code html} itself when no rewriting is attempted
 */
public static String makeImageSrcToAbsolutePath(String html, String domain) {
    if (html == null || StrUtils.isBlank(domain)) {
        return html;
    }

    Document doc = Jsoup.parse(html);
    for (Element img : doc.select("img")) {
        String src = img.attr("src");
        // "//cdn.example.com/a.png" is protocol-relative, not root-relative;
        // prefixing it with the domain would produce a broken URL.
        if (StrUtils.isNotBlank(src) && src.startsWith("/") && !src.startsWith("//")) {
            img.attr("src", domain + src);
        }
    }
    // Only the element children of <body> are serialized here.
    return doc.body().children().toString();
}
/**
 * Stores a document rebuilt from only the elements of {@code doc} that match
 * the given selector.
 *
 * @param doc         the document to select from
 * @param RAW_COLUMNS the selector query identifying the elements to keep
 */
@Override
public void setDoc(Document doc, String RAW_COLUMNS) {
    // Serialize the matches and re-parse them as a standalone body fragment.
    final String matchedHtml = doc.select(RAW_COLUMNS).toString();
    this.doc = Jsoup.parseBodyFragment(matchedHtml);
}
public static void main(String[] args) throws Exception { String url = "https://www.intel.cn/content/www/cn/zh/products/processors/core/view-all.html"; Document doc = JsoupUtil.getDocument(url); // Elements ele = doc.select("firure.blade-image"); Elements divEles = doc.select("div.item-wrap2"); System.out.println(divEles.toString()); for(Element child: divEles){ printSectionDetails(child); } // System.out.println(doc.toString()); }
/**
 * Looks through the links of the post-login main page for the one labelled
 * "成绩查询" (grade query) and stores its session-scoped URL in {@code grade_url}.
 * When {@code grade_url} is empty afterwards, shows the "验证码错误"
 * (verification code error) toast and requests a new verification code.
 */
private void get_grade_url() {
    final Elements anchors = Jsoup.parse(UstsValue.login_success_main_html).select("a[href]");
    Log.e("tag", anchors.toString());

    for (Element anchor : anchors) {
        if ("成绩查询".equals(anchor.text())) {
            // The random string is embedded in the path, wrapped in parentheses.
            grade_url = "http://jw.usts.edu.cn/(" + random_string + ")/" + anchor.attr("href");
        }
    }

    if (!TextUtils.isEmpty(grade_url)) {
        Log.e("tag", grade_url);
    } else {
        ToastUtils.showShort("验证码错误");
        getVerifCode();
    }
}
/**
 * Decodes the response body as gb2312, extracts the
 * {@code article[id=main-column]} element(s) and loads their HTML into the
 * web view on the UI thread.
 */
@Override
public void onResponse(Call call, Response response) throws IOException {
    final byte[] rawBody = response.body().bytes();
    final String content = Jsoup.parse(new String(rawBody, "gb2312"))
            .select("article[id=main-column]")
            .toString();

    getActivity().runOnUiThread(new Runnable() {
        @Override
        public void run() {
            wv_education.loadDataWithBaseURL("http://jwch.usts.edu.cn/", content, "text/html", "utf-8", null);
        }
    });
}
});
/**
 * Parses the response body, reads the title and author from the
 * {@code div[class=title]} block, and shows them together with the page's main
 * content on the UI thread. The {@code div[class=question-answer]} markup is
 * preferred; {@code div[class=content]} is used when the former is empty.
 */
@Override
public void onResponse(Call call, Response response) throws IOException {
    final Document page = Jsoup.parse(response.body().string());

    final String title = page.select("div[class=title]").select("h1").get(0).text();
    final String author = page.select("div[class=title]").select("span").get(0).text();

    final String questionAnswerHtml = page.select("div[class=question-answer]").toString();
    final String contentHtml = page.select("div[class=content]").toString();
    final String finalConten = TextUtils.isEmpty(questionAnswerHtml) ? contentHtml : questionAnswerHtml;

    getActivity().runOnUiThread(new Runnable() {
        @Override
        public void run() {
            tv_knowledge_title.setText(title);
            tv_knowledge_author.setText(author);
            wv_knowledge.loadDataWithBaseURL(null, finalConten, "text/html", "utf-8", null);
        }
    });
}
});
public static void main(String... args) { scrapeLink(); } public static void scrapeLink() { // File input = new File("/tmp/input.html"); Document doc = null; try { doc = Jsoup.connect("http://www.homedepot.com").get(); Elements link = doc.select("a[href]"); String stringLink = null; for (int i = 0; i < link.size(); i++) { stringLink = link.toString(); System.out.println(stringLink); } System.out.println(link); } catch (IOException e) { e.printStackTrace(); } Element links = doc.select("a[href]").first(); System.out.println(links); }
// Store the string form of `link` in `stringLink` and print it to stdout.
stringLink = link.toString(); System.out.println(stringLink);
// Capture the string form of `Competizioni`; `Competizione` starts out unset.
// The branch that follows handles the case where `ObjPartita` has no entries.
String comp=Competizioni.toString(); String Competizione=null; if(ObjPartita.size()==0){
/**
 * Downloads the page at {@code url} (6 second timeout), concatenates the
 * markup of the second {@code div[class=content]} match with that of all
 * {@code div[class=links]} matches, and displays the result in the web view
 * on the UI thread. An I/O failure only prints its stack trace.
 */
@Override
public void run() {
    try {
        final Document page = Jsoup.connect(url).timeout(6000).get();
        // eq(1) keeps only the match at index 1, i.e. the second content block.
        final String secondContentHtml = page.select("div[class=content]").eq(1).toString();
        final String linksHtml = page.select("div[class=links]").toString();
        final String html = secondContentHtml + linksHtml;

        getActivity().runOnUiThread(new Runnable() {
            @Override
            public void run() {
                webView.loadDataWithBaseURL(null, html, "text/html", "utf-8", null);
            }
        });
    } catch (IOException e) {
        e.printStackTrace();
    }
}
}).start();
/**
 * Fetches the page at {@code url}, scans the serialized {@code <script>}
 * elements for chapter entries (a {@code chapnamesub} div followed by a
 * {@code target="_blank"} link), and adds one {@code DownloadBean} per entry
 * to {@code urls}.
 *
 * <p>The latch is counted down in a {@code finally} block so that threads
 * waiting on it are released even if an unchecked exception escapes.
 */
@Override
public void run() {
    try {
        Elements elements = Jsoup.connect(url)
                .timeout(10000)
                .ignoreContentType(true)
                .ignoreHttpErrors(true)
                .userAgent(Url.MOBBILE_AGENT)
                .get()
                .getElementsByTag("script");

        // Compile once up front instead of on every loop iteration.
        Pattern entryPattern = Pattern.compile("<div class=\"chapnamesub\">[^a-z]+</div>.+<a target=\"_blank\" href=\".+\">[^a-z]+</a>");
        Pattern namePattern = Pattern.compile("<div class=\"chapnamesub\">([^a-z]+)</div>");
        Pattern pathPattern = Pattern.compile("<a target=\"_blank\" href=\"(.+)\">[^a-z]+</a>");

        Matcher m = entryPattern.matcher(elements.toString());
        while (m.find()) {
            String entry = m.group();
            String name = "";
            String path = "";

            Matcher matcher = namePattern.matcher(entry);
            if (matcher.find()) {
                name = matcher.group(1);
            }
            matcher = pathPattern.matcher(entry);
            if (matcher.find()) {
                path = matcher.group(1);
            }
            urls.add(new DownloadBean(name, path));
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        latch.countDown();
    }
}
});
/**
 * Decorates a code-block HTML fragment with expand/collapse markup.
 *
 * <p>When a {@code div.codeHeader} exists, the first one is restyled and the
 * collapse-source markup is inserted after its first child. Otherwise the
 * header markup is inserted before the first {@code div.codeContent}. In both
 * cases the first {@code div.codeContent} receives the {@code hide-toolbar}
 * class.
 *
 * @param content the HTML fragment to decorate
 * @return the inner HTML of the parsed body after decoration
 */
private String addExpandCollapseHtml(String content) {
    final Document doc = Jsoup.parse(content);
    final Elements headerElements = doc.select("div.codeHeader");

    // Ask the collection whether it has matches instead of serializing it to HTML.
    if (!headerElements.isEmpty()) {
        final Element headerElement = headerElements.get(0);
        headerElement.addClass("hide-border-bottom");
        headerElement.child(0).addClass("code-title");
        headerElement.child(0).after(addCollapseSourceHtml());

        // Looked up after the header edit, matching the original evaluation order.
        final Element contentElement = doc.select("div.codeContent").get(0);
        contentElement.addClass("hide-toolbar");
    } else {
        final Element contentElement = doc.select("div.codeContent").get(0);
        contentElement.before(addHeaderHtml());
        contentElement.addClass("hide-toolbar");
    }
    return doc.body().html();
}
// Parse the HTML, collect the <td> cells under table[id=tables], log them under
// the "tag" tag, then start with a fresh list and no current card.
Document document = Jsoup.parse(html); Elements elements = document.select("table[id=tables]").select("td"); Log.e("tag", elements.toString()); list = new ArrayList<>(); Card card = null;
// Build a group holding the <p> element plus every sibling that follows it.
String input = "<div>a</div><p>b</p><strong>c</strong>";
Document doc = Jsoup.parse(input);

// "p ~ *" matches the siblings that come after a <p>.
Elements elements = doc.select("p ~ *");
Element firstFollower = elements.first();

Elements group = new Elements();
// The element just before the first follower goes in first, then all followers.
group.add(firstFollower.previousElementSibling());
group.addAll(elements);

// 'group' can be used like any other Elements collection.
String output = group.toString();
// Select the <li> items directly under ul.panel_body inside div#panel_Category.
// The category list is only (re)built when the selection serializes to non-empty text.
elements = doc.select("div#panel_Category").select("ul.panel_body > li"); if (elements != null && !"".equals(elements.toString())) { blogCategoryList = new ArrayList<>(); String category;
// Wrap the crawl datum and HTTP response in a Page, select the <td> cells with
// the given inline style, and keep their string form with tabs and newlines removed.
// NOTE(review): the selector depends on the exact style attribute text — confirm against the target page.
Page page = new Page(crawlDatum, httpResponse); String numTmp = page.select("td[style=font-size:12px; text-align:center;]").toString().replace("\t","").replace("\n","");
// Parse the HTML, collect the <td> cells under elements whose class attribute is
// "datelist", log them under the "tag" tag, then start with a fresh list and a zeroed counter.
Document document = Jsoup.parse(html); Elements elemnts = document.getElementsByAttributeValue("class", "datelist").select("td"); Log.e("tag", elemnts.toString()); list = new ArrayList<>(); int i = 0;