Refine search
/**
 * Loads the listing page at {@code url} and converts every matching item block into a
 * {@link TaobaoModel}.
 * <p>
 * Items are parsed independently of one another: when one item block is missing an expected
 * child element, that item alone is skipped (its stack trace is printed) and parsing continues
 * with the next block, instead of discarding every item that follows it on the page.
 *
 * @param url address handed to {@code getUrlDoc} to obtain the page
 * @return the items that could be parsed, in page order; an empty list (never {@code null})
 *         when {@code getUrlDoc} returns {@code null} or no item could be parsed
 */
public List<TaobaoModel> getTaobao(String url) {
    Document doc = getUrlDoc(url);
    if (doc == null) {
        return new ArrayList<>();
    }
    List<TaobaoModel> list = new ArrayList<>();
    Elements trs = doc.select("div[class=item-block item-idle sh-roundbox]");
    for (Element tr : trs) {
        // The try/catch lives inside the loop so a single malformed block cannot abort the rest.
        try {
            TaobaoModel taobaoModel = new TaobaoModel();

            // Seller name is read from the title of the link inside the avatar container.
            Element user = tr.select("div[class=seller-avatar]").get(0);
            taobaoModel.user = user.select("a").attr("title");

            Element info = tr.select("div[class=item-info]").get(0);

            // The picture address is taken from the lazy-load attribute and prefixed with a scheme.
            Elements img = info.select("div[class=item-pic]").get(0).select("img");
            taobaoModel.image = "http:" + img.attr("data-ks-lazyload-custom");

            Element attributes = info.select("div[class=item-attributes]").get(0);
            taobaoModel.region = attributes.select("div[class=item-location]").get(0).text();
            taobaoModel.price = MathUtil.getDouble(attributes.select("em").get(0).text());

            taobaoModel.title = info.select("div[class=item-brief-desc]").get(0).text();
            taobaoModel.time = info.select("span[class=item-pub-time]").get(0).text();

            // Computed last, after all other fields have been populated.
            taobaoModel.sign = taobaoModel.getSign();
            list.add(taobaoModel);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    return list;
}
/**
 * Loads the listing page at {@code url} and converts every matching item block into a
 * {@link TaobaoModel}.
 * <p>
 * Items are parsed independently of one another: when one item block is missing an expected
 * child element, that item alone is skipped (its stack trace is printed) and parsing continues
 * with the next block, instead of discarding every item that follows it on the page.
 *
 * @param url address handed to {@code getUrlDoc} to obtain the page
 * @return the items that could be parsed, in page order; an empty list (never {@code null})
 *         when {@code getUrlDoc} returns {@code null} or no item could be parsed
 */
public List<TaobaoModel> getTaobao(String url) {
    Document doc = getUrlDoc(url);
    if (doc == null) {
        return new ArrayList<>();
    }
    List<TaobaoModel> list = new ArrayList<>();
    Elements trs = doc.select("div[class=item-block item-idle sh-roundbox]");
    for (Element tr : trs) {
        // The try/catch lives inside the loop so a single malformed block cannot abort the rest.
        try {
            TaobaoModel taobaoModel = new TaobaoModel();

            // Seller name is read from the title of the link inside the avatar container.
            Element user = tr.select("div[class=seller-avatar]").get(0);
            taobaoModel.user = user.select("a").attr("title");

            Element info = tr.select("div[class=item-info]").get(0);

            // The picture address is taken from the lazy-load attribute and prefixed with a scheme.
            Elements img = info.select("div[class=item-pic]").get(0).select("img");
            taobaoModel.image = "http:" + img.attr("data-ks-lazyload-custom");

            Element attributes = info.select("div[class=item-attributes]").get(0);
            taobaoModel.region = attributes.select("div[class=item-location]").get(0).text();
            taobaoModel.price = MathUtil.getDouble(attributes.select("em").get(0).text());

            taobaoModel.title = info.select("div[class=item-brief-desc]").get(0).text();
            taobaoModel.time = info.select("span[class=item-pub-time]").get(0).text();

            // Computed last, after all other fields have been populated.
            taobaoModel.sign = taobaoModel.getSign();
            list.add(taobaoModel);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    return list;
}
/**
 * Get the <i>nth</i> matched element as an Elements object.
 * <p>
 * See also {@link #get(int)} to retrieve an Element.
 * @param index the (zero-based) index of the element in the list to retain
 * @return Elements containing only the specified element, or, if that element did not exist
 * (the index is negative, or not less than {@link #size()}), an empty list.
 */
public Elements eq(int index) {
    // Guard the lower bound as well: a negative index names no element, so it must yield the
    // documented empty list rather than being handed to get(int).
    boolean exists = index >= 0 && index < size();
    return exists ? new Elements(get(index)) : new Elements();
}
for (int i = 0; i < tableHeaderEles.size(); i++) { System.out.println(tableHeaderEles.get(i).text()); for (int i = 0; i < tableRowElements.size(); i++) { Element row = tableRowElements.get(i); System.out.println("row"); Elements rowItems = row.select("td"); for (int j = 0; j < rowItems.size(); j++) { System.out.println(rowItems.get(j).text());
@Override public void downloadURL(URL url, int index) { try { Document doc = Http.url(url).get(); Elements images = doc.select("article.ep-contents img"); // Find maximum # of images for optimal filename indexing int epiLog = (int) (Math.floor(Math.log10(episodes.size())) + 1), imgLog = (int) (Math.floor(Math.log10(images.size() )) + 1); for (int i = 0; i < images.size(); i++) { String link = images.get(i).attr("src"); TapasticEpisode episode = episodes.get(index - 1); // Build elaborate filename prefix StringBuilder prefix = new StringBuilder(); prefix.append(String.format("ep%0" + epiLog + "d", index)); prefix.append(String.format("-%0" + imgLog + "dof%0" + imgLog + "d-", i + 1, images.size())); prefix.append(episode.filename.replace(" ", "-")); prefix.append("-"); addURLToDownload(new URL(link), prefix.toString()); if (isThisATest()) { break; } } } catch (IOException e) { LOGGER.error("[!] Exception while downloading " + url, e); } }
/**
 * Extracts the gallery comments from a parsed page.
 * <p>
 * Every element of class {@code c1} below the element with id {@code cdiv} is handed to
 * {@code parseComment}; results that are {@code null} are left out.
 *
 * @param document the parsed page
 * @return the parsed comments in document order, or {@code EMPTY_GALLERY_COMMENT_ARRAY} when a
 *         throwable is raised that {@code ExceptionUtils.throwIfFatal} does not rethrow
 */
@NonNull
public static GalleryComment[] parseComments(Document document) {
    try {
        Element container = document.getElementById("cdiv");
        Elements commentNodes = container.getElementsByClass("c1");
        List<GalleryComment> parsed = new ArrayList<>(commentNodes.size());
        for (Element node : commentNodes) {
            GalleryComment comment = parseComment(node);
            if (comment == null) {
                continue;
            }
            parsed.add(comment);
        }
        return parsed.toArray(new GalleryComment[parsed.size()]);
    } catch (Throwable e) {
        ExceptionUtils.throwIfFatal(e);
        e.printStackTrace();
        return EMPTY_GALLERY_COMMENT_ARRAY;
    }
}
/**
 * Fills in the id, poster image and video address of {@code video} from the page at its long URL.
 * <p>
 * The id is the MD5 of the short URL. The image comes from the {@code data-url} attribute of the
 * first {@code div[class=video_img WscaleH]}, the video address from the {@code src} attribute of
 * the first {@code video#video}; either is left untouched when no such element is found.
 *
 * @param video bean providing the long and short URLs; it is modified in place
 * @return the same {@code video} instance
 * @throws Exception whatever fetching the page or generating the id raises
 */
public static VideoBean getVideoFromWeipai(VideoBean video) throws Exception {
    Document page = Jsoup.connect(video.getLongUrl()).get();
    video.setIdStr(KeyGenerator.generateMD5(video.getShortUrl()));

    Elements posterNodes = page.select("div[class=video_img WscaleH]");
    boolean posterFound = posterNodes != null && !posterNodes.isEmpty();
    if (posterFound) {
        video.setImage(posterNodes.get(0).attr("data-url"));
    }

    Elements playerNodes = page.select("video#video");
    boolean playerFound = playerNodes != null && !playerNodes.isEmpty();
    if (playerFound) {
        video.setVideoUrl(playerNodes.get(0).attr("src"));
    }

    return video;
}
/**
 * Fills in the id, video address and poster image of {@code video} from the page at its long URL.
 * <p>
 * The id is the MD5 of the short URL. The video address is the {@code src} attribute of the first
 * {@code video.video} with every occurrence of {@code amp;} removed; the image is the {@code src}
 * attribute of the first {@code img.poster}. Either is left untouched when no such element is found.
 *
 * @param video bean providing the long and short URLs; it is modified in place
 * @return the same {@code video} instance
 * @throws Exception whatever fetching the page or generating the id raises
 */
public static VideoBean getVideoFromSinaVideo(VideoBean video) throws Exception {
    Document page = Jsoup.connect(video.getLongUrl()).get();
    video.setIdStr(KeyGenerator.generateMD5(video.getShortUrl()));

    Elements playerNodes = page.select("video.video");
    boolean playerFound = playerNodes != null && !playerNodes.isEmpty();
    if (playerFound) {
        String rawSrc = playerNodes.get(0).attr("src");
        // Presumably undoes leftover "&amp;" escaping in the address -- TODO confirm.
        video.setVideoUrl(rawSrc.replace("amp;", ""));
    }

    Elements posterNodes = page.select("img.poster");
    boolean posterFound = posterNodes != null && !posterNodes.isEmpty();
    if (posterFound) {
        video.setImage(posterNodes.get(0).attr("src"));
    }

    return video;
}
/**
 * Extracts the tag groups from a parsed page.
 * <p>
 * The children of the first grandchild of the element with id {@code taglist} are each handed to
 * {@code parseTagGroup}; results that are {@code null} are left out.
 *
 * @param document the parsed page
 * @return the parsed groups in document order, or {@code EMPTY_GALLERY_TAG_GROUP_ARRAY} when a
 *         throwable is raised that {@code ExceptionUtils.throwIfFatal} does not rethrow
 */
@NonNull
public static GalleryTagGroup[] parseTagGroups(Document document) {
    try {
        Element tagList = document.getElementById("taglist");
        Elements groupNodes = tagList.child(0).child(0).children();
        List<GalleryTagGroup> parsed = new ArrayList<>(groupNodes.size());
        for (Element node : groupNodes) {
            GalleryTagGroup group = parseTagGroup(node);
            if (group == null) {
                continue;
            }
            parsed.add(group);
        }
        return parsed.toArray(new GalleryTagGroup[parsed.size()]);
    } catch (Throwable e) {
        ExceptionUtils.throwIfFatal(e);
        e.printStackTrace();
        return EMPTY_GALLERY_TAG_GROUP_ARRAY;
    }
}
/**
 * Look up an element by its ID, searching this element and everything beneath it.
 * <p>
 * The search begins at this element and yields the first match it encounters, so starting from a
 * different element may produce a different result for the same ID. To find the unique element
 * with an ID within a whole Document, use {@link Document#getElementById(String)}.
 * @param id The ID to search for; must not be empty.
 * @return The first element with the given ID found from this element downwards, or null if there is none.
 */
public Element getElementById(String id) {
    Validate.notEmpty(id);

    Elements matches = Collector.collect(new Evaluator.Id(id), this);
    return matches.isEmpty() ? null : matches.get(0);
}
@Nullable private static GalleryTagGroup parseTagGroup(Element element) { try { GalleryTagGroup group = new GalleryTagGroup(); String nameSpace = element.child(0).text(); // Remove last ':' nameSpace = nameSpace.substring(0, nameSpace.length() - 1); group.groupName = nameSpace; Elements tags = element.child(1).children(); for (int i = 0, n = tags.size(); i < n; i++) { String tag = tags.get(i).text(); // Sometimes parody tag is followed with '|' and english translate, just remove them int index = tag.indexOf('|'); if (index >= 0) { tag = tag.substring(0, index).trim(); } group.addTag(tag); } return group.size() > 0 ? group : null; } catch (Throwable e) { ExceptionUtils.throwIfFatal(e); e.printStackTrace(); return null; } }
/**
 * Reads the page count of a gallery list from its pager.
 * <p>
 * The count is the trimmed text of the second-to-last cell among the children of the first
 * grandchild of the first element of class {@code ptt}.
 *
 * @param d    the parsed page
 * @param body raw page text, passed along to the {@link ParseException} on failure
 * @return the parsed page count
 * @throws ParseException when any throwable is raised that
 *         {@code ExceptionUtils.throwIfFatal} does not rethrow
 */
private static int parsePages(Document d, String body) throws ParseException {
    try {
        Elements pagerCells = d.getElementsByClass("ptt").first().child(0).child(0).children();
        int secondToLast = pagerCells.size() - 2;
        String label = pagerCells.get(secondToLast).text().trim();
        return Integer.parseInt(label);
    } catch (Throwable e) {
        ExceptionUtils.throwIfFatal(e);
        throw new ParseException("Can't parse gallery list pages", body);
    }
}
/**
 * Reads the number of preview pages from the pager of a parsed page.
 * <p>
 * The count is the text of the second-to-last cell among the children of the first grandchild
 * of the first element of class {@code ptt}.
 *
 * @param document the parsed page
 * @param body     raw page text, passed along to the {@link ParseException} on failure
 * @return the parsed number of preview pages
 * @throws ParseException when any throwable is raised that
 *         {@code ExceptionUtils.throwIfFatal} does not rethrow; its stack trace is printed first
 */
public static int parsePreviewPages(Document document, String body) throws ParseException {
    try {
        Elements pagerCells = document.getElementsByClass("ptt").first().child(0).child(0).children();
        int secondToLast = pagerCells.size() - 2;
        String label = pagerCells.get(secondToLast).text();
        return Integer.parseInt(label);
    } catch (Throwable e) {
        ExceptionUtils.throwIfFatal(e);
        e.printStackTrace();
        throw new ParseException("Can't parse preview pages", body);
    }
}
/**
 * Fetches the page at {@code url} and renders its single HTML table as CSV text.
 * <p>
 * Each {@code tr} selected from the table becomes one CSV row; within a row the texts of the
 * {@code th} cells come first, followed by the texts of the {@code td} cells.
 *
 * @param url address of the page to fetch
 * @return the CSV text produced from the table
 * @throws IOException propagated from fetching the page
 * @throws IllegalStateException if the page does not match exactly one {@code table} element
 */
public String tableToCsv(String url) throws IOException {
    Document doc = Jsoup.connect(url).get();
    Elements tables = doc.select("table");
    if (tables.size() != 1) {
        throw new IllegalStateException(
                "Reading html to table currently works if there is exactly 1 html table on the page. "
                        + "The URL you passed has " + tables.size()
                        + ". You may file a feature request with the URL if you'd like your page to be supported");
    }
    Element table = tables.get(0);

    CsvWriterSettings settings = new CsvWriterSettings();
    StringWriter stringWriter = new StringWriter();
    CsvWriter csvWriter = new CsvWriter(stringWriter, settings);
    try {
        for (Element row : table.select("tr")) {
            Elements headerCells = row.getElementsByTag("th");
            Elements cells = row.getElementsByTag("td");
            String[] nextLine = Stream.concat(headerCells.stream(), cells.stream())
                    .map(Element::text)
                    .toArray(String[]::new);
            csvWriter.writeRow(nextLine);
        }
    } finally {
        // Close the writer so everything it has written is flushed into stringWriter before
        // the text is read back; the StringWriter's contents stay readable afterwards.
        csvWriter.close();
    }
    return stringWriter.toString();
}
}
/**
 * Fills in the video address, poster image and id of {@code video} from the page at its long URL.
 * <p>
 * The video address and image are the {@code data-video} and {@code data-poster} attributes of
 * the first {@code div#mediaPlayer}; both are left untouched when no such element is found.
 * The id is the MD5 of the short URL and is set afterwards in either case.
 *
 * @param video bean providing the long and short URLs; it is modified in place
 * @return the same {@code video} instance
 * @throws Exception whatever fetching the page or generating the id raises
 */
public static VideoBean getVideoFromMeipai(VideoBean video) throws Exception {
    Document page = Jsoup.connect(video.getLongUrl()).get();

    Elements playerNodes = page.select("div#mediaPlayer");
    boolean playerFound = playerNodes != null && !playerNodes.isEmpty();
    if (playerFound) {
        Element player = playerNodes.get(0);
        video.setVideoUrl(player.attr("data-video"));
        video.setImage(player.attr("data-poster"));
    }

    video.setIdStr(KeyGenerator.generateMD5(video.getShortUrl()));
    return video;
}