/**
 * Runs the given selector and returns all of its matches.
 *
 * <p>An {@code ElementSelector} is applied to the value of {@code getDocument()};
 * any other selector is applied to the value of {@code getFirstSourceText()}.
 *
 * @param selector the selector to run
 * @return the list produced by the selector's {@code selectList}
 */
public List<String> selectDocumentForList(Selector selector) {
    if (!(selector instanceof ElementSelector)) {
        return selector.selectList(getFirstSourceText());
    }
    return ((ElementSelector) selector).selectList(getDocument());
}
/**
 * Runs the given selector and returns its single result.
 *
 * <p>An {@code ElementSelector} is applied to the value of {@code getDocument()};
 * any other selector is applied to the value of {@code getFirstSourceText()}.
 *
 * @param selector the selector to run
 * @return the value produced by the selector's {@code select}
 */
public String selectDocument(Selector selector) {
    if (!(selector instanceof ElementSelector)) {
        return selector.select(getFirstSourceText());
    }
    return ((ElementSelector) selector).select(getDocument());
}
/**
 * Builds a regex selector via {@code Selectors.regex(regex, group)} and runs it with
 * {@code selectList} over {@code getSourceTexts()}.
 *
 * @param regex the regular expression
 * @param group the group argument forwarded to {@code Selectors.regex}
 * @return the result of the list selection
 */
@Override
public Selectable regex(String regex, int group) {
    return selectList(Selectors.regex(regex, group), getSourceTexts());
}
/**
 * Queues one follow-up request for each table cell selected from the page.
 *
 * <p>The cells are the {@code td} nodes matched by
 * {@code //*[@id="newAlexa"]/table/tbody/tr/td}. For each cell the {@code href} value
 * becomes the request URL and the cell text is attached as the {@code "province"} extra.
 * Cells that yield no usable link (null or blank) are skipped, so that no request with a
 * missing URL is queued.
 *
 * @param page the page whose HTML is scanned and to which target requests are added
 */
private void processCountry(Page page) {
    List<String> provinces = page.getHtml().xpath("//*[@id=\"newAlexa\"]/table/tbody/tr/td").all();
    for (String province : provinces) {
        String link = xpath("//@href").select(province);
        // select() may return null when nothing matches; such a cell has nothing to crawl.
        if (link == null || link.trim().isEmpty()) {
            continue;
        }
        String title = xpath("/text()").select(province);
        Request request = new Request(link).setPriority(0).putExtra("province", title);
        page.addTargetRequest(request);
    }
}
/**
 * Processes one page: queues the links matched by the board/file regex as target
 * requests, then stores the {@code "title"} and {@code "content"} fields.
 *
 * @param page the page being processed
 */
@Override
public void process(Page page) {
    // Follow-up links: the captured href values matched by the regex below.
    page.addTargetRequests(
            page.getHtml().regex("<a[^<>]*href=(bbstcon\\?board=Pictures&file=[^>]*)").all());

    // Extracted fields.
    page.putField("title", page.getHtml().xpath("//div[@id='content']//h2/a"));
    page.putField("content", page.getHtml().smartContent());
}
/**
 * Creates a {@code JsonPathSelector} for the given expression and runs it with
 * {@code selectList} over {@code getSourceTexts()}.
 *
 * @param jsonPath the JSON path expression
 * @return the result of the list selection
 */
@Override
public Selectable jsonPath(String jsonPath) {
    return selectList(new JsonPathSelector(jsonPath), getSourceTexts());
}
} // closes the enclosing scope opened outside this excerpt
/**
 * Builds an XPath selector via {@code Selectors.xpath(xpath)} and runs it with
 * {@code selectElements}.
 *
 * @param xpath the XPath expression
 * @return the result of the element selection
 */
@Override
public Selectable xpath(String xpath) {
    return selectElements(Selectors.xpath(xpath));
}
/**
 * Builds a CSS selector via {@code Selectors.$(selector, attrName)} and runs it with
 * {@code selectElements}.
 *
 * @param selector the CSS selector expression
 * @param attrName the attribute name forwarded to {@code Selectors.$}
 * @return the result of the element selection
 */
@Override
public Selectable $(String selector, String attrName) {
    return selectElements(Selectors.$(selector, attrName));
}
/**
 * Creates a {@code ReplaceSelector} from the given pattern and replacement and runs it
 * with {@code select} over {@code getSourceTexts()}.
 *
 * @param regex the pattern passed to the replace selector
 * @param replacement the replacement passed to the replace selector
 * @return the result of the selection
 */
@Override
public Selectable replace(String regex, String replacement) {
    return select(new ReplaceSelector(regex, replacement), getSourceTexts());
}
/**
 * Runs the selector obtained from {@code Selectors.smartContent()} with {@code select}
 * over {@code getSourceTexts()}.
 *
 * @return the result of the selection
 */
@Override
public Selectable smartContent() {
    return select(Selectors.smartContent(), getSourceTexts());
}
/**
 * Returns the {@code Json} view of this page's raw text.
 *
 * <p>The instance is created from {@code rawText} on first access and the same instance
 * is returned on later calls.
 *
 * @return the json content of the page
 * @since 0.5.0
 */
public Json getJson() {
    if (json != null) {
        return json;
    }
    json = new Json(rawText);
    return json;
}
/**
 * Runs the given selector in list mode.
 *
 * <p>A {@code BaseElementSelector} is handed to {@code selectElements}; any other
 * selector is run via {@code selectList} over {@code getSourceTexts()}.
 *
 * @param selector the selector to run
 * @return the result of the selection
 */
@Override
public Selectable selectList(Selector selector) {
    return selector instanceof BaseElementSelector
            ? selectElements((BaseElementSelector) selector)
            : selectList(selector, getSourceTexts());
}
/**
 * Tries each selector in {@code selectors} in iteration order and returns the first
 * non-null result.
 *
 * @param text the text handed to every selector
 * @return the first non-null selection, or {@code null} if every selector returned null
 */
@Override
public String select(String text) {
    String matched = null;
    for (Selector candidate : selectors) {
        matched = candidate.select(text);
        if (matched != null) {
            break;
        }
    }
    return matched;
}
/**
 * Runs a new {@code LinksSelector} with {@code selectElements}.
 *
 * @return the result of the element selection
 */
@Override
public Selectable links() {
    LinksSelector linksSelector = new LinksSelector();
    return selectElements(linksSelector);
}
/**
 * Selects a single value from {@code text}: evaluates {@code selectGroup(text)} and
 * returns the entry at this selector's {@code group} field.
 *
 * <p>NOTE(review): assumes {@code selectGroup} never returns null; confirm, otherwise
 * this call throws a NullPointerException.
 *
 * @param text the text to select from
 * @return the value that {@code get(group)} yields on the group result
 */
@Override
public String select(String text) {
    return selectGroup(text).get(group);
}
/**
 * Builds a regex selector via {@code Selectors.regex(regex)} and runs it with
 * {@code selectList} over {@code getSourceTexts()}.
 *
 * @param regex the regular expression
 * @return the result of the list selection
 */
@Override
public Selectable regex(String regex) {
    return selectList(Selectors.regex(regex), getSourceTexts());
}
/**
 * Builds a CSS selector via {@code Selectors.$(selector)} and runs it with
 * {@code selectElements}.
 *
 * @param selector the CSS selector expression
 * @return the result of the element selection
 */
@Override
public Selectable $(String selector) {
    return selectElements(Selectors.$(selector));
}
/**
 * Feeds {@code text} through every selector in {@code selectors} in iteration order,
 * using each selector's output as the next selector's input.
 *
 * <p>Returns {@code null} as soon as the current value is null before a selector is
 * applied.
 *
 * @param text the initial input
 * @return the output of the last selector, or {@code null} if the chain was cut short
 */
@Override
public String select(String text) {
    String current = text;
    for (Selector stage : selectors) {
        if (current == null) {
            return null;
        }
        current = stage.select(current);
    }
    return current;
}