/**
 * Processes one downloaded page.
 * <p>
 * When the page URL matches {@code LIST_URL}, every {@code _id} selected by
 * {@code $.data[*]._id} becomes a follow-up request against the article API.
 * Otherwise the {@code title} and {@code content} fields are extracted from
 * {@code $.data} and stored on the page.
 *
 * @param page the downloaded page; its raw text is queried with JSONPath
 */
@Override
public void process(Page page) {
    boolean listPage = page.getUrl().regex(LIST_URL).match();
    if (!listPage) {
        // Not a listing: extract the two result fields and stop.
        JsonPathSelector titleSelector = new JsonPathSelector("$.data.title");
        page.putField("title", titleSelector.select(page.getRawText()));
        JsonPathSelector contentSelector = new JsonPathSelector("$.data.content");
        page.putField("content", contentSelector.select(page.getRawText()));
        return;
    }

    // Listing: queue one detail request per id found in the payload.
    JsonPathSelector idSelector = new JsonPathSelector("$.data[*]._id");
    List<String> articleIds = idSelector.selectList(page.getRawText());
    if (CollectionUtils.isNotEmpty(articleIds)) {
        for (String articleId : articleIds) {
            page.addTargetRequest("http://angularjs.cn/api/article/" + articleId);
        }
    }
}
/**
 * Evaluates {@code jsonPath} against {@code text} and returns a single
 * string result.
 * <ul>
 *   <li>a {@code null} read result yields {@code null};</li>
 *   <li>a non-empty list yields its first element, rendered through
 *       {@code toString(Object)};</li>
 *   <li>anything else, including an empty list, yields the result's own
 *       {@code toString()}.</li>
 * </ul>
 *
 * @param text the text handed to {@code jsonPath.read}
 * @return the string form described above, or {@code null} when the read
 *         result is {@code null}
 */
@Override
public String select(String text) {
    Object result = jsonPath.read(text);
    if (result == null) {
        return null;
    }
    if (result instanceof List) {
        // Wildcard instead of a raw type; instanceof already excludes null,
        // so the cast value needs no further null check.
        List<?> matches = (List<?>) result;
        if (!matches.isEmpty()) {
            return toString(matches.get(0));
        }
    }
    // NOTE(review): this fall-through uses result.toString() rather than the
    // toString(Object) helper applied to list elements, and an empty list
    // ends up here too - confirm both are intended.
    return result.toString();
}
/**
 * Extracts fields from the page's raw text using the expressions supplied
 * by {@code model}, then queues the links that match the model's target
 * link pattern.
 * <p>
 * Fields are stored only when the selected title is neither empty nor the
 * literal string {@code "null"}. Link discovery runs in every case.
 * Although the model getters are named {@code *Regex}, the title, release
 * date, author and content expressions are passed to
 * {@link JsonPathSelector}.
 *
 * @param page  the downloaded page
 * @param model supplies the selector expressions and the target link regex
 */
@Override
public void process(Page page, BaseModel model) {
    String body = page.getRawText();
    String title = new JsonPathSelector(model.getTitleRegex()).select(body);

    boolean titleMissing = StringUtils.isEmpty(title) || "null".equals(title);
    if (!titleMissing) {
        page.putField("title", title);

        JsonPathSelector releaseDateSelector = new JsonPathSelector(model.getReleaseDateRegex());
        page.putField("releaseDate", releaseDateSelector.select(body));

        JsonPathSelector authorSelector = new JsonPathSelector(model.getAuthorRegex());
        page.putField("author", authorSelector.select(body));

        JsonPathSelector contentSelector = new JsonPathSelector(model.getContentRegex());
        page.putField("content", contentSelector.select(body));

        page.putField("source", page.getRequest().getUrl());
    }

    // Follow-up links are collected whether or not a title was found.
    page.addTargetRequests(page.getHtml().links().regex(model.getTargetLinksRegex()).all());
}
}
/**
 * Applies a JSONPath expression to this object's source texts.
 *
 * @param jsonPath the expression used to build a {@link JsonPathSelector}
 * @return whatever {@code selectList} produces for that selector and
 *         {@code getSourceTexts()}
 */
@Override
public Selectable jsonPath(String jsonPath) {
    // The selector is constructed before getSourceTexts() is evaluated,
    // matching left-to-right argument evaluation.
    return selectList(new JsonPathSelector(jsonPath), getSourceTexts());
}
}
/**
 * Builds the {@link Selector} that corresponds to {@code expressionType},
 * using {@code expressionValue} as the expression.
 * <p>
 * For {@code Css} and {@code Regex} the first entry of
 * {@code expressionParams}, when present, is passed as a second argument
 * (parsed as an {@code int} for {@code Regex}). {@code XPath} and any
 * unlisted type both produce an XPath selector.
 *
 * @return the selector for the configured expression
 */
private Selector compileSelector() {
    switch (expressionType) {
        case Css:
            return expressionParams.length > 0
                    ? $(expressionValue, expressionParams[0])
                    : $(expressionValue);
        case Regex:
            return expressionParams.length > 0
                    ? regex(expressionValue, Integer.parseInt(expressionParams[0]))
                    : regex(expressionValue);
        case JsonPath:
            return new JsonPathSelector(expressionValue);
        case XPath:
        default:
            // XPath is also the fallback for any other type.
            return xpath(expressionValue);
    }
}
/**
 * Dispatches on the page URL: a URL matching {@code LIST_URL} queues one
 * request per {@code _id} selected by {@code $.data[*]._id}; any other URL
 * has {@code $.data.title} and {@code $.data.content} stored as the
 * {@code title} and {@code content} fields.
 *
 * @param page the downloaded page whose raw text is queried
 */
@Override
public void process(Page page) {
    final String detailUrlPrefix = "http://angularjs.cn/api/article/";

    if (page.getUrl().regex(LIST_URL).match()) {
        JsonPathSelector idPath = new JsonPathSelector("$.data[*]._id");
        List<String> idList = idPath.selectList(page.getRawText());
        if (CollectionUtils.isNotEmpty(idList)) {
            for (String currentId : idList) {
                String detailUrl = detailUrlPrefix + currentId;
                page.addTargetRequest(detailUrl);
            }
        }
    } else {
        JsonPathSelector titlePath = new JsonPathSelector("$.data.title");
        String title = titlePath.select(page.getRawText());
        page.putField("title", title);

        JsonPathSelector contentPath = new JsonPathSelector("$.data.content");
        String content = contentPath.select(page.getRawText());
        page.putField("content", content);
    }
}
/**
 * Creates the {@link Selector} described by an {@link ExtractBy}.
 * <p>
 * The selector class is chosen from {@code extractBy.type()} and is
 * constructed with {@code extractBy.value()}. {@code XPath} and any
 * unlisted type both yield an {@link XpathSelector}.
 *
 * @param extractBy supplies the expression value and the selector type
 * @return a new selector for that expression
 */
public static Selector getSelector(ExtractBy extractBy) {
    // value() is read before type(), as in the original statement order.
    String expression = extractBy.value();
    switch (extractBy.type()) {
        case Css:
            return new CssSelector(expression);
        case Regex:
            return new RegexSelector(expression);
        case JsonPath:
            return new JsonPathSelector(expression);
        case XPath:
        default:
            return new XpathSelector(expression);
    }
}
/**
 * Evaluates {@code jsonPath} against {@code text} and returns every result
 * as a string.
 * <p>
 * A list result contributes one entry per element; any other non-null
 * result contributes a single entry. Each value is rendered through
 * {@code toString(Object)}. A {@code null} read result gives an empty list.
 *
 * @param text the text handed to {@code jsonPath.read}
 * @return a new, possibly empty, list of string results; never {@code null}
 */
@Override
public List<String> selectList(String text) {
    List<String> results = new ArrayList<String>();
    Object result = jsonPath.read(text);
    if (result == null) {
        return results;
    }
    if (result instanceof List) {
        // Elements are only read as Object, so a wildcard list avoids the
        // unchecked cast to List<Object>.
        for (Object element : (List<?>) result) {
            results.add(toString(element));
        }
    } else {
        results.add(toString(result));
    }
    return results;
}
}
/**
 * Runs the given JSONPath expression over {@code getSourceTexts()}.
 *
 * @param jsonPath the expression wrapped in a {@link JsonPathSelector}
 * @return the result of {@code selectList} for that selector
 */
@Override
public Selectable jsonPath(String jsonPath) {
    final JsonPathSelector selector = new JsonPathSelector(jsonPath);
    final Selectable selected = selectList(selector, getSourceTexts());
    return selected;
}
}
/**
 * Reads {@code text} through {@code jsonPath} and collects the results as
 * strings.
 * <p>
 * When the read result is a list, each element is converted with
 * {@code toString(Object)} and appended in iteration order; a non-list,
 * non-null result is converted the same way and returned as the only entry.
 * A {@code null} result produces an empty list.
 *
 * @param text the text handed to {@code jsonPath.read}
 * @return a freshly created list of string results; never {@code null}
 */
@Override
public List<String> selectList(String text) {
    List<String> collected = new ArrayList<String>();
    Object readResult = jsonPath.read(text);
    if (readResult == null) {
        return collected;
    }
    if (!(readResult instanceof List)) {
        collected.add(toString(readResult));
        return collected;
    }
    // List<?> is sufficient for read-only iteration and needs no unchecked
    // cast, unlike List<Object>.
    List<?> elements = (List<?>) readResult;
    for (Object element : elements) {
        collected.add(toString(element));
    }
    return collected;
}
}
/**
 * Reads {@code text} through {@code jsonPath} and reduces the outcome to
 * one string.
 * <p>
 * Returns {@code null} for a {@code null} read result. For a non-empty
 * list, returns the first element converted with {@code toString(Object)}.
 * In every other case, an empty list included, returns the result's own
 * {@code toString()}.
 *
 * @param text the text handed to {@code jsonPath.read}
 * @return the selected value as a string, or {@code null} when nothing was
 *         read
 */
@Override
public String select(String text) {
    Object readResult = jsonPath.read(text);
    if (readResult == null) {
        return null;
    }
    if (readResult instanceof List) {
        // A successful instanceof means the value is non-null, so only
        // emptiness is checked; List<?> replaces the former raw type.
        List<?> elements = (List<?>) readResult;
        if (!elements.isEmpty()) {
            return toString(elements.get(0));
        }
    }
    // NOTE(review): empty lists and non-list results bypass the
    // toString(Object) helper and use Object.toString() directly - verify
    // that this difference from the list-element path is deliberate.
    return readResult.toString();
}