/**
 * Fetches the page at {@code url} and renders its single HTML table as CSV text.
 *
 * <p>Each {@code tr} of the table becomes one CSV row; the text of every {@code th} and
 * {@code td} cell in that row becomes one field, in document order.
 *
 * @param url address of the page to download
 * @return the CSV text produced from the table
 * @throws IOException if the page cannot be fetched
 * @throws IllegalStateException if the page does not contain exactly one {@code table} element
 */
public String tableToCsv(String url) throws IOException {
    Document doc = Jsoup.connect(url).get();
    Elements tables = doc.select("table");
    if (tables.size() != 1) {
        throw new IllegalStateException(
            "Reading html to table currently works if there is exactly 1 html table on the page. "
                + "The URL you passed has " + tables.size()
                + ". You may file a feature request with the URL if you'd like your page to be supported");
    }
    Element table = tables.get(0);
    CsvWriterSettings settings = new CsvWriterSettings();
    StringWriter stringWriter = new StringWriter();
    CsvWriter csvWriter = new CsvWriter(stringWriter, settings);
    try {
        for (Element row : table.select("tr")) {
            // A single combined selector keeps header and data cells in document order;
            // collecting all th first and all td second would reorder mixed rows.
            String[] nextLine = row.select("th, td").stream()
                .map(Element::text)
                .toArray(String[]::new);
            csvWriter.writeRow(nextLine);
        }
    } finally {
        // Make sure everything the writer may still hold is pushed into the StringWriter
        // before its contents are read.
        csvWriter.close();
    }
    return stringWriter.toString();
}
}
/**
 * Reads one item per child element of {@code design}, installs the resulting items
 * (when there are any) and then passes every collected selected item to
 * {@code setValue}.
 *
 * @param design  element whose children describe the items
 * @param context design context forwarded to {@code readItem}
 */
@Override
protected void readItems(Element design, DesignContext context) {
    // Handed to readItem for every child; presumably readItem records selected items here.
    Set<T> selectedItems = new HashSet<>();
    List<T> parsedItems = design.children()
            .stream()
            .map(childElement -> readItem(childElement, selectedItems, context))
            .collect(Collectors.toList());

    boolean hasItems = !parsedItems.isEmpty();
    if (hasItems) {
        setItems(parsedItems);
    }
    for (T selectedItem : selectedItems) {
        setValue(selectedItem);
    }
}
/**
 * Reads one item per child element of {@code design}, clears the current selection,
 * installs the resulting items (when there are any) and then passes every collected
 * selected item to {@code select}.
 *
 * @param design  element whose children describe the items
 * @param context design context forwarded to {@code readItem}
 */
@Override
protected void readItems(Element design, DesignContext context) {
    // Handed to readItem for every child; presumably readItem records selected items here.
    Set<T> selectedItems = new HashSet<>();
    List<T> parsedItems = design.children()
            .stream()
            .map(childElement -> readItem(childElement, selectedItems, context))
            .collect(Collectors.toList());

    // The previous selection is dropped before new items are installed.
    deselectAll();
    boolean hasItems = !parsedItems.isEmpty();
    if (hasItems) {
        setItems(parsedItems);
    }
    for (T selectedItem : selectedItems) {
        select(selectedItem);
    }
}
.select("input") .select("[name=csrf_token], [name=csrfToken]") .stream() .findFirst() .map(input -> new SimpleEntry<String, String>(input.attr("name"), input.attr("value")));
/**
 * {@inheritDoc}
 *
 * <p>This implementation parses {@code data} as HTML and streams every element matched by
 * the selector {@code table.findList tr td.result_text}.
 */
@Override
protected Stream<Element> getStructureStream(final String data) {
    final String resultCellSelector = "table.findList tr td.result_text";
    return Jsoup.parse(data).select(resultCellSelector).stream();
}
/**
 * Collects the inner HTML of every {@code script} element of type
 * {@code application/ld+json} found in the document.
 *
 * @param document document to search
 * @return the inner HTML of each matching script element, in selection order
 */
public static List<String> extractJsonLdParts(Document document) {
    return document.select("script[type=\"application/ld+json\"]")
            .stream()
            .map(script -> script.html())
            .collect(Collectors.toList());
}
// For every div, print the "data-src" and "src" attribute values of the first child of
// each anchor, keeping only values that contain ".jp".
for (Element divTag : divTags) {
    Elements aTags = divTag.getElementsByTag("a");
    // Per div: all matching data-src values are printed first, then all matching src values.
    for (String attributeName : new String[] {"data-src", "src"}) {
        aTags.stream()
            // child(0) throws IndexOutOfBoundsException for an anchor without child
            // elements (e.g. a plain text link), so such anchors are skipped.
            .filter(aTag -> !aTag.children().isEmpty())
            .map(aTag -> aTag.child(0).attr(attributeName))
            .filter(source -> source.contains(".jp"))
            .forEach(System.out::println);
    }
}
// Parse the markup and keep the elements matched by the ".myclass" selector.
Elements elements = Jsoup.parse(markup)
    .getAllElements()
    .select(".myclass");
// Copy the matches into a plain list, then print the inner HTML of each one.
List<Element> comments = elements.stream().collect(Collectors.toList());
comments.stream()
    .map(Element::html)
    .forEach(System.out::println);
/**
 * {@inheritDoc}
 *
 * <p>This implementation parses {@code data} with jsoup and streams every element whose
 * tag is {@code ImdbEntity}.
 */
@Override
protected Stream<Element> getStructureStream(final String data) {
    final String entityTag = "ImdbEntity";
    return Jsoup.parse(data).getElementsByTag(entityTag).stream();
}
/**
 * Loads the page at {@code url} through {@code htmlClient} and, once it is available,
 * converts every element matched by {@code .images_table td} with {@code toImage}.
 *
 * @param url address handed to {@code htmlClient.load}
 * @return a future completing with the stream of converted images
 */
@Override
public CompletableFuture<Stream<Image>> fetchImages(String url) {
    return htmlClient.load(url)
            .thenApply(page -> page.select(".images_table td")
                    .stream()
                    .map(cell -> toImage(cell)));
}
/**
 * Records a violation unless at least one element matched by {@code selector} has the
 * attribute {@code attributeName} with a value equal to {@code text}.
 *
 * @param selector      CSS selector evaluated against the current document
 * @param attributeName name of the attribute to inspect
 * @param text          expected attribute value
 */
public void attribute(String selector, String attributeName, String text) {
    Elements candidates = document().select(selector);
    boolean found = candidates.stream()
            .map(candidate -> candidate.attr(attributeName))
            .anyMatch(value -> value.equals(text));
    if (found) {
        return;
    }
    addViolation(String.format("DOM要素 '%s' のattribute %s の内容が '%s' になっていません", selector, attributeName, text));
}
}
/**
 * Looks up the first {@code meta} element that carries both a {@code name} and a
 * {@code value} attribute and whose {@code name} equals {@code metaName}.
 *
 * @param document document to search
 * @param metaName expected content of the {@code name} attribute
 * @return the {@code value} attribute of the first match, or empty when nothing matches
 */
private static Optional<String> selectMeta(final Document document, final String metaName) {
    // NOTE(review): this reads the "value" attribute, not "content" - confirm that is intended.
    return document.select("meta")
            .stream()
            .filter(meta -> meta.hasAttr("name"))
            .filter(meta -> meta.hasAttr("value"))
            .filter(meta -> metaName.equals(meta.attr("name")))
            .map(meta -> meta.attr("value"))
            .findFirst();
}
/**
 * Joins the text of every {@code p} element under {@code docstringEl} with a blank line
 * between paragraphs and stores the result, after {@code trimToNull}, as the
 * documentation of {@code sdkEntity}.
 *
 * @param docstringEl element whose paragraphs are read
 * @param sdkEntity   entity receiving the documentation text
 */
private static void findAndSetComment(Element docstringEl, SdkEntity sdkEntity) {
    StringBuilder joined = new StringBuilder();
    boolean firstParagraph = true;
    for (Element paragraph : docstringEl.select("p")) {
        // Separator goes between paragraphs only, even when a paragraph has no text.
        if (!firstParagraph) {
            joined.append("\n\n");
        }
        joined.append(paragraph.text());
        firstParagraph = false;
    }
    sdkEntity.setDocumentation(trimToNull(joined.toString()));
}
/**
 * Walks the {@code .discussion-item} elements of {@code document} and returns the first
 * non-empty user login of a pull request that passes all checks.
 *
 * <p>An item is considered only if {@code isPullRequest} and {@code isMerged} both accept
 * it. Its id is obtained via {@code resolvePullRequestGithubId} and the pull request via
 * {@code fetchPullrequest}; a {@code null} pull request, or one rejected by
 * {@code pullRequestFixesIssue}, is skipped. The pipeline is lazy, so later items are not
 * processed once a login has been found.
 *
 * @param document      page whose discussion items are inspected
 * @param issueGithubId id passed to {@code pullRequestFixesIssue}
 * @return the first non-empty login found, or empty if there is none
 */
public Optional<String> resolve(final Document document, final GithubId issueGithubId) {
    return document.select(".discussion-item")
            .stream()
            .filter(item -> isPullRequest(item) && isMerged(item))
            .map(item -> fetchPullrequest(resolvePullRequestGithubId(item)))
            .filter(pullRequest -> pullRequest != null)
            .filter(pullRequest -> pullRequestFixesIssue(pullRequest, issueGithubId))
            .map(pullRequest -> pullRequest.getUser().getLogin())
            .filter(login -> StringUtils.isNotEmpty(login))
            .findFirst();
}
/**
 * Records a violation when any element matched by {@code selector} has text whose
 * trimmed form equals {@code text}.
 *
 * @param selector CSS selector evaluated against the current document
 * @param text     text that must not appear
 */
public void contentMissing(String selector, String text) {
    Elements candidates = document().select(selector);
    boolean present = candidates.stream()
            .filter(candidate -> candidate.hasText())
            .map(candidate -> candidate.text().trim())
            .anyMatch(content -> content.equals(text));
    if (!present) {
        return;
    }
    addViolation(String.format("DOM要素 '%s' に文字列 '%s' をもつものは表示されないはずですが、表示されています", selector, text));
}
/**
 * Collects the {@code src} attribute of every element matched by
 * {@code #picture > p > img} in the response body.
 *
 * @param response response whose body is queried
 * @return a result wrapping the list of {@code src} values
 */
@Override
public Result<List<String>> parse(Response response) {
    List<String> imageSources = response.body()
            .css("#picture > p > img")
            .stream()
            .map(image -> image.attr("src"))
            .collect(Collectors.toList());
    return new Result<>(imageSources);
}
}
/**
 * Applies {@code action} to {@code element} itself, or, when {@code element} is a
 * {@link Document}, to each non-document element whose parent equals that document.
 *
 * @param element element or document to process
 * @param action  callback invoked for each selected element
 */
private static void insertElements(Element element, Consumer<Element> action) {
    if (!(element instanceof Document)) {
        action.accept(element);
        return;
    }
    element.getAllElements()
            .stream()
            .filter(candidate -> !(candidate instanceof Document))
            .filter(candidate -> element.equals(candidate.parent()))
            .forEach(action);
}
/**
 * Collects the text of every element matched by the title-link selector in the
 * response body.
 *
 * @param response response whose body is queried
 * @return a result wrapping the list of texts
 */
@Override
protected Result parse(Response response) {
    final String titleLinkSelector = "div.areabg1 .area-half.left div.tabContents td a";
    List<String> titles = response.body()
            .css(titleLinkSelector)
            .stream()
            .map(link -> link.text())
            .collect(Collectors.toList());
    return new Result(titles);
}
}
/**
 * Checks that at least one element matched by {@code selector} has the attribute
 * {@code attributeName} with a value equal to {@code text}; records a violation if not.
 *
 * @param selector      CSS selector evaluated against the current document
 * @param attributeName name of the attribute to inspect
 * @param text          expected attribute value
 * @return the result of {@code wrap(true)} when a match exists, otherwise {@code wrap(false)}
 */
public boolean attribute(String selector, String attributeName, String text) {
    Elements candidates = document().select(selector);
    boolean matched = candidates.stream()
            .map(candidate -> candidate.attr(attributeName))
            .anyMatch(value -> value.equals(text));
    if (!matched) {
        addViolation(String.format("DOM要素 '%s' のattribute %s の内容が '%s' になっていません", selector, attributeName, text));
    }
    return wrap(matched);
}
}
/**
 * Builds a map from every attribute whose key starts with {@code FORM_SIGNAL_ATTR_PREFIX},
 * taken from the elements that carry such an attribute.
 *
 * <p>The map key is the attribute key with each occurrence of the prefix removed; the map
 * value is the attribute value.
 *
 * @param scriptDocument element to search
 * @return map from stripped attribute key to attribute value
 */
private static Map<String, String> getSignalToUrlMapping(Element scriptDocument) {
    // NOTE(review): toMap has no merge function, so two attributes that yield the same
    // key raise IllegalStateException - confirm duplicates cannot occur.
    return scriptDocument.getElementsByAttributeStarting(FORM_SIGNAL_ATTR_PREFIX)
            .stream()
            .map(Element::attributes)
            .flatMap(attributes -> attributes.asList().stream())
            .filter(attribute -> attribute.getKey().startsWith(FORM_SIGNAL_ATTR_PREFIX))
            .collect(Collectors.toMap(
                    attribute -> attribute.getKey().replace(FORM_SIGNAL_ATTR_PREFIX, StringUtils.EMPTY),
                    attribute -> attribute.getValue()));
}
}