// Fragment (enclosing method not visible in this view): when at least one element
// with an .mp4-style source was found, collect each element's "src" attribute and
// return the best-quality link among them via getBestQualityLink.
// NOTE(review): assumes endingWithMp4 is a Jsoup Elements collection — confirm at the declaration site.
if (!endingWithMp4.isEmpty()) { List<String> list = new ArrayList<>(); endingWithMp4.forEach((e) -> list.add(e.attr("src"))); return getBestQualityLink(list);
// Fragment (enclosing method and the rest of the try block are not visible here):
// parses the HTML content, adds Bootstrap table classes to every <table>, then
// iterates the <p> elements; the try block begins by reading each paragraph's text.
// NOTE(review): the variable is named aElements but the selector is "p" — verify intent upstream.
Document parse = Jsoup.parse(content); Elements tableElements = parse.select("table"); tableElements.forEach(element -> element.addClass("table table-bordered")); Elements aElements = parse.select("p"); if (aElements != null && aElements.size() > 0) { aElements.forEach(element -> { try { String href = element.text();
@Override
public void manipulate(Document document) {
    // Run the paragraph processor over every <p> element in the document.
    document.select("p").forEach(paragraph -> processParagraph(paragraph));
}
@Override
public void manipulate(Document document) {
    // Hand each <p> element in the parsed document to the paragraph processor.
    document.select("p").forEach(para -> processParagraph(para));
}
@Override
public List<String> getAll() {
    // Preserve the existing contract: an absent backing collection yields null
    // (callers apparently distinguish "never selected" from "selected nothing").
    if (allElements == null) {
        return null;
    }
    // Otherwise return the text content of each element, in document order.
    List<String> texts = new ArrayList<>();
    allElements.forEach(element -> texts.add(element.text()));
    return texts;
}
// Prints the text of each breadcrumb link inside the Amazon category ladder.
String source = "<span class=\"zg_hrsr_ladder\">in <a href=\"https://www.amazon.de/gp/bestsellers/books/ref=pd_zg_hrsr_b_1_1\">Bücher</a> > <a href=\"https://www.amazon.de/gp/bestsellers/books/287480/ref=pd_zg_hrsr_b_1_2\">Krimis & Thriller</a> > <b><a href=\"https://www.amazon.de/gp/bestsellers/books/419954031/ref=pd_zg_hrsr_b_1_3_last\">Deutschland</a></b></span>";
// BUG FIX: Jsoup.parse(String, String) takes a *base URI* as its second argument,
// not a charset name — passing "UTF-8" there was a misuse (the string is already
// decoded). The single-argument overload is the correct call for in-memory HTML.
Document htmlDocument = Jsoup.parse(source);
Elements category = htmlDocument.select("span.zg_hrsr_ladder a");
category.forEach(aElement -> {
    System.out.println(aElement.text());
});
/**
 * Inserts an <hr class="pagebreak"> before every page article that is preceded
 * by another page article, so consecutive pages are visually separated.
 */
private void convertBreaksBetweenPages(final Document document) {
    document.select("article.page ~ article.page")
            .forEach(page -> page.before("<hr class=\"pagebreak\" />"));
}
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

/** Minimal Jsoup demo: parses an empty HTML shell and prints every element's node name. */
public class JSoup {
    public static void main(String[] args) {
        String fichier = "<html>" + "<head></head>" + "<body></body>" + "</html>";
        // BUG FIX: Jsoup.parse(String, String) expects a *base URI* as its second
        // argument, not a charset — the previous "utf-8" argument was a misuse.
        // The one-argument overload is correct for an already-decoded string.
        Document dc = Jsoup.parse(fichier);
        Elements elements = dc.getAllElements();
        elements.forEach(element -> System.out.println(element.nodeName()));
    }
}
/**
 * Collects candidate dates from <meta> tags: each tag's content is indexed by its
 * lower-cased name (or, failing that, property) attribute, then the known date
 * keys are looked up in order and wrapped as MatchedDate results.
 */
public static List<MatchedDate> extractFromMeta(Document document) {
    Map<String, String> metaValues = Maps.newHashMap();
    document.select("meta").forEach(meta -> {
        String nameAttr = meta.attr("name");
        String propertyAttr = meta.attr("property");
        String contentAttr = meta.attr("content");
        // name wins over property when both are present.
        if (!Strings.isNullOrEmpty(nameAttr)) {
            metaValues.put(nameAttr.toLowerCase(), contentAttr);
        } else if (!Strings.isNullOrEmpty(propertyAttr)) {
            metaValues.put(propertyAttr.toLowerCase(), contentAttr);
        }
    });
    return DATE_META_KEYS.stream()
            .filter(key -> metaValues.get(key) != null)
            .map(key -> new MatchedDate(metaValues.get(key), "META:" + key))
            .collect(Collectors.toList());
}
private void convertSpiltBulletsToLists(final Document document) { document.select("p ~ p").forEach(p -> { final Element previous = p.previousElementSibling(); final String previousText = previous.text().trim(); if (LISTITEM_SYMBOLS.stream().anyMatch(s -> s.equals(previousText))) { // So bullet then paragraph... // Delete prvious and turn the other into a list item previous.remove(); p.tagName("li"); } }); }
@Override public void manipulate(Document document) { // Find elements which need to be spilt up Set<Element> elementsWithBr = new HashSet<>(); document.select("br").forEach(e -> elementsWithBr.add(e.parent())); // For each parent elementsWithBr.forEach( e -> { List<Element> runs = collectRuns(document, e); if (!runs.isEmpty()) { addRunsToDom(e, runs); } }); }
@Override public void manipulate(Document document) { // Find elements which need to be spilt up Set<Element> elementsWithBr = new HashSet<>(); document.select("br").forEach(e -> elementsWithBr.add(e.parent())); // For each parent elementsWithBr.forEach( e -> { List<Element> runs = collectRuns(document, e); if (!runs.isEmpty()) { addRunsToDom(e, runs); } }); }
private void cleanUp(final Document document) { // Often we get p tag under a ul, ol, so lets clean that up as a li document.select("ul,ol > p").forEach(l -> l.select("p").wrap("<li></li>")); // Lots of paragraph tags with content '*', they aren't in the ppt so lets just remove them document.select("p:matches(^\\*$)").remove(); // Remove any specific sections which are empty (as they are just noise) document.select("details:empty,section:empty,aside:empty").remove(); // Remove the classes document.select(".slide-content,.slide-master-content,.slide-notes").removeAttr("class"); // Powerpoint puts a preview embedded in.. which we don't want document.select("div[id=/docProps/thumbnail.jpeg]").remove(); } }
private void convertParagraphsToList(final Document document) { document.select("p,li,td").forEach(p -> { final String text = p.text(); for (final String symbol : LISTITEM_SYMBOLS) { if (text.contains(symbol)) { p.tagName("ul"); p.empty(); for (final String s : text.split(symbol)) { p.appendElement("li").text(s); } // Only do this once! return; } } }); }
/**
 * Extracts article text using the source's configured CSS selectors; when none
 * match, falls back to schema.org articleBody paragraphs, then to every <p>.
 */
private static List<MatchedString> extractTextsWithJsoup(Document document, HttpSource source) {
    List<MatchedString> matches = Lists.newArrayList();
    for (String selector : source.getTextSelectors()) {
        document.select(selector)
                .forEach(el -> matches.add(new MatchedString(el.text(), selector)));
    }
    if (!matches.isEmpty()) {
        return matches;
    }
    // Fallback 1: paragraphs inside a schema.org articleBody container.
    String articleBodyText = document.select("[itemprop*=articleBody] p").text();
    if (articleBodyText != null && !articleBodyText.trim().isEmpty()) {
        return Lists.newArrayList(new MatchedString(articleBodyText, "[itemprop*=articleBody] p"));
    }
    // Fallback 2: every paragraph on the page, one match per element.
    return document.select("p").stream()
            .map(el -> new MatchedString(el.text(), "p"))
            .collect(Collectors.toList());
}
/**
 * Parses the stream with Jsoup using the given charset, cleans the document,
 * copies every named <meta> tag's content into the metadata, and finally
 * streams the DOM to the SAX content handler.
 */
private void process(final InputStream stream, final String charset, final ContentHandler handler, final Metadata metadata) throws IOException, SAXException {
    final Document document = Jsoup.parse(stream, charset, "");
    clean(document);
    // Only meta tags in <head> that carry a name attribute contribute metadata.
    document.head().select("meta").forEach(meta -> {
        final String metaName = meta.attr("name");
        final String metaContent = meta.attr("content");
        if (!Strings.isNullOrEmpty(metaName)) {
            metadata.add(metaName, metaContent);
        }
    });
    document.traverse(new JsoupToSaxVisitor(handler));
}
/**
 * Removes every comment node that is a direct child of any element, except
 * inside <style> and <script> elements, whose contents are left untouched.
 */
public void stripComments(Document doc) {
    List<Node> comments = new ArrayList<>();
    doc.getAllElements().forEach(elem -> {
        // BUG FIX: the original wrote "! elem.equals(\"script\")", comparing the
        // Element itself to a String (always true), so script elements were
        // never actually excluded. Compare tag names for both exclusions.
        String tag = elem.tagName();
        if (!tag.equals("style") && !tag.equals("script")) {
            elem.childNodes().forEach(child -> {
                if (child instanceof Comment) {
                    comments.add(child);
                }
            });
        }
    });
    // Collect first, remove after: removing while iterating childNodes() would
    // mutate the lists mid-traversal.
    comments.forEach(Node::remove);
}
/**
 * Depth-first walk of the element tree: elements located inside the template
 * root are recorded as non-injectable, every element is offered to the custom
 * element collector, then the same inspection recurses into each child.
 */
private void inspectCustomElements(org.jsoup.nodes.Element childElement, org.jsoup.nodes.Element templateRoot) {
    if (isInsideTemplate(childElement, templateRoot)) {
        storeNotInjectableElementId(childElement);
    }
    collectCustomElement(childElement, templateRoot);
    for (org.jsoup.nodes.Element child : childElement.children()) {
        inspectCustomElements(child, templateRoot);
    }
}
/**
 * Routes a crawled response by its task group: blog list pages enqueue a new
 * task per entry link; blog item pages print the article title and author.
 */
@Override
public void store(TaskResponse response) throws Exception {
    if (response.isGroup("oschina.blog")) {
        // List page: push each entry's detail URL onto the crawl queue.
        response.select(".item").forEach(entry -> {
            String href = entry.select(".blog-title-link").attr("href");
            try {
                response.getQueue().push(new Task(href,"oschina.blog.item"));
            } catch (Exception e) {
                // Best-effort enqueue: log and keep processing the other entries.
                e.printStackTrace();
            }
        });
    } else if (response.isGroup("oschina.blog.item")) {
        // Detail page: extract title (stripping the badge text) and author.
        Elements content = response.select(".article-detail");
        String title = content.select(".header").text().replace("顶 原 荐","");
        String autor = content.select("blog-meta > div:nth-child(1) > a").text();
        System.out.println(String.format("文章标题: %s 作者: %s",title,autor));
    }
}
}
/**
 * Builds the list of child URLs discovered on the page: each anchor's absolute
 * href not yet in the visited filter becomes a candidate at depth+1, kept only
 * if the parser approves the visit and the URL matches the crawl pattern.
 * Returns null when the seed's depth limit is reached or the page has no links.
 */
protected List<WebUrl> getUrls(final WebUrl webUrl, final Document document) {
    if (webUrl.getDepth() >= this.seed.getDepth()) {
        return null;
    }
    Elements links = document.getElementsByTag("a");
    if (links.isEmpty()) {
        return null;
    }
    List<WebUrl> children = new ArrayList<>();
    links.forEach(anchor -> {
        String url = anchor.absUrl("href");
        if (!this.filter.contains(url)) {
            WebUrl childUrl = new WebUrl(webUrl.getDepth() + 1, url);
            childUrl.setDepth(webUrl.getDepth() + 1);
            childUrl.setUrl(url);
            if (parser.shouldVisit(webUrl, childUrl) && isMatched(url)) {
                children.add(childUrl);
            }
            // Mark as seen whether or not it was accepted, so it is never revisited.
            this.filter.add(url);
        }
    });
    return children;
}