/**
 * Removes a child node via the superclass and then removes the same node
 * from the {@code elements} field.
 *
 * @param out the child node being removed
 */
@Override
protected void removeChild(Node out) {
    // Superclass removal runs first; the local collection is updated afterwards.
    super.removeChild(out);
    elements.remove(out);
}
/**
 * Removes every {@code <p>} element whose text, lower-cased, is contained in
 * {@code COMMON_SUPERSCRIPTS}.
 *
 * @param document the document to modify in place
 */
private void removeFloatingSuperscript(final Document document) {
    // Matching paragraphs are detached one by one instead of filtering the selected list with
    // removeIf: since jsoup 1.17.1 the list-mutating methods of Elements also detach the
    // filtered-out elements from the DOM, which would delete every non-superscript paragraph.
    document.select("p").forEach(p -> {
        // Locale.ROOT keeps the lower-casing independent of the JVM's default locale.
        if (COMMON_SUPERSCRIPTS.contains(p.text().toLowerCase(java.util.Locale.ROOT))) {
            p.remove();
        }
    });
}
private void cleanUp(final Document document) { // Often we get p tag under a ul, ol, so lets clean that up as a li document.select("ul,ol > p").forEach(l -> l.select("p").wrap("<li></li>")); // Lots of paragraph tags with content '*', they aren't in the ppt so lets just remove them document.select("p:matches(^\\*$)").remove(); // Remove any specific sections which are empty (as they are just noise) document.select("details:empty,section:empty,aside:empty").remove(); // Remove the classes document.select(".slide-content,.slide-master-content,.slide-notes").removeAttr("class"); // Powerpoint puts a preview embedded in.. which we don't want document.select("div[id=/docProps/thumbnail.jpeg]").remove(); } }
select("meta[name=charset]").remove(); } else if (syntax == OutputSettings.Syntax.xml) { Node node = childNodes().get(0);
@Override public Document process(final Metadata metadata, final Document document) { // Under body add a document main wrapChildrenOfBodyInTag(document, "<main class=\"SpreadSheet\"></main>"); document.select("div.page").tagName("article").attr("class", "Sheet"); // Doc: Element in floating textboxes have class outside document.select("div.outside").tagName("section").removeAttr("class"); // Excel puts a preview embedded in.. which we don't want document.select("div[id=/docProps/thumbnail.jpeg]").remove(); return document; }
String data = "<div> asd asd</div><span><p> asdd </p></span>"; Document doc = Jsoup.parse(data); Elements elms = doc.select(":matchesOwn(^\\s*\\S+\\s*$)"); // do whatever you are going to do with elms System.out.println(elms); // print the elements having less than two words elms.remove(); // remove all elements from document which contains // less than 2 words in their own text System.out.println("\nprinting Document:\n"+doc);
String html = "<img src=\"marco\">Capretta</img><i>Sono misterioso</i>"
        + "<p color=\"white\"><font size=\"5\">Ciao</p>"
        + "<p>some text</p><br/> <p>another text</p></font>";

// Parse with the XML parser instead of the default HTML parser, using an empty base URI.
Parser xmlParser = Parser.xmlParser();
Document xmldoc = Jsoup.parse(html, "", xmlParser);

// Drop every img element from the parsed tree, then print what is left.
Elements imgs = xmldoc.select("img");
imgs.remove();
System.out.println(xmldoc);
/**
 * Removes from {@code elements} every element returned by {@code el.getAllElements()}.
 * jsoup documents that result as including {@code el} itself, so despite the method
 * name the element is removed from the list along with its descendants.
 *
 * NOTE(review): since jsoup 1.17.1, {@code Elements.remove(Object)} also detaches the
 * element from the DOM; confirm whether list-only removal is intended here.
 *
 * @param el       element whose subtree should be dropped from the list
 * @param elements list to remove the subtree's elements from
 */
private void removeAllChildren(Element el, Elements elements) {
    el.getAllElements().forEach(child -> elements.remove(child));
}
/**
 * Removes from the DOM every element that {@code e.getElementsByTag(tag)} finds.
 *
 * @param e   element whose subtree is searched
 * @param tag name of the tag to remove
 */
private void clean(Element e, String tag) {
    e.getElementsByTag(tag).remove();
}
/**
 * Extracts the comment markup from a row element.
 *
 * Looks up the first {@code div.comment > span}, removes any {@code div.reply}
 * elements inside it (this modifies the passed-in tree), and returns its inner HTML
 * with every {@code "<span> </span>"} occurrence stripped.
 *
 * @param topRowElement element to search for the comment span
 * @return the cleaned inner HTML, or an empty string when no comment span exists
 */
public String parseText(Element topRowElement) {
    final Element span = topRowElement.select("div.comment > span").first();
    if (span != null) {
        span.select("div.reply").remove();
        return span.html().replace("<span> </span>", "");
    }
    return "";
}
final String html = "<div>A</div><div>B</div><div>C</div><div>D</div>"; Document doc = Jsoup.parse(html); // (1) - Remove from html doc.select("div").first().remove(); doc.select("div").last().remove(); System.out.println(doc.body()); // (2) - Remove from list Elements divs = doc.select("div"); divs.remove(0); divs.remove(divs.size()-1); System.out.println(divs);
/**
 * Removes all elements matching {@code :empty} (other than {@code body}) found under
 * the given element.
 *
 * @param document element to search
 * @return {@code true} when no such element was found and nothing was removed,
 *         {@code false} when at least one element was removed
 */
private boolean removeEmpty(Element document) {
    Elements candidates = document.select(":empty").not("body");
    boolean nothingToRemove = candidates.isEmpty();
    if (!nothingToRemove) {
        candidates.remove();
    }
    return nothingToRemove;
}
}
/**
 * Removes all elements matching {@code :empty} (other than {@code body}) found under
 * the given element.
 *
 * @param document element to search
 * @return {@code true} when no such element was found and nothing was removed,
 *         {@code false} when at least one element was removed
 */
private boolean removeEmpty(Element document) {
    Elements candidates = document.select(":empty").not("body");
    boolean nothingToRemove = candidates.isEmpty();
    if (!nothingToRemove) {
        candidates.remove();
    }
    return nothingToRemove;
}
}
@Override public Collection<? extends Node> getDescendentNodes(final Node node) { Elements descendents; if (node instanceof Document) descendents = ((Document)node).getAllElements(); else descendents = ((Element)node).getAllElements(); descendents.remove(node); // Jsoup includes the target of getAllElements() in the result... return descendents; }
@Override public XValue apply(Elements context) { Set<Element> total = new HashSet<>(); Elements descendant = new Elements(); for (Element el:context){ Elements tmp = el.getAllElements(); //exclude self tmp.remove(el); total.addAll(tmp); } descendant.addAll(total); return XValue.create(descendant); } }
Document doc = Jsoup.parse(webText);

// Strip every <font> element but keep its content: unwrap() moves each element's child
// nodes up into its parent at the element's position and then removes the element,
// replacing the hand-written insertChildren loop followed by remove().
Elements elements = doc.select("font");
elements.unwrap();

webText = doc.toString();