private void normaliseStructure(String tag, Element htmlEl) { Elements elements = this.getElementsByTag(tag); Element master = elements.first(); // will always be available as created above if not existent if (elements.size() > 1) { // dupes, move contents to master List<Node> toMove = new ArrayList<>(); for (int i = 1; i < elements.size(); i++) { Node dupe = elements.get(i); toMove.addAll(dupe.ensureChildNodes()); dupe.remove(); } for (Node dupe : toMove) master.appendChild(dupe); } // ensure parented by <html> if (!master.parent().equals(htmlEl)) { htmlEl.appendChild(master); // includes remove() } }
/** * Parse a fragment of HTML into the {@code body} of a Document. * * @param bodyHtml fragment of HTML * @param baseUri base URI of document (i.e. original fetch location), for resolving relative URLs. * * @return Document, with empty head, and HTML parsed into body */ public static Document parseBodyFragment(String bodyHtml, String baseUri) { Document doc = Document.createShell(baseUri); Element body = doc.body(); List<Node> nodeList = parseFragment(bodyHtml, body, baseUri); Node[] nodes = nodeList.toArray(new Node[nodeList.size()]); // the node list gets modified when re-parented for (int i = nodes.length - 1; i > 0; i--) { nodes[i].remove(); } for (Node node : nodes) { body.appendChild(node); } return doc; }
/**
 * Drops this node from the DOM while keeping its children: the children are inserted into this
 * node's parent at this node's sibling index, and then this node is removed.
 * <p>
 * For example, with the input html:
 * </p>
 * <p>{@code <div>One <span>Two <b>Three</b></span></div>}</p>
 * Calling {@code element.unwrap()} on the {@code span} element will result in the html:
 * <p>{@code <div>One Two <b>Three</b></div>}</p>
 * and the {@code "Two "} {@link TextNode} being returned.
 *
 * @return the node that was this node's first child before unwrapping, or null if there were no children.
 * @see #remove()
 * @see #wrap(String)
 */
public Node unwrap() {
    Validate.notNull(parentNode);

    final List<Node> kids = ensureChildNodes();
    final Node head;
    if (kids.size() > 0) {
        head = kids.get(0);
    } else {
        head = null;
    }

    // Hoist the children into the parent at this node's position, then drop this node.
    parentNode.addChildren(siblingIndex, this.childNodesAsArray());
    this.remove();
    return head;
}
/**
 * Strips every attribute and every child node from the given element.
 *
 * @param design
 *            the element to be cleared
 */
public static void clearElement(Element design) {
    final Attributes attributes = design.attributes();
    for (Attribute attribute : attributes.asList()) {
        attributes.remove(attribute.getKey());
    }

    // Iterate over a copy so that removing a child does not disturb the iteration.
    final List<Node> snapshot = new ArrayList<Node>(design.childNodes());
    for (Node child : snapshot) {
        child.remove();
    }
}
/**
 * Removes each of the given nodes from the DOM.
 *
 * @param nodes the nodes to remove; iterated via an array snapshot taken up front
 */
private static void deleteNodes(List<Node> nodes) {
    final Object[] snapshot = nodes.toArray();
    for (int idx = 0; idx < snapshot.length; idx++) {
        final Node doomed = (Node) snapshot[idx];
        doomed.remove();
    }
}
/**
 * Hands {@code reason} and {@code node} to the {@code i(...)} helper and then removes the node.
 * NOTE(review): {@code i} is presumably an info-level logging helper — confirm.
 *
 * @param node   the node to report and remove
 * @param reason text passed to {@code i(...)} alongside the node
 */
public static void printAndRemove(Node node, String reason) {
    i(reason, node);
    node.remove();
}
}
/**
 * Moves the given nodes, in list order, under a newly created {@code GroupNode}.
 *
 * @param elements nodes to detach from their current position and re-home
 * @return the new group node containing the given nodes
 */
public final static Element wrapElementsToSingleNode(List<Node> elements) {
    final Element wrapper = new GroupNode();
    // Snapshot first: the supplied list may change while nodes are being detached.
    final Node[] snapshot = elements.toArray(new Node[0]);
    for (int idx = 0; idx < snapshot.length; idx++) {
        final Node member = snapshot[idx];
        member.remove();
        wrapper.appendChild(member);
    }
    return wrapper;
}
/**
 * Moves every child of {@code elem} out of it, inserting each one immediately before
 * {@code elem}. The element itself is left in place.
 *
 * @param elem the element whose children are pulled up
 */
public final static void pullupChildren(Element elem) {
    // Snapshot the children; the child list changes as each one is detached.
    final Node[] kids = elem.childNodes().toArray(new Node[0]);
    for (int idx = 0; idx < kids.length; idx++) {
        final Node kid = kids[idx];
        kid.remove();
        elem.before(kid);
    }
}
}
/**
 * Removes every {@code Comment} node that is a direct child of an element in the document,
 * except for comments that sit directly inside {@code style} or {@code script} elements.
 *
 * @param doc the document to strip comments from; modified in place
 */
public void stripComments(Document doc) {
    // Collect first and remove afterwards so no child list is modified while being iterated.
    List<Node> comments = new ArrayList<>();
    for (Element elem : doc.getAllElements()) {
        String tagName = elem.tagName();
        // Compare the tag name: comparing the Element itself to a String can never be equal.
        if ("style".equals(tagName) || "script".equals(tagName)) {
            continue;
        }
        for (Node child : elem.childNodes()) {
            if (child instanceof Comment) {
                comments.add(child);
            }
        }
    }
    for (Node comment : comments) {
        comment.remove();
    }
}
/**
 * Parses the given HTML as a body fragment, removes every comment node found anywhere in the
 * resulting tree, and returns the body's HTML.
 *
 * @param html the HTML to process, may be null
 * @return the body HTML with comments removed, or null if {@code html} was null
 */
private static String stripHTMLComments(String html) {
    if (html == null) {
        return null;
    }
    org.jsoup.nodes.Document doc = Jsoup.parseBodyFragment(html);
    List<org.jsoup.nodes.Node> pending = new ArrayList<org.jsoup.nodes.Node>();
    List<org.jsoup.nodes.Node> comments = new ArrayList<org.jsoup.nodes.Node>();
    pending.add(doc);
    // Breadth-first walk using an index cursor: the list grows while it is being scanned, and
    // nothing is removed from its head, so each dequeue stays constant-time.
    for (int cursor = 0; cursor < pending.size(); cursor++) {
        org.jsoup.nodes.Node node = pending.get(cursor);
        if (node instanceof org.jsoup.nodes.Comment) {
            comments.add(node);
        } else {
            pending.addAll(node.childNodes());
        }
    }
    // Remove only after the walk so the tree is not modified while it is being traversed.
    for (org.jsoup.nodes.Node victim : comments) {
        victim.remove();
    }
    return doc.body().html();
}
}
/**
 * Replaces {@code e} with a new element of tag {@code newTag} created from {@code document},
 * moving all of {@code e}'s child nodes into the new element.
 *
 * @param e      the element to replace
 * @param newTag tag name for the replacement element
 * @return the replacement element
 */
private Element changeElementTag(Element e, String newTag) {
    final Element replacement = document.createElement(newTag);

    // The child list is live, so iterate over a snapshot while moving the children.
    final Node[] kids = e.childNodes().toArray(new Node[0]);
    for (int idx = 0; idx < kids.length; idx++) {
        final Node kid = kids[idx];
        kid.remove();
        replacement.appendChild(kid);
    }

    e.replaceWith(replacement);
    return replacement;
}
/**
 * Removes every {@code Comment} node from the subtree below {@code node}.
 *
 * @param node root of the subtree to clean; its descendants are modified in place
 */
private static void removeCommentsRecursively(Node node) {
    for (int index = 0; index < node.childNodeSize(); ) {
        final Node current = node.childNode(index);
        if (!(current instanceof Comment)) {
            removeCommentsRecursively(current);
            index++;
            continue;
        }
        // Index is not advanced after a removal: the next sibling now occupies this slot.
        current.remove();
    }
}
/**
 * Traverses the document with an {@code HtmlCommentsNodeVisitor} and removes every node the
 * visitor reports through {@code getHtmlCommentNodes()}.
 *
 * @param doc the document to traverse; modified in place
 */
protected void removeHtmlComments(Document doc) {
    final HtmlCommentsNodeVisitor collector = new HtmlCommentsNodeVisitor();
    doc.traverse(collector);

    // Removal happens only after the traversal has completed.
    for (Node found : collector.getHtmlCommentNodes()) {
        found.remove();
    }
}
/**
 * Removes {@code element} but keeps its children, which are moved to sit immediately before it
 * prior to the removal. If the element had a parent, {@code normalizeTextNodes} is then run on
 * that parent.
 *
 * @param element the element to remove
 */
private void removeElementPreserveChildren(Element element) {
    // Capture the parent up front; it is no longer reachable from the element once removed.
    final Element container = element.parent();

    final Node[] kids = element.childNodes().toArray(new Node[0]);
    for (int idx = 0; idx < kids.length; idx++) {
        final Node kid = kids[idx];
        kid.remove();
        element.before(kid);
    }
    element.remove();

    if (container == null) {
        return;
    }
    normalizeTextNodes(container);
}
}
/**
 * Starting at {@code next}, collects the run of consecutive siblings up to (not including) the
 * first block-level element, removes them from the DOM, and returns a new paragraph element
 * holding clones of them.
 *
 * @param next first node of the run, may be null
 * @return the new paragraph, or null if {@code next} is null or the run is empty
 */
private Element inlineTextToParagraph(Node next) {
    if (next == null) {
        return null;
    }

    final List<Node> inlineRun = new ArrayList<Node>();
    for (Node cursor = next; cursor != null; cursor = cursor.nextSibling()) {
        final boolean isBlockElement = cursor instanceof Element && ((Element) cursor).isBlock();
        if (isBlockElement) {
            break;
        }
        inlineRun.add(cursor);
    }

    if (inlineRun.isEmpty()) {
        return null;
    }

    final Element paragraph = new Element(P_TAG, "");
    for (Node inline : inlineRun) {
        // The original node is detached; a clone of it is what ends up in the paragraph.
        inline.remove();
        paragraph.appendChild(inline.clone());
    }
    return paragraph;
}
// Detach child from its current parent, then append a clone of it (not the detached node
// itself) to lastParagraph.
// NOTE(review): this fragment has no visible enclosing definition here — confirm its context.
child.remove(); lastParagraph.appendChild(child.clone());
/**
 * Trims trailing whitespace from the text node that directly precedes {@code element}, if there
 * is one. A text node that the {@code lastIndexOfNonWhitespace} helper reports as having no
 * non-whitespace character is removed entirely; otherwise it is split after its last
 * non-whitespace character and the part that follows is removed.
 *
 * @param element the element whose preceding whitespace is trimmed
 */
private void removeWhitespaceBefore(Element element) {
    final Node before = element.previousSibling();
    if (!(before instanceof TextNode)) {
        return;
    }

    final TextNode leading = (TextNode) before;
    final String content = leading.getWholeText();
    final int cut = lastIndexOfNonWhitespace(content) + 1;

    if (cut <= 0) {
        // Nothing but whitespace: drop the whole text node.
        leading.remove();
        return;
    }
    if (cut < content.length()) {
        // Split at the cut point and drop the node that now follows.
        leading.splitText(cut);
        leading.nextSibling().remove();
    }
}
/**
 * Applies the optional attribute setters to the document body, runs
 * {@code RenderUtil.applySnippets} on the document, and then moves all of the body's child nodes
 * into a new {@code GroupNode}.
 *
 * @param doc   the template document; its body is emptied by this call
 * @param attrs attribute setters to apply to the body, may be null
 * @return a group node containing the former children of the body
 * @throws Exception declared by the signature; propagated from the calls made here
 */
protected Element renderTemplate(Document doc, AttributesRequire attrs) throws Exception {
    if (attrs != null) {
        final List<AttributeSetter> setters = attrs.getAttrList();
        final Element target = doc.body();
        for (int idx = 0; idx < setters.size(); idx++) {
            setters.get(idx).set(target);
        }
    }

    RenderUtil.applySnippets(doc);

    final Element group = new GroupNode();
    // Snapshot the body's children; the child list changes as each node is detached.
    final Node[] kids = doc.body().childNodes().toArray(new Node[0]);
    for (Node kid : kids) {
        kid.remove();
        group.appendChild(kid);
    }
    return group;
}