/**
 * Delegates to {@code originElement.replaceWith(in)}.
 *
 * @param in the node handed to the delegate as the replacement
 */
public void replaceWith(Node in) { originElement.replaceWith(in); }
/**
 * Puts {@code replacement} where {@code original} currently sits in its tree.
 *
 * <p>The replacement is detached from its current parent first, but only when it has
 * one: on jsoup versions where {@code Node.remove()} validates a non-null parent, calling
 * it on a freshly created (parentless) element throws instead of being a no-op.
 *
 * @param original    the element to take out of the tree; must currently have a parent
 * @param replacement the element to insert in its place
 */
private static void replaceNode(Element original, Element replacement) {
    if (replacement.parent() != null) {
        replacement.remove();
    }
    original.replaceWith(replacement);
}
} // closes the enclosing class, whose header is outside this excerpt
// Parse the file and replace every <a href> with a <b> element carrying the link's text.
File input = new File("your.html");
Document doc = Jsoup.parse(input, "UTF-8");
Elements links = doc.select("a[href]");
// Elements is a list, not an iterator, so walk it with for-each. The selection was taken
// before the loop starts, so replacing nodes in the document does not disturb iteration.
for (Element link : links) {
    Element bold = doc.createElement("b").appendText(link.text());
    link.replaceWith(bold);
}
// now do something with...
// doc.outerHtml()
// Swap every <img> whose src ends in "a.gif" for a text node containing "A".
Elements elements = doc.select("img[src$=a.gif]");
for (Element image : elements) {
    TextNode letter = new TextNode("A", null);
    image.replaceWith(letter);
}
// Rewrite each <small> element as a <span> that carries the same text.
Document doc = Jsoup.parse(html);
for (Element smallElement : doc.select("small")) {
    Element span = new Element(Tag.valueOf("span"), "");
    span.text(smallElement.text());
    smallElement.replaceWith(span);
}
// prints html with <small> replaced by <span>
System.out.println(doc.html());
// Flatten each anchor under .singolo-contenuto into the text "<href> <text>",
// leaving alone anchors that have an ancestor with class "list_attachments".
for (Element anchor : document.select(".singolo-contenuto a")) {
    boolean insideAttachments = anchor.parents().hasClass("list_attachments");
    if (!insideAttachments) {
        String href = anchor.attr("href");
        String text = anchor.text();
        String flattened = href + " " + text;
        anchor.replaceWith(new TextNode(flattened, ""));
    }
}
// Text of the container after the anchors were flattened.
String result = document.select(".singolo-contenuto").text();
// Replace every <a> element with a text node holding the link's inner HTML.
Document doc = Jsoup.parse(inputHtml);
Elements links = doc.select("a");
// A document without any <a> yields an empty selection; links.get(0) would then throw,
// so fall back to the document's own base URI in that case.
String baseUri = links.isEmpty() ? doc.baseUri() : links.get(0).baseUri();
for (Element link : links) {
    Node linkText = new TextNode(link.html(), baseUri);
    // optionally wrap it in a tag instead:
    // Element linkText = doc.createElement("span");
    // linkText.html(link.html());
    link.replaceWith(linkText);
}
/**
 * Demo: parses a paragraph annotated with ORGANIZATION and PERSON tags, prints each
 * tagged entity, replaces the tag with its plain text and prints the resulting paragraph.
 */
public class Foo {

    public static void main(String... args) {
        String xml = "<p>The <ORGANIZATION>Peter Hall Company</ORGANIZATION>'s production of ''Blithe Spirit,'' directed by <PERSON>Thea Sharrock</PERSON>, is one of those attractively and unimaginatively upholstered productions of brittle classics that become must-have middlebrow tickets every few years. Most notable for <PERSON>Penelope Keith</PERSON>'s startlingly brisk and no-nonsense interpretation of the madcap medium <ORGANIZATION>Madame Arcati</ORGANIZATION>, Ms. <PERSON>Sharrock</PERSON>'s take on <PERSON>Coward</PERSON>'s 1941 comedy of a man visited by his dead wife's impish spirit delivers bright badinage, dazed double takes and marital melees at the same efficient clip.</p>";

        Document doc = Jsoup.parse(xml);

        // Only entity tags that are direct children of the paragraph are considered.
        for (Element entity : doc.select("p > ORGANIZATION, p > PERSON")) {
            String label = entity.tagName();
            String content = entity.text();
            System.out.printf("-> %s: %s\n", label, content);
            entity.replaceWith(new TextNode(content, ""));
        }

        String remaining = doc.select("p").html();
        System.out.println("\nFiltered out:\n" + remaining);
    }
}
// Load HTML file String charsetName = "UTF-8"; Document doc = Jsoup.parse(new File("dummy.html"), charsetName); System.out.println("BEFORE:\n" + doc.outerHtml()); // Replace each link nodes with its respective CSS file content for (Element link : doc.select("link[rel=stylesheet]")) { String cssFilename = link.attr("href"); Element style = new Element(Tag.valueOf("style"), ""); style.appendText("/* " + cssFilename + " */"); style.appendText(loadCssFileContent(cssFilename, charsetName)); link.replaceWith(style); } System.out.println("\nAFTER:\n" + doc.outerHtml()); private static String loadCssFileContent(String path, String charsetName) throws IOException { byte[] encoded = Files.readAllBytes(Paths.get(path)); return new String(encoded, charsetName); }
/** * Finds a set of elements through a CSS selector and swaps its tag with * that from its parent. * * @param root * body element with source divisions to upgrade * @param selector * CSS selector for the elements to swap with its parent */ public final void swapTagWithParent(final Element root, final String selector) { final Iterable<Element> elements; // Selected elements Element parent; // Parent element String text; // Preserved text checkNotNull(root, "Received a null pointer as root element"); checkNotNull(selector, "Received a null pointer as selector"); // Selects and iterates over the elements elements = root.select(selector); for (final Element element : elements) { parent = element.parent(); // Takes the text out of the element text = element.text(); element.text(""); // Swaps elements parent.replaceWith(element); element.appendChild(parent); // Sets the text into what was the parent element parent.text(text); } }
/**
 * Replaces an element with a newly created element of another tag, moving all child
 * nodes across. No attribute handling is performed here.
 *
 * @param e      the element to replace
 * @param newTag tag name of the element created through {@code document}
 * @return the new element, now sitting where {@code e} was
 */
private Element changeElementTag(Element e, String newTag) {
    Element replacement = document.createElement(newTag);

    // Walk a snapshot: each node is detached from e inside the loop, so the
    // list being iterated must not be e's own child list.
    List<Node> snapshot = new ArrayList<Node>(e.childNodes());
    for (Node child : snapshot) {
        child.remove();
        replacement.appendChild(child);
    }

    e.replaceWith(replacement);
    return replacement;
}
// Sample markup, parsed with the XML parser.
String sgml = "<data>\n<subdata>\n<l1item>\n <note>\n <para>hello\n </para>\n </note>\n</l1item>\n</subdata>\n<subdata>\n<l2item>\n <para> dont delete \n </para>\n</l2item>\n<l3item>\n <note>\n <para>hello\n </para>\n </note>\n <para> dont delete \n </para>\n</l3item>\n</subdata>\n</data>";
Document doc = Parser.xmlParser().parseInput(sgml, "");
System.out.println("BEFORE:\n" + doc.html());

// Every <note> matched by :only-child takes the place of its parent element.
Elements onlyChildNotes = doc.select("note:only-child");
for (Element note : onlyChildNotes) {
    Element wrapper = note.parent();
    if (wrapper == null) {
        continue;
    }
    wrapper.replaceWith(note);
}

System.out.println("AFTER:\n" + doc.html());
import org.jsoup.*;
import org.jsoup.nodes.*;
import org.jsoup.select.*;

/**
 * Demo: replaces the {@code <img>} whose {@code src} contains
 * {@code customerId=3340} with a text node and prints the resulting document.
 */
public class MyJsoupExample {

    public static void main(String[] args) {
        // The query strings use "&param1" / "&param2"; an earlier copy of this sample had
        // them mangled into a pilcrow character by HTML entity decoding.
        String inputText = "<html><head></head><body><p><img src=\"getCustomers.do?custCode=2&customerId=3334&param1=123\"/></p>"
                + "<p>someText <img src=\"getCustomers.do?custCode=2&customerId=3340&param2=456\"/></p></body></html>";

        Document doc = Jsoup.parse(inputText);

        // The attribute selector must be closed with ']'; an unbalanced selector is
        // rejected by jsoup versions that validate bracket balance.
        Elements myImgs = doc.select("img[src*=customerId=3340]");
        for (Element element : myImgs) {
            element.replaceWith(new TextNode("my replaced text", ""));
        }

        System.out.println(doc.toString());
    }
}
/**
 * Swaps each selected link element for a style element in the same position, so the
 * order of inclusion is kept. Links whose skip attribute equals the "true" value are
 * left untouched.
 *
 * @param doc
 *            the html document
 * @param cssContents
 *            the list of external css files with their content
 */
private void internStyles(Document doc, List<ExternalCss> cssContents) {
    for (Element link : doc.select(CSS_LINKS_SELECTOR)) {
        if (TRUE_VALUE.equals(link.attr(SKIP_INLINE))) {
            // explicitly marked as not to be inlined
            continue;
        }
        String cssPath = link.attr(HREF_ATTR);
        Element style = new Element(Tag.valueOf(STYLE_TAG), "");
        DataNode cssData = new DataNode(getCss(cssContents, cssPath), "");
        style.appendChild(cssData);
        link.replaceWith(style);
    }
}
img.replaceWith(new TextNode(possibleEmoji)); iframe.replaceWith(Jsoup.parse(html).body().child(0)); } else { iframe.remove();
/**
 * Demo: parses a body fragment containing a styled span, replaces the span with an
 * {@code <arial>} element holding the same inner HTML, and prints the body's children
 * with pretty-printing switched off.
 *
 * @param args unused
 */
public static void main(final String[] args) {
    final String input = "<span style=\"font-family: Arial;\">TEXT</span>";

    final Document document = Jsoup.parseBodyFragment(input);

    // Pretty-printing is disabled before any HTML is read back from the document.
    final OutputSettings settings = new OutputSettings();
    settings.prettyPrint(false);
    document.outputSettings(settings);

    final Element span = document.getElementsByTag("span").get(0);
    final Element arialElement = new Element(Tag.valueOf("arial"), "");
    arialElement.html(span.html());
    span.replaceWith(arialElement);

    System.out.print(document.body().children());
}
private void convertTablesToLists(final Document document) { // If a two column table, where the first col is always the same (and non-alphabetic), then drop // it and convert the other row to a list. document.select("table").forEach(table -> { String firstColumnValue = null; for (final Element tr : table.select("tr")) { final Elements cells = tr.select("td"); if (cells.size() != 2) { return; } final Element cell = cells.first(); final String first = cell.text().trim(); if (firstColumnValue != null) { if (!firstColumnValue.equals(first)) { return; } } else { firstColumnValue = first; } } // If we are here then all the first columns are the same... // So we convert to a list... final Element ul = new Element(Tag.valueOf("ul"), ""); for (final Element tr : table.select("tr")) { final Elements cells = tr.select("td"); ul.appendElement("li").html(cells.last().html()); } table.replaceWith(ul); }); }
/**
 * Replaces every element with the given tag found via {@code body.getElementsByTag}
 * by a text node made of the element's text followed by the literal {@code "<br/>"}.
 * Matching elements among an element's children are processed recursively first.
 * Elements that are null or have no parent are skipped.
 *
 * @param body element whose matching elements are replaced
 * @param tag  tag name to look for
 */
protected void cleanNodes(Element body, String tag) {
    for (Element match : body.getElementsByTag(tag)) {
        boolean attached = match != null && match.parent() != null;
        if (attached) {
            // Handle matching children before this element's text is read.
            for (Element nested : match.children().select(tag)) {
                cleanNodes(nested, tag);
            }
            String flattened = match.text() + "<br/>";
            match.replaceWith(new TextNode(flattened, ""));
        }
    }
}
} // closes the enclosing class, whose header is outside this excerpt
// Wrap img in a new <figure>: the figure first takes img's place in the tree, then img
// is appended as the figure's child. The order matters, because replaceWith needs img
// to still be at its original position.
figure = new Element(Tag.valueOf("figure"), ""); img.replaceWith(figure); figure.appendChild(img);
// Replace e with a text node whose content is the text of pre; the second
// constructor argument is passed as an empty string.
e.replaceWith(new TextNode(pre.text(), ""));