/**
 * Reports whether {@code element} is a {@link PseudoTextElement}; for any other element,
 * wraps each of its text nodes in a new {@code PseudoTextElement} as a side effect.
 *
 * <p>Each wrapper is created with the element's tag name, base URI and attributes. The text
 * node is first replaced by the wrapper and then appended to it as a child.
 *
 * @param root the root element supplied by the caller; not used by this method
 * @param element the element being tested
 * @return {@code true} if {@code element} is a {@code PseudoTextElement}, {@code false} otherwise
 */
@Override
public boolean matches(Element root, Element element) {
    if (!(element instanceof PseudoTextElement)) {
        for (TextNode original : element.textNodes()) {
            PseudoTextElement wrapper =
                    new PseudoTextElement(
                            org.jsoup.parser.Tag.valueOf(element.tagName()),
                            element.baseUri(),
                            element.attributes());
            // Put the wrapper where the text node was, then move the text node inside it.
            original.replaceWith(wrapper);
            wrapper.appendChild(original);
        }
        return false;
    }
    return true;
}
// Demo: turn every URL found in the text nodes returned by d.body().textNodes() into an
// <a> element, then print the document before and after. The existing google.com anchor is
// expected to stay untouched because its text is not part of that list.
String text = "<html><head></head><body><a href='http://google.com'>Don't change this link</a> Change this: http://yahoo.com foo.com</body></html>";
Document d = Jsoup.parse(text);
String oldHtmlCode = d.outerHtml();

List<TextNode> textNodes = d.body().textNodes();
Matcher m = Patterns.WEB_URL.matcher("");
for (TextNode textNode : textNodes) {
    String nodeText = textNode.text();
    m.reset(nodeText);
    if (!m.find()) {
        continue; // no URL in this text node; leave it as it is
    }

    // Build the replacement through the DOM API instead of concatenating an HTML string:
    // the surrounding text is then escaped on output, and no placeholder marker has to be
    // patched out of the serialized document afterwards.
    Element span = new Element(Tag.valueOf("span"), "");
    int cursor = 0;
    do {
        // Group 1 is what the previous "$1" replacement used for both href and link text.
        // NOTE(review): confirm group 1 spans the whole URL for this pattern; if not,
        // m.group() may be the better choice.
        String url = m.group(1);
        if (url != null) {
            if (m.start() > cursor) {
                span.appendText(nodeText.substring(cursor, m.start()));
            }
            // Only a lowercase http:// or https:// prefix counts as an existing scheme,
            // matching the case-sensitive check that was applied before.
            boolean hasScheme = url.startsWith("http://") || url.startsWith("https://");
            String href = hasScheme ? url : "http://" + url;
            span.appendElement("a").attr("href", href).text(url);
            cursor = m.end();
        }
    } while (m.find());
    if (cursor < nodeText.length()) {
        span.appendText(nodeText.substring(cursor));
    }

    // Replace the node exactly once, after all of its matches have been processed.
    textNode.replaceWith(span);
}

// Serialize once, after every text node has been handled, so the "AFTER" output reflects
// the document even when no text node was changed.
String newHtmlCode = d.outerHtml();

System.out.println("BEFORE:\n\n" + oldHtmlCode);
System.out.println("----------------------------");
System.out.println("AFTER:\n\n" + newHtmlCode);
// Replace a text node with an element that carries the same text:
//   1. set the element's "href" attribute to commitUrl,
//   2. append the text node's whole text (textNode.getWholeText()) to the element,
//   3. put the element in the text node's place.
// NOTE(review): element, commitUrl and textNode are declared outside this snippet;
// presumably element is an <a> element created by the caller -- confirm.
element.attr("href", commitUrl); element.appendText(textNode.getWholeText()); textNode.replaceWith(element);