/**
 * Drops a whitespace-only text node that sits between two elements of the same kind.
 *
 * <p>The node is removed only when its whole text is empty after whitespace
 * normalisation and trimming, both neighbouring elements report the same tag name,
 * and that tag name is listed in {@code CHILD_TAGS}. In every other case the node
 * is left untouched.
 *
 * @param textNode    the text node to inspect and possibly remove
 * @param prevElement the element preceding the text node
 * @param nextElement the element following the text node
 */
private void normalizeTextBetweenNodes(TextNode textNode, Element prevElement, Element nextElement) {
    String collapsed = StringUtil.normaliseWhitespace(textNode.getWholeText()).trim();
    if (!collapsed.isEmpty()) {
        // Node carries real content; nothing to normalise.
        return;
    }

    String followingTag = nextElement.tagName();
    if (!followingTag.equals(prevElement.tagName())) {
        return;
    }
    if (!CHILD_TAGS.contains(followingTag)) {
        return;
    }

    textNode.remove();
}
/**
 * Demo entry point: parses a small HTML snippet, wraps every case-insensitive
 * occurrence of "barcelona" found in a text node in a
 * {@code <span class="city">} element, and prints the document before and after.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    Document doc = Jsoup.parse("<p>Barcelona is a nice place to live !<br/>Other cities <b>too</b>!</p>");
    dumpDocument("** BEFORE **", doc);

    // One matcher is reused for every text node via reset(...).
    Matcher replacer = Pattern.compile("(?i)(barcelona)").matcher("");
    for (Element elt : doc.body().select("*")) {
        for (TextNode textNode : elt.textNodes()) {
            String originalText = textNode.text();
            if (replacer.reset(originalText).find()) {
                // The replacement is handed over as an HTML string, so every piece of
                // plain text inside it must be escaped first; otherwise characters such
                // as '<' or '&' in the original text would be re-parsed as markup.
                textNode.before(annotateMatches(replacer, originalText));
                textNode.remove();
            }
        }
    }
    dumpDocument("** AFTER **", doc);
}

/**
 * Builds the HTML replacement for a text node: each match of group 1 is wrapped in
 * {@code <span class="city">}, and all text (matched and unmatched) is HTML-escaped.
 *
 * @param matcher a matcher over {@code text} on which {@code find()} has just succeeded
 * @param text    the plain (unescaped) text the matcher was reset with
 * @return the annotated HTML fragment
 */
private static String annotateMatches(Matcher matcher, String text) {
    StringBuilder html = new StringBuilder();
    int cursor = 0;
    do {
        html.append(escapeHtml(text.substring(cursor, matcher.start())));
        html.append("<span class=\"city\">")
            .append(escapeHtml(matcher.group(1)))
            .append("</span>");
        cursor = matcher.end();
    } while (matcher.find());
    html.append(escapeHtml(text.substring(cursor)));
    return html.toString();
}

/**
 * Escapes the characters that are significant in HTML text content.
 *
 * @param text plain text
 * @return {@code text} with {@code &}, {@code <} and {@code >} replaced by entities
 */
private static String escapeHtml(String text) {
    // '&' must be replaced first so the entities produced below are not double-escaped.
    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;");
}

/**
 * Prints a title line, the document's HTML, and a trailing blank line to standard output.
 *
 * @param title heading printed before the HTML
 * @param doc   document whose HTML is printed
 */
private static void dumpDocument(String title, Document doc) {
    System.out.println(title);
    System.out.println(doc.html());
    System.out.println();
}
/**
 * Strips trailing whitespace from the text node immediately preceding {@code element}.
 *
 * <p>Nothing happens when the previous sibling is not a {@link TextNode}. Otherwise the
 * number of leading characters to keep is {@code lastIndexOfNonWhitespace(text) + 1}:
 * <ul>
 *   <li>if that is zero or negative, the whole text node is removed;</li>
 *   <li>if it is shorter than the text, the node is split there and the tail is removed;</li>
 *   <li>otherwise the node is left unchanged.</li>
 * </ul>
 *
 * @param element the element whose preceding whitespace should be removed
 */
private void removeWhitespaceBefore(Element element) {
    Node candidate = element.previousSibling();
    if (!(candidate instanceof TextNode)) {
        return;
    }

    TextNode precedingText = (TextNode) candidate;
    String content = precedingText.getWholeText();
    int keepLength = lastIndexOfNonWhitespace(content) + 1;

    if (keepLength <= 0) {
        // No character is kept, so drop the node entirely.
        precedingText.remove();
        return;
    }

    if (keepLength < content.length()) {
        // Split off the trailing part and discard it.
        precedingText.splitText(keepLength);
        precedingText.nextSibling().remove();
    }
}
textNode.remove(); computeBeforeTarget(element).before(textNode); } else if (nonWhitespaceIndex == -1) { textNode.remove(); computeAfterTarget(element).after(textNode); if (lastNonWhitespaceIndex < 0) { textNode.remove(); computeAfterTarget(element).after(textNode); textNode.remove(); computeAfterTarget(element).after(textNode);
String text = textNode.text(); if (text.trim().length() == 0) { textNode.remove(); element.before(textNode); element.remove();