/**
 * Splits this text node in two at the given string offset. Afterwards this node holds the original text up to
 * (but not including) the offset, and a newly created text node holds the text from the offset onwards. When this
 * node has a parent, the new node is added to that parent at the index directly after this node's sibling index.
 *
 * @param offset string offset at which to split; must satisfy {@code 0 <= offset < length of the current text}.
 * @return the newly created text node containing the text after the offset.
 */
public TextNode splitText(int offset) {
    final String text = coreValue();
    Validate.isTrue(offset >= 0, "Split offset must not be negative");
    // The bound is strict: an offset equal to the text length is rejected, so the tail is never empty.
    Validate.isTrue(offset < text.length(), "Split offset must be less than current text length");

    final String head = text.substring(0, offset);
    final String tail = text.substring(offset);

    // Shrink this node to the head, and hand the remainder to a fresh node.
    text(head);
    final TextNode tailNode = new TextNode(tail);

    // A detached node has nowhere to insert the sibling; the caller still receives the tail node.
    if (parent() != null)
        parent().addChildren(siblingIndex() + 1, tailNode);

    return tailNode;
}
/**
 * Writes the escaped text of this node to {@code accum}, indenting first when pretty-printing calls for it.
 * <p>
 * An indent is emitted only when pretty-printing is on and this node is not blank, and either it is the first
 * child of an {@link Element} parent whose tag formats as a block, or outline mode is on and the node has
 * sibling nodes.
 *
 * @param accum destination for the generated output
 * @param depth depth passed through to {@code indent}
 * @param out output settings consulted for pretty-print and outline modes
 * @throws IOException declared for the writes made to {@code accum}
 */
void outerHtmlHead(Appendable accum, int depth, Document.OutputSettings out) throws IOException {
    if (out.prettyPrint()) {
        // First child of a block-formatted element, carrying non-blank text.
        final boolean leadsBlock = siblingIndex() == 0
            && parentNode instanceof Element
            && ((Element) parentNode).tag().formatAsBlock()
            && !isBlank();

        if (leadsBlock || (out.outline() && siblingNodes().size() > 0 && !isBlank())) {
            indent(accum, depth, out);
        }
    }

    // Whitespace is normalised only when pretty-printing inside an element that does not preserve whitespace.
    final boolean normaliseWhite = out.prettyPrint()
        && parent() instanceof Element
        && !Element.preserveWhitespace(parent());

    Entities.escape(accum, coreValue(), out, false, normaliseWhite, false);
}
private boolean isPreserveWhitespace() { Node parent = node.parent(); while (parent != null) { if (parent.nodeName().equals("pre")) { //$NON-NLS-1$ return true; } parent = parent.parent(); } return false; } }
// Fetch the page at `site`, then walk its body and print every text node that contains
// at least one non-whitespace character, together with the name of its parent node.
Document doc = Jsoup.connect(site).get();
doc.body().traverse(new NodeVisitor() {
    @Override
    public void head(Node node, int depth) {
        if (!(node instanceof TextNode)) {
            return;
        }
        TextNode textNode = (TextNode) node;
        String content = textNode.text();
        // Strip every whitespace run; anything left means the node carries visible text.
        boolean hasVisibleText = !content.replaceAll("\\s*", "").isEmpty();
        if (hasVisibleText) {
            System.out.println("Tag:" + textNode.parent().nodeName() + ", text:" + content);
        }
    }

    @Override
    public void tail(Node node, int depth) {
        // Nothing to do when leaving a node.
    }
});
/**
 * Appends the whole text of {@code textNode} to {@code accum}.
 * <p>
 * The text is appended unchanged when {@code preserveWhitespace} holds for the node's parent, or when the node
 * is a {@link CDataNode}. Otherwise it is appended through {@code StringUtil.appendNormalisedWhitespace}, which
 * is told whether the accumulator currently ends in whitespace.
 *
 * @param accum buffer receiving the text
 * @param textNode node whose whole text is appended
 */
private static void appendNormalisedText(StringBuilder accum, TextNode textNode) {
    final String wholeText = textNode.getWholeText();
    final boolean keepVerbatim = preserveWhitespace(textNode.parent()) || textNode instanceof CDataNode;

    if (keepVerbatim) {
        accum.append(wholeText);
        return;
    }
    StringUtil.appendNormalisedWhitespace(accum, wholeText, lastCharIsWhitespace(accum));
}
int nonWhitespaceIndex = firstIndexOfNonWhitespace(text); if (nonWhitespaceIndex > 0) { affectedParents.add(textNode.parent()); computeBeforeTarget(element).before(textNode); affectedParents.add(textNode.parent()); } else if (nonWhitespaceIndex == -1) { computeAfterTarget(element).after(textNode); affectedParents.add(textNode.parent()); computeAfterTarget(element).after(textNode); affectedParents.add(textNode.parent()); } else if (lastNonWhitespaceIndex < (text.length() - 1)) { affectedParents.add(textNode.parent()); computeAfterTarget(element).after(textNode); affectedParents.add(textNode.parent());
/**
 * Returns the text of a text node with its leading and/or trailing edge trimmed depending on its neighbours.
 * <p>
 * An edge is trimmed (via {@code ltrim} / {@code rtrim}) when the adjacent sibling is a block, when there is no
 * adjacent sibling and the parent is a block, or - in normal-text mode only - when the adjacent sibling is a
 * text node whose text matches {@code EMPTY_MATCHER}. The trailing edge is only examined if text remains after
 * the leading edge has been handled.
 *
 * @param tn the text node to read
 * @param normalText {@code true} to start from {@code tn.text()}, {@code false} to start from
 *                   {@code tn.getWholeText()}
 * @return the resulting text, possibly empty
 */
private String getTextNodeText(TextNode tn, boolean normalText) {
    String result = normalText ? tn.text() : tn.getWholeText();
    final Node before = tn.previousSibling();
    final Node after = tn.nextSibling();
    final boolean insideBlock = isBlock(tn.parent());

    // Leading edge: the conditions are tested in order and stop at the first that holds.
    final boolean trimLeading = isBlock(before)
        || (before == null && insideBlock)
        || (normalText
            && before instanceof TextNode
            && EMPTY_MATCHER.matcher(((TextNode) before).text()).matches());
    if (trimLeading) {
        result = ltrim(result);
    }

    if (result.isEmpty()) {
        return result;
    }

    // Trailing edge: mirror image of the leading-edge rules, applied to the next sibling.
    final boolean trimTrailing = isBlock(after)
        || (after == null && insideBlock)
        || (normalText
            && after instanceof TextNode
            && EMPTY_MATCHER.matcher(((TextNode) after).text()).matches());
    if (trimTrailing) {
        result = rtrim(result);
    }

    return result;
}
// Run normalizeTextNodes on the parent of textNode. The unchecked cast assumes that parent is an
// Element (and is non-null for the callee) - TODO confirm against the surrounding code.
normalizeTextNodes((Element) textNode.parent());