/** * Extracts the text from the given html content, assuming the given html encoding.<p> * * @param html the content to extract the plain text from * @param encoding the encoding to use * * @return the text extracted from the given html content * * @throws Exception if something goes wrong */ public static String html2text(String html, String encoding) throws Exception { // create the converter instance CmsHtml2TextConverter visitor = new CmsHtml2TextConverter(); return visitor.process(html, encoding); }
setMarker("=", open); setIndentation(2, open); appendLinebreak(2); break; case 1: // H2 setMarker("==", open); setIndentation(3, open); appendLinebreak(2); break; case 2: // H3 setMarker("===", open); setIndentation(4, open); appendLinebreak(2); break; case 3: // H4 setMarker("====", open); setIndentation(5, open); appendLinebreak(2); break; case 4: // H5 setMarker("=====", open); setIndentation(6, open); appendLinebreak(2); break; case 5: // H6 setMarker("=======", open); setIndentation(7, open); appendLinebreak(2); break;
/**
 * Handles an end tag: resets the line break flag, appends the line breaks for the
 * closing tag and then writes out any attribute text that was stored for the
 * parent of the end tag.<p>
 *
 * @see org.htmlparser.visitors.NodeVisitor#visitEndTag(org.htmlparser.Tag)
 */
@Override
public void visitEndTag(Tag tag) {

    m_appendBr = false;
    appendLinebreaks(tag, false);

    // the stored entry is removed from the map, so it is emitted at most once
    String pending = m_attributeMap.remove(tag.getParent());
    if (pending == null) {
        return;
    }
    appendText(pending);
}
/**
 * Handles a start tag: sets the line break flag, appends the line breaks for the
 * opening tag, writes the <code>##IMG##</code> marker for image tags and passes the
 * non-blank <code>href</code>, <code>src</code>, <code>title</code> and <code>alt</code>
 * attribute values on to {@link #appendAttribute(Tag, String)}.<p>
 *
 * @see org.htmlparser.visitors.NodeVisitor#visitTag(org.htmlparser.Tag)
 */
@Override
public void visitTag(Tag tag) {

    m_appendBr = true;
    appendLinebreaks(tag, true);
    if (tag.getTagName().equals("IMG")) {
        appendText("##IMG##");
    }

    // attribute name, prefix and suffix; processed in exactly this order
    String[][] decorations = {
        {"href", " [", "]"},
        {"src", " [", "]"},
        {"title", " {", "}"},
        {"alt", " {", "}"}};
    for (String[] decoration : decorations) {
        String value = tag.getAttribute(decoration[0]);
        if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(value)) {
            appendAttribute(tag, decoration[1] + value.trim() + decoration[2]);
        }
    }
}
text = collapse(text); appendLinebreak(m_storedBrCount); appendIndentation(); m_brCount = 0; if ((word.length() + 1 + m_lineLength) > m_maxLineLength) { m_appendBr = true; appendLinebreak(1); appendIndentation(); m_brCount = 0; } else {
/**
 * Gets the diff HTML based on the selected display options.<p>
 *
 * If the text mode is <code>"text"</code>, both inputs are first converted to plain text
 * with {@link CmsHtml2TextConverter#html2text(String, String)} using UTF-8. The diff mode
 * <code>"all"</code> selects {@link CmsDiffViewMode#ALL}, any other value selects
 * {@link CmsDiffViewMode#DIFF_ONLY}.<p>
 *
 * If the diff engine returns no output, the first text is returned instead, HTML escaped
 * and wrapped by <code>wrapLinesWithUnchangedStyle</code>. This fallback uses the same
 * (possibly converted) text that was compared, so the text mode setting is also honoured
 * when there are no differences to display.<p>
 *
 * @return the diff HTML to display
 *
 * @throws Exception if something goes wrong
 */
public String getDiffHtml() throws Exception {

    CmsDiffViewMode mode = "all".equals(m_diffMode) ? CmsDiffViewMode.ALL : CmsDiffViewMode.DIFF_ONLY;
    String text1 = m_text1;
    String text2 = m_text2;
    if ("text".equals(m_textMode)) {
        text1 = CmsHtml2TextConverter.html2text(text1, "UTF-8");
        text2 = CmsHtml2TextConverter.html2text(text2, "UTF-8");
    }
    // NOTE(review): the first argument is presumably the number of context lines (-1 = everything) - confirm
    CmsHtmlDifferenceConfiguration conf = new CmsHtmlDifferenceConfiguration(
        mode == CmsDiffViewMode.ALL ? -1 : 2,
        A_CmsUI.get().getLocale());
    String diff = Diff.diffAsHtml(text1, text2, conf);
    if (CmsStringUtil.isNotEmpty(diff)) {
        return diff;
    }
    // no diff output: show the first text as it was compared (text1, not the raw m_text1),
    // otherwise the text mode would fall back to displaying the HTML source
    return wrapLinesWithUnchangedStyle(CmsStringUtil.substitute(CmsStringUtil.escapeHtml(text1), "<br/>", ""));
}
/**
 * Appends an attribute.<p>
 *
 * For image tags the text is written to the output immediately. For all other tags
 * the text is added to whatever is already stored for the tag in the attribute map.<p>
 *
 * @param tag the tag
 * @param text the attribute text
 */
private void appendAttribute(Tag tag, String text) {

    if (tag.getTagName().equals("IMG")) {
        appendText(text);
        return;
    }
    String existing = (String)m_attributeMap.get(tag);
    String combined = (existing != null) ? (existing + text) : text;
    m_attributeMap.put(tag, combined);
}
/**
 * Appends line breaks by delegating to the two argument variant of this method,
 * passing <code>false</code> as the second argument.<p>
 *
 * @param count the number of lines
 */
private void appendLinebreak(int count) {

    this.appendLinebreak(count, false);
}
/**
 * Handles a start tag.<p>
 *
 * The line break flag is set and the line breaks for the opening tag are appended.
 * Image tags additionally produce the <code>##IMG##</code> marker. Afterwards the
 * <code>href</code> and <code>src</code> values (in square brackets) as well as the
 * <code>title</code> and <code>alt</code> values (in curly braces) are handed to
 * {@link #appendAttribute(Tag, String)}, provided they are not empty or whitespace only.<p>
 *
 * @see org.htmlparser.visitors.NodeVisitor#visitTag(org.htmlparser.Tag)
 */
@Override
public void visitTag(Tag tag) {

    m_appendBr = true;
    appendLinebreaks(tag, true);
    if (tag.getTagName().equals("IMG")) {
        appendText("##IMG##");
    }

    // the three arrays are parallel: name, opening decoration, closing decoration
    String[] names = {"href", "src", "title", "alt"};
    String[] prefixes = {" [", " [", " {", " {"};
    String[] suffixes = {"]", "]", "}", "}"};
    for (int i = 0; i < names.length; i++) {
        String raw = tag.getAttribute(names[i]);
        if (!CmsStringUtil.isNotEmptyOrWhitespaceOnly(raw)) {
            continue;
        }
        appendAttribute(tag, prefixes[i] + raw.trim() + suffixes[i]);
    }
}
text = collapse(text); appendLinebreak(m_storedBrCount); appendIndentation(); m_brCount = 0; if ((word.length() + 1 + m_lineLength) > m_maxLineLength) { m_appendBr = true; appendLinebreak(1); appendIndentation(); m_brCount = 0; } else {
/**
 * Strips all HTML markup from the given input.<p>
 *
 * <ul>
 * <li>If the input is a {@link CmsJspContentAccessValueWrapper}, the plain text is read
 *      directly from the wrapped content value, or an empty String is returned in case
 *      the wrapper reports that the value does not exist.
 * <li>Any other input is converted to a String which is then passed through
 *      {@link CmsHtml2TextConverter#html2text(String, String)} using the system default encoding.
 *      If this conversion fails, the exception message formatted as an unknown key is returned.
 * </ul>
 *
 * @param input the input to Strip from HTML
 *
 * @return the given input with all HTML stripped.
 */
public static String stripHtml(Object input) {

    if (!(input instanceof CmsJspContentAccessValueWrapper)) {
        // generic case: work on the String representation of the input
        try {
            return CmsHtml2TextConverter.html2text(
                String.valueOf(input),
                OpenCms.getSystemInfo().getDefaultEncoding());
        } catch (Exception e) {
            return CmsMessages.formatUnknownKey(e.getMessage());
        }
    }
    CmsJspContentAccessValueWrapper value = (CmsJspContentAccessValueWrapper)input;
    return value.getExists() ? value.getContentValue().getPlainText(value.obtainCmsObject()) : "";
}
/**
 * Appends the plain text representation of the visited text node to the output.<p>
 *
 * @see org.htmlparser.visitors.NodeVisitor#visitStringNode(org.htmlparser.Text)
 */
@Override
public void visitStringNode(Text text) {

    String plain = text.toPlainTextString();
    appendText(plain);
}
/**
 * Appends a line break.<p>
 *
 * Shortcut for the two argument variant of this method with <code>false</code>
 * as the second argument.<p>
 *
 * @param count the number of lines
 */
private void appendLinebreak(int count) {

    this.appendLinebreak(count, false);
}
setMarker("=", open); setIndentation(2, open); appendLinebreak(2); break; case 1: // H2 setMarker("==", open); setIndentation(3, open); appendLinebreak(2); break; case 2: // H3 setMarker("===", open); setIndentation(4, open); appendLinebreak(2); break; case 3: // H4 setMarker("====", open); setIndentation(5, open); appendLinebreak(2); break; case 4: // H5 setMarker("=====", open); setIndentation(6, open); appendLinebreak(2); break; case 5: // H6 setMarker("=======", open); setIndentation(7, open); appendLinebreak(2); break;
/**
 * Removes all HTML markup from the given input.<p>
 *
 * <ul>
 * <li>A {@link CmsJspContentAccessValueWrapper} is handled specially: if the wrapped value
 *      exists, its plain text is returned, otherwise the result is an empty String.
 * <li>Everything else is turned into a String and converted with
 *      {@link CmsHtml2TextConverter#html2text(String, String)} using the system default encoding.
 *      Should the conversion throw an exception, its message formatted as an unknown key
 *      is returned instead.
 * </ul>
 *
 * @param input the input to Strip from HTML
 *
 * @return the given input with all HTML stripped.
 */
public static String stripHtml(Object input) {

    if (input instanceof CmsJspContentAccessValueWrapper) {
        CmsJspContentAccessValueWrapper accessWrapper = (CmsJspContentAccessValueWrapper)input;
        if (!accessWrapper.getExists()) {
            return "";
        }
        return accessWrapper.obtainContentValue().getPlainText(accessWrapper.obtainCmsObject());
    }

    String result;
    try {
        String markup = String.valueOf(input);
        String encoding = OpenCms.getSystemInfo().getDefaultEncoding();
        result = CmsHtml2TextConverter.html2text(markup, encoding);
    } catch (Exception e) {
        result = CmsMessages.formatUnknownKey(e.getMessage());
    }
    return result;
}
/**
 * Handles an end tag.<p>
 *
 * The line break flag is cleared and the line breaks for the closing tag are appended.
 * If attribute text has been stored for the parent of the end tag, that entry is
 * removed from the attribute map and appended to the output.<p>
 *
 * @see org.htmlparser.visitors.NodeVisitor#visitEndTag(org.htmlparser.Tag)
 */
@Override
public void visitEndTag(Tag tag) {

    m_appendBr = false;
    appendLinebreaks(tag, false);

    String stored = (String)m_attributeMap.remove(tag.getParent());
    if (stored == null) {
        // nothing was collected for this tag
        return;
    }
    appendText(stored);
}
/** * Extracts the text from the given html content, assuming the given html encoding.<p> * * @param html the content to extract the plain text from * @param encoding the encoding to use * * @return the text extracted from the given html content * * @throws Exception if something goes wrong */ public static String html2text(String html, String encoding) throws Exception { // create the converter instance CmsHtml2TextConverter visitor = new CmsHtml2TextConverter(); return visitor.process(html, encoding); }
/**
 * Handles a text node by appending its plain text to the output.<p>
 *
 * @see org.htmlparser.visitors.NodeVisitor#visitStringNode(org.htmlparser.Text)
 */
@Override
public void visitStringNode(Text text) {

    String content = text.toPlainTextString();
    appendText(content);
}
/**
 * Appends an attribute.<p>
 *
 * The text for an image tag is appended to the output right away. The text for any
 * other tag is stored in the attribute map under the tag, after any text that has
 * already been stored for the same tag.<p>
 *
 * @param tag the tag
 * @param text the attribute text
 */
private void appendAttribute(Tag tag, String text) {

    boolean isImage = tag.getTagName().equals("IMG");
    if (isImage) {
        appendText(text);
        return;
    }
    String previous = m_attributeMap.get(tag);
    m_attributeMap.put(tag, previous == null ? text : previous + text);
}