// Read the escape mode, encoder and core charset from `out` into final locals.
// NOTE(review): `out` is presumably the document's OutputSettings — confirm against the enclosing method.
final EscapeMode escapeMode = out.escapeMode(); final CharsetEncoder encoder = out.encoder(); final CoreCharset coreCharset = out.coreCharset; // init in out.prepareEncoder()
/**
 * Optionally normalises an HTML stream by round-tripping it through jsoup.
 *
 * <p>The stream is returned untouched when {@code request} or {@code is} is {@code null}, or
 * when the {@code pre-parse-html} request parameter does not parse to {@code true}. Otherwise
 * the input is parsed as ISO-8859-9, re-serialised with XHTML entity escaping and handed back
 * as a new in-memory stream; the original stream is closed quietly in that case.
 *
 * @param request the request carrying the {@code pre-parse-html} parameter
 * @param is the input stream
 * @return the original stream when pre-parsing is off, otherwise a stream over the
 *         re-serialised markup
 * @throws IOException Signals that an I/O exception has occurred.
 */
protected InputStream preProcessHtml(Request request, InputStream is) throws IOException {
    if (request == null || is == null) {
        return is;
    }
    if (!Boolean.parseBoolean(request.getParameter("pre-parse-html"))) {
        return is;
    }
    try {
        org.jsoup.nodes.Document doc = Jsoup.parse(is, "ISO-8859-9", "/");
        doc.outputSettings().escapeMode(EscapeMode.xhtml);
        // Encode with the document's own output charset rather than the JVM default, so the
        // bytes agree with the charset the serialiser escaped for and do not vary by platform.
        return new ByteArrayInputStream(doc.outerHtml().getBytes(doc.outputSettings().charset()));
    } finally {
        IOUtils.closeQuietly(is);
    }
}
/**
 * Optionally normalises an HTML stream by round-tripping it through jsoup.
 *
 * <p>The stream is returned untouched when {@code request} or {@code is} is {@code null}, or
 * when the {@code pre-parse-html} request parameter does not parse to {@code true}. Otherwise
 * the input is parsed as ISO-8859-9, re-serialised with XHTML entity escaping and handed back
 * as a new in-memory stream; the original stream is closed quietly in that case.
 *
 * @param request the request carrying the {@code pre-parse-html} parameter
 * @param is the input stream
 * @return the original stream when pre-parsing is off, otherwise a stream over the
 *         re-serialised markup
 * @throws IOException Signals that an I/O exception has occurred.
 */
protected InputStream preProcessHtml(Request request, InputStream is) throws IOException {
    if (request == null || is == null) {
        return is;
    }
    if (!Boolean.parseBoolean(request.getParameter("pre-parse-html"))) {
        return is;
    }
    try {
        org.jsoup.nodes.Document doc = Jsoup.parse(is, "ISO-8859-9", "/");
        doc.outputSettings().escapeMode(EscapeMode.xhtml);
        // Encode with the document's own output charset rather than the JVM default, so the
        // bytes agree with the charset the serialiser escaped for and do not vary by platform.
        return new ByteArrayInputStream(doc.outerHtml().getBytes(doc.outputSettings().charset()));
    } finally {
        IOUtils.closeQuietly(is);
    }
}
.prettyPrint(false) .syntax(Document.OutputSettings.Syntax.xml) .escapeMode(Entities.EscapeMode.xhtml) .charset(charset);
// Serialise the body with XHTML escaping, sanitise it against `wl`, then decode HTML
// entities in the sanitised result.
// NOTE(review): unescaping after Jsoup.clean can turn escaped markup (e.g. &lt;script&gt;)
// back into live tags — confirm htmlText is never rendered as HTML downstream.
doc.outputSettings().escapeMode(EscapeMode.xhtml);
htmlText = StringEscapeUtils.unescapeHtml(Jsoup.clean(doc.body().html(), wl));
// Serialise without pretty-printing and with XHTML escaping, then sanitise `value`
// against `whitelist` using an empty base URI.
outputSettings.prettyPrint( false ).escapeMode( EscapeMode.xhtml );
value = Jsoup.clean( value, "", whitelist, outputSettings );
private static String reformatXHtml(final String inputXhtml, final Map<String, ConfluenceLink> confluenceLinkMap) { final Document document = Jsoup.parse(inputXhtml, "utf-8", Parser.xmlParser()); document.outputSettings().prettyPrint(false); document.outputSettings().escapeMode(xhtml); document.outputSettings().charset("UTF-8");
// Run the cleaner over the dirty document, then configure how the result is serialised:
// no pretty-printing, UTF-8 output, base escape mode.
Document clean = cleaner.clean(dirty);
clean.outputSettings()
        .prettyPrint(false)
        .charset(StandardCharsets.UTF_8)
        .escapeMode(Entities.EscapeMode.base);
// Run the cleaner over the dirty document, then configure how the result is serialised:
// no pretty-printing, UTF-8 output, base escape mode.
Document clean = cleaner.clean(dirty);
clean.outputSettings()
        .prettyPrint(false)
        .charset(StandardCharsets.UTF_8)
        .escapeMode(Entities.EscapeMode.base);
/**
 * Opens a connection to the given URL and parses the response into a jsoup document.
 *
 * <p>The response is parsed as ISO-8859-9 with {@code "/"} as base URI, and the returned
 * document is configured for XHTML escaping. The stream is closed quietly and the connection
 * is handed to {@code UrlFactory.disconnect} whether or not parsing succeeds.
 *
 * @param urlString the URL string
 * @param request the request
 * @return the JSoup document
 * @throws Exception the exception
 */
protected Document getJsoupDocument(String urlString, Request request) throws Exception {
    URLConnection connection = null;
    InputStream stream = null;
    try {
        final URL target = UrlFactory.getUrl(urlString, request);
        connection = target.openConnection();
        stream = connection.getInputStream();

        final Document parsed = Jsoup.parse(stream, "ISO-8859-9", "/");
        parsed.outputSettings().escapeMode(EscapeMode.xhtml);
        return parsed;
    } finally {
        IOUtils.closeQuietly(stream);
        UrlFactory.disconnect(connection);
    }
}
/**
 * Opens a connection to the given URL and parses the response into a jsoup document.
 *
 * <p>The response is parsed as ISO-8859-9 with {@code "/"} as base URI, and the returned
 * document is configured for XHTML escaping. The stream is closed quietly and the connection
 * is handed to {@code UrlFactory.disconnect} whether or not parsing succeeds.
 *
 * @param urlString the URL string
 * @param request the request
 * @return the JSoup document
 * @throws Exception the exception
 */
protected Document getJsoupDocument(String urlString, Request request) throws Exception {
    URLConnection connection = null;
    InputStream stream = null;
    try {
        final URL target = UrlFactory.getUrl(urlString, request);
        connection = target.openConnection();
        stream = connection.getInputStream();

        final Document parsed = Jsoup.parse(stream, "ISO-8859-9", "/");
        parsed.outputSettings().escapeMode(EscapeMode.xhtml);
        return parsed;
    } finally {
        IOUtils.closeQuietly(stream);
        UrlFactory.disconnect(connection);
    }
}
@Override public void generatePDF(OutputStream outputStream, Path template, Path renderingRoot, Map<String, Object> context) throws PdfRenderingException { ITextRenderer renderer = new ITextRenderer(); try { String html = templateRenderer.renderAsString(template, context); // Ensure we have a valid XHTML document using JSoup Document jsoupDoc = Jsoup.parse(html); jsoupDoc.outputSettings().syntax(Document.OutputSettings.Syntax.xml); jsoupDoc.outputSettings().escapeMode(Entities.EscapeMode.xhtml); jsoupDoc.outputSettings().charset("UTF-8"); String path = renderingRoot.toAbsolutePath().toUri().toString(); renderer.setDocumentFromString(jsoupDoc.toString(), path); renderer.layout(); renderer.createPDF(outputStream); } catch (DocumentException | TemplateRenderingException e) { throw new PdfRenderingException(e); } } }
/** * Takes an input string representing an html document and processes it with * the Css Inliner. * * @param input the html document * @return the processed html document */ public String process(String input) { Document doc = Jsoup.parse(input); // check if the user wants to inline the data Elements elements = doc.getElementsByAttributeValue(DATA_INLINE_ATTR, "true"); if (elements.isEmpty()) { return input; } extractStyles(doc); applyStyles(doc); inlineImages(doc); doc.outputSettings(doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml).prettyPrint(false).escapeMode(Entities.EscapeMode.extended)); return StringEscapeUtils.unescapeHtml(doc.outerHtml()); }
/**
 * Parses {@code content} as a body fragment, passes the document through every configured
 * filter in order, and returns the inner HTML of the resulting {@code body} element(s).
 *
 * <p>The parsed document is set to XHTML escaping without pretty-printing before the filters run.
 *
 * @param content the HTML fragment to filter
 * @return the filtered body markup
 * @throws SystemException if parsing or any filter fails; the cause is logged and wrapped
 */
public String filter(String content) throws SystemException {
    try {
        Document current = Jsoup.parseBodyFragment(content);
        current.outputSettings()
                .escapeMode(Entities.EscapeMode.xhtml)
                .prettyPrint(false);

        // Each filter receives the previous filter's result.
        for (Filter step : filters) {
            current = step.runFilter(current);
        }

        return current.getElementsByTag("body").html();
    } catch (Exception e) {
        log.error("Could not filter", e);
        throw new SystemException("Could not filter", e);
    }
}
/**
 * Cleans a document with the relaxed whitelist and converts it to text, prefixed by a
 * {@code "# "} heading line when the original document has a non-blank title.
 *
 * <p>Resets the shared {@code indentation} counter to {@code -1} before converting.
 *
 * @param dirtyDoc the unsanitised document
 * @return the text content, with a leading title heading when a title is present
 */
private static String parseDocument(Document dirtyDoc) {
    indentation = -1;

    // Read the title from the dirty document, before it is replaced by the cleaned one.
    String title = dirtyDoc.title();

    Document doc = new Cleaner(Whitelist.relaxed()).clean(dirtyDoc);
    doc.outputSettings().escapeMode(EscapeMode.xhtml);

    String text = getTextContent(doc);
    if (title.trim().isEmpty()) {
        return text;
    }
    return "# " + title + "\n\n" + text;
}
/**
 * Parses {@code source}, applies {@code processHtmlDocument} to the parsed document and
 * returns its markup serialised with XML syntax, XHTML escaping and no pretty-printing.
 *
 * @param source the HTML to process
 * @return the processed markup
 */
@Override
public String processHtml(String source) {
    org.jsoup.nodes.Document document = Jsoup.parse(source);
    processHtmlDocument(document);

    org.jsoup.nodes.Document.OutputSettings settings = document.outputSettings();
    settings.syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml);
    settings.prettyPrint(false);
    settings.escapeMode(Entities.EscapeMode.xhtml);

    return document.html();
}
/**
 * Returns the parent's body re-serialised through jsoup with XML syntax and XHTML escaping.
 *
 * @return the re-serialised body markup
 */
@Override
public String body() {
    final Document parsed = Jsoup.parse(super.body());
    parsed.outputSettings()
            .syntax(Document.OutputSettings.Syntax.xml)
            .escapeMode(Entities.EscapeMode.xhtml);
    return parsed.html();
}
/**
 * Parses {@code source}, applies {@code processHtmlDocument} to the parsed document and
 * returns its markup serialised with XML syntax, XHTML escaping and no pretty-printing.
 *
 * @param source the HTML to process
 * @return the processed markup
 */
@Override
public String processHtml(String source) {
    org.jsoup.nodes.Document document = Jsoup.parse(source);
    processHtmlDocument(document);

    org.jsoup.nodes.Document.OutputSettings settings = document.outputSettings();
    settings.syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml);
    settings.prettyPrint(false);
    settings.escapeMode(Entities.EscapeMode.xhtml);

    return document.html();
}
/**
 * Parses an XHTML string with jsoup's XML parser and configures the document to serialise
 * without pretty-printing, with XHTML escaping, as UTF-8.
 *
 * @param inputXhtml the XHTML markup to parse
 * @return the parsed document with its output settings applied
 */
private static Document parseXhtml(final String inputXhtml) {
    // The second argument of Jsoup.parse(String, String, Parser) is the base URI, not a
    // charset; a String input is already decoded, so pass an empty base URI.
    final Document originalDocument = Jsoup.parse(inputXhtml, "", Parser.xmlParser());
    originalDocument.outputSettings().prettyPrint(false);
    originalDocument.outputSettings().escapeMode(xhtml);
    originalDocument.outputSettings().charset("UTF-8");
    return originalDocument;
}
// Parse a sample paragraph containing non-ASCII punctuation, then print it serialised with
// the extended escape mode and an ASCII output charset, so characters the ASCII encoder
// cannot represent are expected to come out as entities.
Document doc = Jsoup.parse("<p>THIS — IS A “TEST”. 5 > 4. trademark: ™</p>");
Document.OutputSettings settings = doc.outputSettings();
settings.prettyPrint(false)
        .escapeMode(Entities.EscapeMode.extended)
        .charset("ASCII");
String modifiedFileHtmlStr = doc.html();
System.out.println(modifiedFileHtmlStr);