/**
 * Resolves the output settings to use for this node.
 *
 * @return the owner document's output settings, or the output settings of a newly created
 *         {@code Document} (empty base URI) when there is no owner document
 */
Document.OutputSettings getOutputSettings() {
    final Document owner = ownerDocument();
    if (owner == null) {
        // No owner document: fall back to the settings of a throwaway document.
        return new Document("").outputSettings();
    }
    return owner.outputSettings();
}
@Override protected void initialiseParse(Reader input, String baseUri, ParseErrorList errors, ParseSettings settings) { super.initialiseParse(input, baseUri, errors, settings); stack.add(doc); // place the document onto the stack. differs from HtmlTreeBuilder (not on stack) doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml); }
/**
 * Get the HTML representation of this attribute; e.g. {@code href="index.html"}.
 *
 * @return HTML
 * @throws SerializationException if appending to the accumulator raises an {@code IOException}
 */
public String html() {
    final StringBuilder out = new StringBuilder();
    // Serialise with the output settings of a throwaway, empty-base-URI document.
    final Document.OutputSettings settings = new Document("").outputSettings();
    try {
        html(out, settings);
    } catch (IOException e) {
        throw new SerializationException(e);
    }
    return out.toString();
}
/** Get the HTML representation of these attributes. @return HTML @throws SerializationException if the HTML representation of the attributes cannot be constructed. */ public String html() { StringBuilder accum = new StringBuilder(); try { html(accum, (new Document("")).outputSettings()); // output settings a bit funky, but this html() seldom used } catch (IOException e) { // ought never happen throw new SerializationException(e); } return accum.toString(); }
/**
 * Serialises the document to HTML with pretty-printing switched off and an indent amount of zero.
 * <p>
 * The settings are changed on the passed document's own output settings and are not restored.
 *
 * @param document the document to serialise
 * @return the document's HTML
 */
public static String toCompactString(Document document) {
    final Document.OutputSettings settings = document.outputSettings();
    settings.prettyPrint(false);
    settings.indentAmount(0);
    return document.html();
}
}
public Document parse() throws IOException { Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post() before parsing response"); if (byteData != null) { // bytes have been read in to the buffer, parse that bodyStream = new ByteArrayInputStream(byteData.array()); inputStreamRead = false; // ok to reparse if in bytes } Validate.isFalse(inputStreamRead, "Input stream already read and parsed, cannot re-read."); Document doc = DataUtil.parseInputStream(bodyStream, charset, url.toExternalForm(), req.parser()); charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly inputStreamRead = true; safeClose(); return doc; }
/**
 * Fetches an image page and extracts its description.
 * <p>
 * The description is taken from the first {@code td[class=alt1][width="70%"]} cell, so this
 * would break completely if FurAffinity changed its site layout.
 *
 * @param page URL of the image page
 * @return the {@code og:title} meta content, a newline, then the cleaned description text;
 *         or {@code null} if an {@code IOException} occurs (including when no description
 *         cell is found)
 */
public String getDescription(String page) {
    try {
        // Fetch the image page
        Response resp = Http.url(page)
                .referrer(this.url)
                .response();
        cookies.putAll(resp.cookies());

        // Parse the response exactly once and reuse the document; a second resp.parse()
        // is redundant work and can fail if the body stream cannot be re-read.
        Document documentz = resp.parse();

        // Try to find the description
        Elements els = documentz.select("td[class=alt1][width=\"70%\"]");
        if (els.isEmpty()) {
            LOGGER.debug("No description at " + page);
            throw new IOException("No description found");
        }
        LOGGER.debug("Description found!");

        // This is where the description is.
        Element ele = els.get(0);
        documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));

        // Mark line breaks with a literal backslash-n placeholder so they survive
        // serialisation; the placeholder is swapped for the platform line separator below.
        ele.select("br").append("\\n");
        ele.select("p").prepend("\\n\\n");
        LOGGER.debug("Returning description at " + page);
        String tempPage = Jsoup.clean(
                ele.html().replaceAll("\\\\n", System.getProperty("line.separator")),
                "",
                Whitelist.none(),
                new Document.OutputSettings().prettyPrint(false));

        // Overridden saveText takes first line and makes it the file name.
        return documentz.select("meta[property=og:title]").attr("content") + "\n" + tempPage;
    } catch (IOException ioe) {
        LOGGER.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
        return null;
    }
}

@Override
// Set the charset on the document's output settings to charsetName.
doc.outputSettings().charset(charsetName);
/**
 * Get safe HTML from untrusted input HTML, by parsing input HTML and filtering it through a
 * white-list of permitted tags and attributes.
 * <p>The HTML is treated as a body fragment; it's expected the cleaned HTML will be used within
 * the body of an existing document. If you want to clean full documents, use
 * {@link Cleaner#clean(Document)} instead, and add structural tags
 * (<code>html, head, body</code> etc) to the whitelist.
 *
 * @param bodyHtml input untrusted HTML (body fragment)
 * @param baseUri URL to resolve relative URLs against
 * @param whitelist white-list of permitted HTML elements
 * @param outputSettings document output settings; use to control pretty-printing and entity
 *        escape modes
 * @return safe HTML (body fragment)
 * @see Cleaner#clean(Document)
 */
public static String clean(String bodyHtml, String baseUri, Whitelist whitelist, Document.OutputSettings outputSettings) {
    final Document untrusted = parseBodyFragment(bodyHtml, baseUri);
    final Document sanitized = new Cleaner(whitelist).clean(untrusted);
    // Apply the caller's output settings before serialising the cleaned body.
    sanitized.outputSettings(outputSettings);
    return sanitized.body().html();
}
// Read the syntax currently configured on the output settings.
OutputSettings.Syntax syntax = outputSettings().syntax();
/**
 * Writes the given jsoup document to the output stream (in UTF-8).
 * <p>
 * Before serialising, the document's output settings are set to an indent amount of 4,
 * HTML syntax and pretty-printing on.
 *
 * @param doc
 *            the document to write
 * @param outputStream
 *            the stream to write to
 * @throws IOException
 *             if writing fails
 */
private static void write(Document doc, OutputStream outputStream) throws IOException {
    doc.outputSettings()
            .indentAmount(4)
            .syntax(Syntax.html)
            .prettyPrint(true);

    final byte[] encoded = doc.html().getBytes(UTF_8);
    outputStream.write(encoded);
}
/**
 * Parses the content with jsoup's XML parser (empty base URI) and switches pretty-printing off
 * on the resulting document.
 *
 * @param content the markup to parse
 * @return the parsed document
 */
@Override
public Document parse(String content) {
    final Parser xmlParser = Parser.xmlParser();
    final Document parsed = Jsoup.parse(content, "", xmlParser);
    parsed.outputSettings().prettyPrint(false);
    return parsed;
}
Document newDocument = Jsoup.parse(htmlString, StringUtils.EMPTY, Parser.htmlParser());
/*
 * CharEncoding.US_ASCII is needed rather than UTF-8 so that special characters are encoded
 * properly, although their representation changes: for instance, an em dash (—) is written
 * as a character entity reference instead of the literal character.
 */
newDocument.outputSettings()
        .escapeMode(EscapeMode.base)
        .charset(CharEncoding.US_ASCII)
        .prettyPrint(false); // this makes sure that no line breaks are added
/**
 * Parses the given HTML and re-serialises it with the XML output syntax.
 *
 * @param html the HTML to convert
 * @return the document serialised with {@code Syntax.xml}
 */
private String toXHTML( String html ) {
    final Document parsed = Jsoup.parse( html );
    final Document.OutputSettings settings = parsed.outputSettings();
    settings.syntax( Document.OutputSettings.Syntax.xml );
    return parsed.html();
}
/**
 * Parses XHTML with jsoup's XML parser and configures the document's output settings:
 * pretty-printing off, {@code xhtml} escape mode and UTF-8 output charset.
 *
 * @param inputXhtml the XHTML markup to parse
 * @return the parsed document with the output settings applied
 */
private static Document parseXhtml(final String inputXhtml) {
    // The second argument of Jsoup.parse(String, String, Parser) is the base URI used to
    // resolve relative links, not a charset name. No base URI is known here, so pass an
    // empty one; the output charset is set explicitly below.
    final Document originalDocument = Jsoup.parse(inputXhtml, "", Parser.xmlParser());
    originalDocument.outputSettings()
            .prettyPrint(false)
            .escapeMode(xhtml)
            .charset("UTF-8");
    return originalDocument;
}
/**
 * Parses the content using jsoup's XML parser with an empty base URI, then disables
 * pretty-printing on the document's output settings.
 *
 * @param content the markup to parse
 * @return the parsed document
 */
@Override
public Document parse(String content) {
    final Document result = Jsoup.parse(content, "", Parser.xmlParser());
    final Document.OutputSettings settings = result.outputSettings();
    settings.prettyPrint(false);
    return result;
}
Document doc = Jsoup.parse("" + "<p>THIS — IS A “TEST”. 5 > 4. trademark: ™</p>");

// Configure output: no pretty-printing, extended escape mode, ASCII output charset.
Document.OutputSettings settings = doc.outputSettings()
        .prettyPrint(false)
        .escapeMode(Entities.EscapeMode.extended)
        .charset("ASCII");

// Serialise the document and print the result.
String modifiedFileHtmlStr = doc.html();
System.out.println(modifiedFileHtmlStr);
/**
 * Parses a body fragment and returns its {@code <body>} element. The parsed document's output
 * charset is set to {@code outputEncoding}.
 *
 * @param content the body fragment markup to parse
 * @return the {@code body} element of the parsed content
 */
private Element parseContent(String content) {
    final Document fragment = Jsoup.parseBodyFragment(content);
    final Document.OutputSettings settings = fragment.outputSettings();
    settings.charset(outputEncoding);
    return fragment.body();
}
/**
 * Parses the source HTML, runs {@code processHtmlDocument} on it, and serialises the result
 * with XML syntax, pretty-printing off and the {@code xhtml} escape mode.
 *
 * @param source the HTML to process
 * @return the processed document as a string
 */
@Override
public String processHtml(String source) {
    final org.jsoup.nodes.Document parsed = Jsoup.parse(source);
    processHtmlDocument(parsed);

    // Output settings are applied after processing, just before serialisation.
    final org.jsoup.nodes.Document.OutputSettings settings = parsed.outputSettings();
    settings.syntax(org.jsoup.nodes.Document.OutputSettings.Syntax.xml);
    settings.prettyPrint(false);
    settings.escapeMode(Entities.EscapeMode.xhtml);

    return parsed.html();
}
/**
 * Parses the input as a body fragment and returns the body's HTML, serialised with an indent
 * amount of 2.
 *
 * @param ugly the HTML fragment to reformat
 * @return the HTML of the parsed fragment's body
 */
public static String prettyPrint(String ugly) {
    final Document fragment = Jsoup.parseBodyFragment(ugly);
    final Document.OutputSettings settings = fragment.outputSettings();
    settings.indentAmount(2);
    return fragment.body().html();
}