org.jsoup.nodes.Document$OutputSettings.charset java code examples

/**
 * Sets the output charset of these settings from a charset name.
 *
 * @param charset name of the charset to switch to; resolved with {@link Charset#forName(String)}
 * @return this settings object, so further setters can be chained
 */
public OutputSettings charset(String charset) {
  // Resolve the name first, then delegate to the Charset-based overload.
  final Charset resolved = Charset.forName(charset);
  charset(resolved);
  return this;
}

  /**
   * Creates a copy of these output settings.
   *
   * @return the copied settings, with charset and escape mode re-applied on the copy
   */
  @Override
  public OutputSettings clone() {
    final OutputSettings copy;
    try {
      copy = (OutputSettings) super.clone();
    } catch (CloneNotSupportedException e) {
      throw new RuntimeException(e);
    }
    // Re-apply the charset by name so the copy gets its own charset and charset encoder.
    final String charsetName = charset.name();
    copy.charset(charsetName);
    // Look the escape mode up again by its constant name.
    final String escapeModeName = escapeMode.name();
    copy.escapeMode = Entities.EscapeMode.valueOf(escapeModeName);
    // indentAmount and prettyPrint are primitives, so Object.clone() has already copied them.
    return copy;
  }
}

public OutputSettings() {
  charset(Charset.forName("UTF8"));
}

// Turn pretty-printing off and switch the document's output charset to UTF-16.
jsoupDoc.outputSettings()
    .prettyPrint(false)
    .charset(StandardCharsets.UTF_16);
// Apply the charset and pretty-print flag to outSettings in one chained call.
outSettings.charset(Charsets.UTF_16).prettyPrint(false);

.syntax(Document.OutputSettings.Syntax.xml)
.escapeMode(Entities.EscapeMode.xhtml)
.charset(charset);

// Configure UTF-8 output with the xhtml escape mode, then clean the body markup against wl.
doc.outputSettings().charset("UTF-8").escapeMode(EscapeMode.xhtml);
htmlText = Jsoup.clean(doc.body().html(), wl);

document.outputSettings().prettyPrint(false);
document.outputSettings().escapeMode(xhtml);
document.outputSettings().charset("UTF-8");

Document clean = cleaner.clean(dirty);
// Configure the cleaned document's output one setting at a time.
clean.outputSettings().escapeMode(Entities.EscapeMode.base);
clean.outputSettings().charset(StandardCharsets.UTF_8);
clean.outputSettings().prettyPrint(false);

Document clean = cleaner.clean(dirty);
// Configure the cleaned document's output one setting at a time.
clean.outputSettings().escapeMode(Entities.EscapeMode.base);
clean.outputSettings().charset(StandardCharsets.UTF_8);
clean.outputSettings().prettyPrint(false);

  /**
   * Renders {@code template} with {@code context} to HTML, re-serialises it as XHTML through jsoup
   * and writes the resulting PDF to {@code outputStream}.
   *
   * @param outputStream stream handed to the renderer's {@code createPDF}
   * @param template template passed to {@code templateRenderer}
   * @param renderingRoot path whose absolute URI is given to the renderer alongside the markup
   * @param context values passed to the template renderer
   * @throws PdfRenderingException wrapping a {@code DocumentException} or
   *     {@code TemplateRenderingException} raised during rendering
   */
  @Override
  public void generatePDF(OutputStream outputStream, Path template, Path renderingRoot, Map<String, Object> context)
      throws PdfRenderingException {
    final ITextRenderer pdfRenderer = new ITextRenderer();

    try {
      final String renderedHtml = templateRenderer.renderAsString(template, context);

      // Round-trip through jsoup so the renderer receives a valid XHTML document.
      final Document xhtmlDocument = Jsoup.parse(renderedHtml);
      xhtmlDocument.outputSettings()
          .syntax(Document.OutputSettings.Syntax.xml)
          .escapeMode(Entities.EscapeMode.xhtml)
          .charset("UTF-8");

      final String rootUri = renderingRoot.toAbsolutePath().toUri().toString();
      final String xhtml = xhtmlDocument.toString();
      pdfRenderer.setDocumentFromString(xhtml, rootUri);
      pdfRenderer.layout();
      pdfRenderer.createPDF(outputStream);
    } catch (DocumentException | TemplateRenderingException e) {
      throw new PdfRenderingException(e);
    }
  }
}

/**
 * Applies stylesheet rules to an HTML string as inline styles and returns the result on one line.
 *
 * <p>The HTML is parsed with jsoup, converted with {@code DOMBuilder.jsoup2DOM}, and processed by
 * a {@code DOMAnalyzer} using the standard stylesheet plus {@code css}. In the serialised output,
 * {@code text-overflow: ellipsis;} is prepended to the style of {@code topiclinkwrapper} elements
 * and all line feeds are removed.</p>
 *
 * @param html markup to process
 * @param css stylesheet text added to the analyzer
 * @return the serialised document without line feeds
 * @throws IOException declared by the signature; presumably raised by stylesheet loading or
 *     serialisation (confirm against the helpers)
 * @throws SAXException declared by the signature; presumably raised by the DOM helpers (confirm)
 */
static String formatInlineCSS(final String html, final String css)
    throws IOException, SAXException {
  // NOTE(review): the second argument of jsoup's parse(String, String) overload is the base URI,
  // not a charset. Confirm that passing "UTF-8" here is intentional.
  final org.jsoup.nodes.Document jsoupDocument = Jsoup.parse(html, "UTF-8");
  jsoupDocument.outputSettings().charset("UTF-8");

  final Document dom = DOMBuilder.jsoup2DOM(jsoupDocument);
  final DOMAnalyzer analyzer = new DOMAnalyzer(dom);
  analyzer.attributesToStyles();
  analyzer.addStyleSheet(null, CSSNorm.stdStyleSheet(), DOMAnalyzer.Origin.AGENT);
  analyzer.addStyleSheet(null, css, null);
  analyzer.getStyleSheets();
  analyzer.stylesToDomInherited();

  // Patch the wrapper style first, then strip every line break from the serialised markup.
  return toString(dom)
      .replaceAll("class=\"topiclinkwrapper\" style=\"",
          "class=\"topiclinkwrapper\" style=\"text-overflow: ellipsis;")
      .replaceAll("\\n", "");
}

/**
 * Parses an HTML body fragment and returns the {@code <body>} element of the resulting document.
 * The document's output charset is set to {@code outputEncoding} before the body is returned.
 *
 * @param content the body fragment to parse
 * @return the {@code body} element of the parsed content
 */
private Element parseContent(String content) {
  final Document fragmentDocument = Jsoup.parseBodyFragment(content);
  fragmentDocument.outputSettings().charset(outputEncoding);
  final Element body = fragmentDocument.body();
  return body;
}

 // Read the address to fetch from the "htmluri" request parameter.
 // NOTE(review): if the parameter can be absent, getParameter would return null and trim() would
 // throw. Confirm the caller guarantees it is present.
 String url = request.getParameter("htmluri").trim();
// Use printf so the %s placeholder is replaced by the URL instead of being printed literally.
System.out.printf("Fetching %s...%n", url);

Document doc = Jsoup.connect(url).get();
// Serialise without pretty-printing, using the ASCII output charset.
Document.OutputSettings settings = doc.outputSettings();
settings.prettyPrint(false);
settings.charset("ASCII");
String html = doc.html();
// Turn the HTML entities in the serialised markup back into characters.
html = StringEscapeUtils.unescapeHtml(html);
html = Jsoup.parse(html).html();   //This will take care of any extra closing tags 
System.out.println(html);

/**
 * Parses an HTML body fragment and returns the {@code <body>} element of the resulting document.
 * The document's output charset is set to {@code outputEncoding} before the body is returned.
 *
 * @param content the body fragment to parse
 * @return the {@code body} element of the parsed content
 */
private Element parseContent(String content) {
  final Document fragmentDocument = Jsoup.parseBodyFragment(content);
  fragmentDocument.outputSettings().charset(outputEncoding);
  final Element body = fragmentDocument.body();
  return body;
}

/**
 * Parses XHTML text with jsoup's XML parser and configures the document's output settings:
 * no pretty-printing, {@code xhtml} escape mode and a UTF-8 output charset.
 *
 * @param inputXhtml the XHTML text to parse
 * @return the parsed document with its output settings configured
 */
private static Document parseXhtml(final String inputXhtml) {
  // NOTE(review): in jsoup's parse(String, String, Parser) overload the second argument is the
  // base URI rather than a charset. Confirm that passing "utf-8" here is intentional.
  final Document parsed = Jsoup.parse(inputXhtml, "utf-8", Parser.xmlParser());
  parsed.outputSettings()
      .prettyPrint(false)
      .escapeMode(xhtml)
      .charset("UTF-8");
  return parsed;
}

 Document doc = Jsoup.parse(
  "<p>THIS &mdash; IS A &ldquo;TEST&rdquo;. 5 &gt; 4. trademark: &#153;</p>");

// No pretty-printing, extended escape mode, ASCII output charset.
Document.OutputSettings settings = doc.outputSettings();
settings.prettyPrint(false)
    .escapeMode(Entities.EscapeMode.extended)
    .charset("ASCII");

// Serialise with the settings above and print the result.
String asciiHtml = doc.html();
System.out.println(asciiHtml);

// Set the document's output charset from charsetName.
doc.outputSettings().charset(charsetName);

/**
 * Sets the charset used in this document. This method is equivalent
 * to {@link OutputSettings#charset(java.nio.charset.Charset)
 * OutputSettings.charset(Charset)} but in addition it updates the
 * charset / encoding element within the document.
 * 
 * <p>This enables
 * {@link #updateMetaCharsetElement(boolean) meta charset update}.</p>
 * 
 * <p>If there's no element with charset / encoding information yet it will
 * be created. Obsolete charset / encoding definitions are removed!</p>
 * 
 * <p><b>Elements used:</b></p>
 * 
 * <ul>
 * <li><b>Html:</b> <i>&lt;meta charset="CHARSET"&gt;</i></li>
 * <li><b>Xml:</b> <i>&lt;?xml version="1.0" encoding="CHARSET"?&gt;</i></li>
 * </ul>
 * 
 * @param charset the charset to use for output and to record in the
 * document's charset / encoding element
 * 
 * @see #updateMetaCharsetElement(boolean) 
 * @see OutputSettings#charset(java.nio.charset.Charset) 
 */
public void charset(Charset charset) {
  // Turn meta charset updating on before touching the settings.
  updateMetaCharsetElement(true);
  // Store the charset in the output settings.
  outputSettings.charset(charset);
  // Presumably creates or refreshes the charset / encoding element described above.
  ensureMetaCharsetElement();
}

/**
 * Parses the response body into a {@link Document}.
 *
 * <p>The request must already have been executed. If the body was buffered into
 * {@code byteData}, a fresh stream over those bytes is used, so the response may be parsed more
 * than once. Otherwise the body stream can be parsed only once. After parsing, the stored
 * charset is updated from the document's output settings and the response is closed via
 * {@code safeClose()}.</p>
 *
 * @return the parsed document
 * @throws IOException declared by the signature; presumably raised while reading or parsing
 *     the body stream
 */
public Document parse() throws IOException {
  // Message fix: the parenthesis opened before "with" is now closed.
  Validate.isTrue(executed, "Request must be executed (with .execute(), .get(), or .post()) before parsing response");
  if (byteData != null) { // bytes have been read in to the buffer, parse that
    bodyStream = new ByteArrayInputStream(byteData.array());
    inputStreamRead = false; // ok to reparse if in bytes
  }
  // An unbuffered stream that was already consumed cannot be read again.
  Validate.isFalse(inputStreamRead, "Input stream already read and parsed, cannot re-read.");
  Document doc = DataUtil.parseInputStream(bodyStream, charset, url.toExternalForm(), req.parser());
  charset = doc.outputSettings().charset().name(); // update charset from meta-equiv, possibly
  inputStreamRead = true;
  safeClose();
  return doc;
}

/**
 * Gets the charset currently used by this document. This is a shortcut for
 * {@link OutputSettings#charset()} on the document's output settings.
 * 
 * @return the current output charset
 * 
 * @see OutputSettings#charset() 
 */
public Charset charset() {
  final Charset current = outputSettings.charset();
  return current;
}

Javadoc

Get the document's current output charset, which is used to control which characters are escaped when generating HTML (via the html() methods), and which are kept intact.

Where possible (when parsing from a URL or File), the document's output charset is automatically set to the input charset. Otherwise, it defaults to UTF-8.

Popular in Java

Parsing JSON documents to java classes using gson
onCreateOptionsMenu (Activity)
putExtra (Intent)
getResourceAsStream (ClassLoader)
FileWriter (java.io)
A specialized Writer that writes to a file in the file system. All write requests made by calling me
LinkedHashMap (java.util)
LinkedHashMap is an implementation of Map that guarantees iteration order. All optional operations a
DataSource (javax.sql)
An interface for the creation of Connection objects which represent a connection to a database. This
ImageIO (javax.imageio)
Filter (javax.servlet)
A filter is an object that performs filtering tasks on either the request to a resource (a servlet o
Location (org.springframework.beans.factory.parsing)
Class that models an arbitrary location in a Resource. Typically used to track the location of proble…
Top 12 Jupyter Notebook extensions

How to use the charset method in org.jsoup.nodes.Document$OutputSettings

Best Java code snippets using org.jsoup.nodes.Document$OutputSettings.charset (Showing top 20 results out of 315)

How to use the charset method in org.jsoup.nodes.Document$OutputSettings