org.jsoup.nodes.Document$OutputSettings.<init> java code examples

/**
 * Returns the unencoded XML declaration as a string.
 *
 * @return the declaration text produced with default output settings, trimmed
 *         of leading and trailing whitespace
 * @throws SerializationException if building the declaration raises an {@link IOException}
 */
public String getWholeDeclaration() {
  final StringBuilder buffer = new StringBuilder();
  final Document.OutputSettings defaults = new Document.OutputSettings();
  try {
    getWholeDeclaration(buffer, defaults);
  } catch (IOException cause) {
    // Surface the checked failure as the unchecked serialization error.
    throw new SerializationException(cause);
  }
  final String rendered = buffer.toString();
  return rendered.trim();
}

/**
 * Fetches the given page and extracts its description as plain text.
 *
 * <p>The page is requested with {@code this.url} as referrer and the response
 * cookies are merged into {@code cookies}. The description is read from the
 * first {@code td[class=alt1][width="70%"]} cell; {@code <br>} and {@code <p>}
 * tags are turned into line separators before all markup is stripped.
 *
 * @param page URL of the page to fetch
 * @return the {@code og:title} meta content, a newline, then the description
 *         text; {@code null} if the fetch fails or no description cell exists
 */
public String getDescription(String page) {
  try {
    // Fetch the image page
    Response resp = Http.url(page)
        .referrer(this.url)
        .response();
    cookies.putAll(resp.cookies());
    // Parse the body exactly once: a jsoup response that has not been buffered
    // cannot be parsed a second time, and re-parsing is wasted work anyway.
    Document documentz = resp.parse();
    documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
    // Try to find the description
    Elements els = documentz.select("td[class=alt1][width=\"70%\"]");
    if (els.isEmpty()) {
      LOGGER.debug("No description at " + page);
      throw new IOException("No description found");
    }
    LOGGER.debug("Description found!");
    Element ele = els.get(0); // This is where the description is.
    // Would break completely if FurAffinity changed site layout.
    // Mark line breaks with a literal backslash-n placeholder so they survive html().
    ele.select("br").append("\\n");
    ele.select("p").prepend("\\n\\n");
    LOGGER.debug("Returning description at " + page);
    // Swap the placeholder for the platform line separator (literal replace, no regex needed),
    // then strip every remaining tag without re-indenting the text.
    String withLineBreaks = ele.html().replace("\\n", System.lineSeparator());
    String tempPage = Jsoup.clean(withLineBreaks, "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
    return documentz.select("meta[property=og:title]").attr("content") + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
  } catch (IOException ioe) {
    LOGGER.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
    return null;
  }
}
@Override

 // Sample HTML fragment to sanitize.
 String html = "<p>Arbit string <b>of</b><br><br>text. <em>What</em> to <strong>do</strong> with it?";
// Clean against the simpleText whitelist extended with the br tag, using an
// empty base URI and output settings that have pretty-printing switched off.
String cleaned = Jsoup.clean(html, 
    "", 
    Whitelist.simpleText().addTags("br"),
    new Document.OutputSettings().prettyPrint(false));
// Print the sanitized markup.
System.out.println(cleaned);

whitelist.addAttributes(":all", HTML_WHITELIST_ATTRIB);
// Configure the output settings fluently: UTF-16 charset, XML syntax.
OutputSettings outSettings = new Document.OutputSettings()
    .charset(Charsets.UTF_16)
    .syntax(Syntax.xml);

whitelist.addAttributes(":all", HTML_WHITELIST_ATTRIB);
// Configure the output settings fluently: UTF-16 charset, XML syntax.
OutputSettings outSettings = new Document.OutputSettings()
    .charset(Charsets.UTF_16)
    .syntax(Syntax.xml);

whitelist.addAttributes(":all", HTML_WHITELIST_ATTRIB);
// Configure the output settings fluently: UTF-16 charset, XML syntax.
OutputSettings outSettings = new Document.OutputSettings()
    .charset(Charsets.UTF_16)
    .syntax(Syntax.xml);

whitelist.addAttributes(":all", HTML_WHITELIST_ATTRIB);
// Configure the output settings fluently: UTF-16 charset, pretty-printing off.
OutputSettings outSettings = new Document.OutputSettings()
    .charset(Charsets.UTF_16)
    .prettyPrint(false);

// Switch off pretty-printing for this document, then grab its head element.
doc.outputSettings(new Document.OutputSettings().prettyPrint(false));
final Element head = doc.head();

 /**
  * Example that removes all markup from an HTML file while keeping the
  * line breaks and spacing of the source.
  */
 public class HtmlWithLineBreaks
 {

  /**
   * Strips every tag from the document's HTML.
   *
   * @param document the parsed document; its output settings are replaced
   *                 with ones that have pretty-printing disabled
   * @return the document's HTML cleaned against an empty whitelist
   */
  public String getCleanHtml(Document document)
  {
   final Document.OutputSettings rawOutput = new Document.OutputSettings().prettyPrint(false);
   document.outputSettings(rawOutput); // makes html() call preserve linebreaks and spacing
   final String markup = document.html();
   final Document.OutputSettings cleanerOutput = new Document.OutputSettings().prettyPrint(false);
   return Jsoup.clean(markup, "", Whitelist.none(), cleanerOutput);
  }

  /**
   * Parses the input file as UTF-8 and prints its cleaned text; an
   * {@link IOException} is reported via its stack trace.
   *
   * @param args unused
   */
  public static void main(String... args)
  {
   final File source = new File("/path/to/some/input.html"); // Just replace the input with your own html file source
   try
   {
    final Document parsed = Jsoup.parse(source, "UTF-8");
    System.out.println(new HtmlWithLineBreaks().getCleanHtml(parsed));
   }
   catch (IOException failure)
   {
    failure.printStackTrace();
   }
  }

 }

/**
 * Removes HTML markup from the given text.
 *
 * @param text                 the text to strip; returned unchanged when blank
 * @param smartSpacing         when true, a space is inserted after line breaks and
 *                             after closing p/tr tags before stripping, and runs of
 *                             whitespace in the result are collapsed to one space
 * @param stripEscapeSequences when true, the text is parsed and the body text is
 *                             returned (entities unescaped, char 160 replaced by a
 *                             space, whitespace collapsed); when false, tags are
 *                             removed with an empty whitelist
 * @return the stripped text, trimmed
 */
@Override
public String stripHtmlFromText(String text, boolean smartSpacing, boolean stripEscapeSequences)
{
  if (StringUtils.isBlank(text)) return text;
  if (smartSpacing) {
    // Pad line-breaking tags with a space so neighbouring words do not fuse once
    // the tags are removed. "/br>" alone never occurs inside <br>, <br/> or <br />,
    // so those forms are handled explicitly; the "/br>" match is kept for </br>.
    text = text.replaceAll("<br\\s*/?>", "$0 ")
        .replaceAll("/br>", "/br> ")
        .replaceAll("/p>", "/p> ")
        .replaceAll("/tr>", "/tr> ");
  }
  if (stripEscapeSequences) {
    org.jsoup.nodes.Document document = org.jsoup.Jsoup.parse(text);
    org.jsoup.nodes.Element body = document.body();
    //remove any html tags, unescape any escape characters
    text = body.text();
    //&nbsp; are converted to char code 160, java doesn't treat it like whitespace, so replace it with ' '
    text = text.replace((char)160, ' ');
  } else {
    // Tags are removed but the text stays escaped; output is neither pretty-printed nor outlined.
    text = org.jsoup.Jsoup.clean(text, "", org.jsoup.safety.Whitelist.none(), new org.jsoup.nodes.Document.OutputSettings().prettyPrint(false).outline(false));
  }
  if (smartSpacing || stripEscapeSequences) {
    // Collapse any run of whitespace (including the padding added above) to one space.
    text = text.replaceAll("\\s+", " ");
  }
  return text.trim();
}

// Give the document output settings that use the xhtml escape mode and have pretty-printing disabled.
d.outputSettings(new Document.OutputSettings().escapeMode(EscapeMode.xhtml).prettyPrint(false));

 // Sanitize the sample markup against the basic whitelist plus the br, p and i tags, with pretty-printing enabled.
 String pretty = Jsoup.clean("<img src=\"marco\">Capretta</img><i>Sono misterioso</i><p color=\"white\"><font size=\"5\">Ciao</p><p>some text</p><br/> <p>another text</p></font>" , "", Whitelist.basic().addTags("br", "p","i"), new Document.OutputSettings().prettyPrint(true));
// Re-parse the cleaned output and keep the string form of the first body element's children.
pretty= Jsoup.parse(pretty).getElementsByTag("body").get(0).children().toString();
System.out.println(pretty);

// Output settings with pretty-printing disabled, handed to the cleaner below.
Document.OutputSettings outputSettings = new Document.OutputSettings()
     .prettyPrint(false);
 // Sanitize body against the relaxed whitelist, using an empty base URI.
 body = Jsoup.clean(body, "", Whitelist.relaxed(), outputSettings);

 // Parse the sample markup into a document.
 Document doc = Jsoup.parse(sample);
// Disable pretty-printing, then read the inner HTML of the body.
doc.outputSettings(new Document.OutputSettings().prettyPrint(false));
String output = doc.body().html();

/**
 * Parses an HTML body fragment with Jsoup and decorates the resulting document.
 *
 * @param htmlText the HTML fragment to parse
 * @return the parsed document with pretty-printing disabled, {@code DOC_STYLE}
 *         appended to its head and {@code HIGHLIGHT_JS_SCRIPT} appended to its body
 */
private Document getParsedHtmlDocument(String htmlText) {
  final Document parsed = Jsoup.parseBodyFragment(htmlText);
  final Document.OutputSettings rawOutput = new Document.OutputSettings().prettyPrint(false);
  parsed.outputSettings(rawOutput);

  parsed.head().append(DOC_STYLE);
  parsed.body().append(HIGHLIGHT_JS_SCRIPT);
  return parsed;
}

 /**
  * Demonstrates turning {@code <br>} and {@code <p>} tags into newlines
  * before stripping all markup from a snippet.
  */
 public class Test {
   public static void main(String[] args) {
     final String markup = "<p>Text<br /> New Text<br />Second Text<br />Third Text</p>";

     final Document parsed = Jsoup.parse(markup);
     parsed.outputSettings(new Document.OutputSettings().prettyPrint(false));

     // Mark breaks with a literal backslash-n placeholder inside the markup.
     parsed.select("br").append("\\n");
     parsed.select("p").prepend("\\n\\n");

     // Turn every placeholder into a real newline, then drop all tags.
     final String withNewlines = parsed.html().replaceAll("\\\\n", "\n");
     final Document.OutputSettings cleanerOutput = new Document.OutputSettings().prettyPrint(false);
     final String plain = Jsoup.clean(withNewlines, "", Whitelist.none(), cleanerOutput);
     System.out.println(plain);
   }
 }

 /**
  * Strips every tag from the document's HTML after appending a newline to
  * the div elements selected from the ancestors of its h1 elements.
  *
  * @param document the parsed document; its output settings are replaced
  *                 with ones that have pretty-printing disabled
  * @return the document's HTML cleaned against an empty whitelist
  */
 public String getCleanHtml(Document document)
 {
   final Document.OutputSettings rawOutput = new Document.OutputSettings().prettyPrint(false);
   document.outputSettings(rawOutput);

   // Insert a linebreak after the h1 div parent.
   document.select("h1").parents().select("div").append("\n");

   final String markup = document.html();
   final Document.OutputSettings cleanerOutput = new Document.OutputSettings().prettyPrint(false);
   return Jsoup.clean(markup, "", Whitelist.none(), cleanerOutput);
 }

 // Remove every "&amp;" to break multi-level escaping, preventing &amp;lt;script&amp;gt; from being rendered as <script>.
String replace = input.replace("&amp;", "");
// Decode any encoded HTML, so that &lt;script&gt; becomes a real tag the cleaner below can remove.
String html = StringEscapeUtils.unescapeHtml(replace);
// Remove all HTML tags (empty whitelist); pretty-printing is off so line breaks are maintained.
String clean = Jsoup.clean(html, "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
// Decode HTML again to convert character entities back into text.
// NOTE(review): this final unescape can turn escaped text back into '<' and '>' characters;
// confirm the returned value is only ever used as plain text, never rendered as HTML.
return StringEscapeUtils.unescapeHtml(clean);

 /**
  * Removes all markup from the input.
  *
  * @param input the HTML to clean
  * @return the input cleaned against an empty whitelist, with an empty base
  *         URI and pretty-printing disabled
  */
 public static String cleanNoMarkup(String input) {
   return Jsoup.clean(
       input,
       "",
       Whitelist.none(),
       new Document.OutputSettings().prettyPrint(false));
 }

 /**
  * Converts {@code <br>} and {@code <p>} tags to newlines and strips all
  * remaining markup.
  *
  * @param html the HTML to convert; may be {@code null}
  * @return the converted text, or {@code null} when {@code html} is {@code null}
  */
 public static String br2nl(String html) {
   if (html == null) {
     return null;
   }

   final Document parsed = Jsoup.parse(html);
   // makes html() preserve linebreaks and spacing
   parsed.outputSettings(new Document.OutputSettings().prettyPrint(false));

   // Mark breaks with a literal backslash-n placeholder inside the markup.
   parsed.select("br").append("\\n");
   parsed.select("p").prepend("\\n\\n");

   // Turn every placeholder into a real newline, then drop all tags.
   final String withNewlines = parsed.html().replaceAll("\\\\n", "\n");
   final Document.OutputSettings cleanerOutput = new Document.OutputSettings().prettyPrint(false);
   return Jsoup.clean(withNewlines, "", Whitelist.none(), cleanerOutput);
 }

Popular in Java

Parsing JSON documents to java classes using gson
onCreateOptionsMenu (Activity)
putExtra (Intent)
getResourceAsStream (ClassLoader)
FileWriter (java.io)
A specialized Writer that writes to a file in the file system. All write requests made by calling me
LinkedHashMap (java.util)
LinkedHashMap is an implementation of Map that guarantees iteration order. All optional operations a
DataSource (javax.sql)
An interface for the creation of Connection objects which represent a connection to a database. This
ImageIO (javax.imageio)
Filter (javax.servlet)
A filter is an object that performs filtering tasks on either the request to a resource (a servlet o
Location (org.springframework.beans.factory.parsing)
Class that models an arbitrary location in a Resource. Typically used to track the location of proble
Best plugins for Eclipse

How to use org.jsoup.nodes.Document$OutputSettings constructor

Best Java code snippets using org.jsoup.nodes.Document$OutputSettings.<init> (Showing top 20 results out of 315)

How to use
org.jsoup.nodes.Document$OutputSettings
constructor