Java Examples & Tutorials of Parser.parseInput (org.jsoup.parser)

/**
 * Parses HTML into a Document by delegating to the supplied Parser. An alternate parser, such as a simple XML
 * (non-HTML) parser, may be provided.
 *
 * @param html    HTML to parse
 * @param baseUri The URL where the HTML was retrieved from. Used to resolve relative URLs to absolute URLs, that
 *                occur before the HTML declares a {@code <base href>} tag.
 * @param parser  alternate {@link Parser#xmlParser() parser} to use.
 * @return the Document returned by {@code parser.parseInput(html, baseUri)}
 */
public static Document parse(String html, String baseUri, Parser parser) {
  Document parsed = parser.parseInput(html, baseUri);
  return parsed;
}

doc = parser.parseInput(docData, baseUri);
  reader.skip(1);
try {
  doc = parser.parseInput(reader, baseUri);
} catch (UncheckedIOException e) {

/**
 * Looks through the assignments of the given input association for one whose "from" side is a
 * {@link FormalExpression}, runs that expression's body through the XML parser and returns the
 * parsed document rendered as a string.
 *
 * @param inputAssociation the association whose assignments are inspected
 * @return the string form of the parsed expression body, or an empty string when no assignment
 *         has a FormalExpression as its source
 */
private String readTaskFormName(DataInputAssociation inputAssociation) {
  return inputAssociation.getAssignment()
      .stream()
      .map(assignment -> assignment.getFrom())
      // isInstance is false for null, so this also drops assignments without a source.
      .filter(FormalExpression.class::isInstance)
      .map(FormalExpression.class::cast)
      .findAny()
      // getBody() is read inside the lambda so a null body is still handed to the parser.
      .map(expression -> Parser.xmlParser().parseInput(expression.getBody(), "").toString())
      .orElse("");
}

try {
  Parser parser = Parser.htmlParser().setTrackErrors(0);
  @Nonnull Document doc = parser.parseInput(html, "");
  @Nonnull Elements tags = doc.select(tagName);

/**
 * Searches the leading bytes of a document for a META tag that declares the character set,
 * either {@code <meta http-equiv="Content-Type" content="text/html;charset=...">} or the
 * HTML5 form {@code <meta charset="...">}.
 *
 * @param buffer    raw bytes of the document
 * @param maxlength upper bound on the number of bytes examined; values that are not positive,
 *                  or not smaller than the buffer length, mean the whole buffer is examined
 * @return the charset named by the first matching meta element that yields one, or
 *         {@code null} when none does
 */
private static String getCharsetFromMeta(byte buffer[], int maxlength) {
  // Only the first 'limit' bytes are decoded and parsed.
  int limit = (maxlength > 0 && maxlength < buffer.length) ? maxlength : buffer.length;

  // Decode with DEFAULT_CHARSET -- which hopefully will not mess up the
  // characters we're interested in...
  String markup = new String(buffer, 0, limit, DEFAULT_CHARSET);
  Document parsed = Parser.htmlParser().parseInput(markup, "dummy");

  Elements candidates = parsed.select("meta[http-equiv=content-type], meta[charset]");
  for (Element meta : candidates) {
    String charset = null;
    if (meta.hasAttr("http-equiv")) {
      charset = getCharsetFromContentType(meta.attr("content"));
    }
    // Fall back to the HTML5 attribute when the Content-Type value gave nothing.
    if (charset == null && meta.hasAttr("charset")) {
      charset = meta.attr("charset");
    }
    if (charset != null) {
      return charset;
    }
  }
  return null;
}

  /**
   * Processes a resource announcing received files: validates the content, chat and author
   * fields, resolves the chat and the sending participant, parses the content as XML to
   * collect its {@code <file>} elements, and dispatches a {@link FileReceivedEvent}.
   *
   * @param skype    the client used to look up the chat and to dispatch the event
   * @param resource the JSON resource carrying "content" and "conversationLink"
   */
  @Override
  public void handle(SkypeImpl skype, JsonObject resource) throws ConnectionException, ChatNotFoundException, IOException {
    String rawContent = Utils.getString(resource, "content");
    String conversationLink = Utils.getString(resource, "conversationLink");
    String authorLink = getAuthor(resource);
    Validate.notNull(rawContent, "Null content");
    Validate.notNull(conversationLink, "Null chat");
    Validate.notNull(authorLink, "Null author");

    String senderName = getUsername(authorLink);
    Validate.notNull(senderName, "Null username");

    Chat conversation = getChat(conversationLink, skype);
    Validate.notNull(conversation, "Null chatobj");

    Participant sender = conversation.getParticipant(senderName);
    Validate.notNull(sender, "Null initiator");

    // Each <file> element supplies the file name as its text plus "size" and "tid" attributes.
    List<ReceivedFile> files = Parser.xmlParser()
        .parseInput(rawContent, "")
        .getElementsByTag("file")
        .stream()
        .map(fileTag -> {
          String fileName = fileTag.text();
          long size = Long.parseLong(fileTag.attr("size"));
          long tid = Long.parseLong(fileTag.attr("tid"));
          return new ReceivedFileImpl(fileName, size, tid);
        })
        .collect(Collectors.toList());

    skype.getEventDispatcher().callEvent(new FileReceivedEvent(conversation, sender, files));
  }
},

    .decode(ByteBuffer.wrap(content)).toString();
jsoupDoc = Parser.htmlParser().parseInput(html, url);

if (!StringUtils.isEmpty(taskName)) {
  taskName = Parser.xmlParser().parseInput(taskName,
                       "").toString();
  formVariables.setTaskName(taskName);

Participant u = getUser(from, c);
String content = resource.get("content").asString();
Document doc = Parser.xmlParser().parseInput(content, "");
if (doc.getElementsByTag("meta").size() == 0) {
  throw new IllegalArgumentException("No meta? " + resource);

/**
 * Expects the text of an upper-case {@code <STYLE>} element to be left out when the
 * exclusion is configured in lower case ("style").
 */
@Test
public void testExclusionCase() throws IOException {
  Config config = new Config();
  config.put(TextExtractor.EXCLUDE_PARAM_NAME, "style");
  TextExtractor textExtractor = new TextExtractor(config);

  String html = "<html>the<STYLE>main</STYLE>content of the page</html>";
  Document parsed = Parser.htmlParser().parseInput(html, "http://stormcrawler.net");

  String extracted = textExtractor.text(parsed.body());
  assertEquals("the content of the page", extracted);
}

/**
 * Expects only the text inside the element matched by the inclusion pattern
 * {@code DIV[id="maincontent"]} to be extracted.
 */
@Test
public void testMainContent() throws IOException {
  Config config = new Config();
  config.put(TextExtractor.INCLUDE_PARAM_NAME, "DIV[id=\"maincontent\"]");
  TextExtractor textExtractor = new TextExtractor(config);

  String html = "<html>the<div id='maincontent'>main<div>content</div></div>of the page</html>";
  Document parsed = Parser.htmlParser().parseInput(html, "http://stormcrawler.net");

  String extracted = textExtractor.text(parsed.body());
  assertEquals("main content", extracted);
}

/**
 * Expects the text of a lower-case {@code <style>} element to be left out when the
 * exclusion is configured in upper case ("STYLE").
 */
@Test
public void testExclusion() throws IOException {
  Config config = new Config();
  config.put(TextExtractor.EXCLUDE_PARAM_NAME, "STYLE");
  TextExtractor textExtractor = new TextExtractor(config);

  String html = "<html>the<style>main</style>content of the page</html>";
  Document parsed = Parser.htmlParser().parseInput(html, "http://stormcrawler.net");

  String extracted = textExtractor.text(parsed.body());
  assertEquals("the content of the page", extracted);
}

Popular methods of Parser

xmlParser
Create a new XML parser. This parser assumes no knowledge of the incoming tags and does not treat it
htmlParser
Create a new HTML parser. This parser treats input as HTML5, and enforces the creation of a normalis
unescapeEntities
Utility method to unescape HTML entities from a string
<init>
Create a new Parser, using the specified TreeBuilder
parse
Parse HTML into a Document.
setTrackErrors
Enable or disable parse error tracking for the next parse.
getErrors
Retrieve the parse errors, if any, from the last parse.
isTrackErrors
Check if parse error tracking is enabled.
parseBodyFragment
Parse a fragment of HTML into the body of a Document.
parseFragment
Parse a fragment of HTML into a list of nodes. The context element, if supplied, supplies parsing co
settings

settings

Popular in Java

Running tasks concurrently on multiple threads
runOnUiThread (Activity)
onRequestPermissionsResult (Fragment)
notifyDataSetChanged (ArrayAdapter)
InputStream (java.io)
A readable source of bytes. Most clients will use input streams that read data from the file system (
URI (java.net)
A Uniform Resource Identifier that identifies an abstract or physical resource, as specified by RFC
Time (java.sql)
Java representation of an SQL TIME value. Provides utilities to format and parse the time's represen
Calendar (java.util)
Calendar is an abstract base class for converting between a Date object and a set of integer fields
GridLayout (java.awt)
The GridLayout class is a layout manager that lays out a container's components in a rectangular gri
Kernel (java.awt.image)
From CI to AI: The AI layer in your organization

How to use the parseInput method in org.jsoup.parser.Parser

Best Java code snippets using org.jsoup.parser.Parser.parseInput (Showing top 12 results out of 315)

How to use
parseInput
method
in
org.jsoup.parser.Parser