/**
 * Re-serializes the given markup by parsing it with the XML parser (empty base URI) and
 * rendering the parsed result via {@code toString()}.
 *
 * <p>Best-effort: if parsing or rendering throws any {@link Exception}, the input is returned
 * unchanged.
 *
 * @param html the markup to format
 * @return the re-serialized markup, or {@code html} itself when formatting fails
 */
public static String formatHTML(String html) {
    String formatted;
    try {
        formatted = parse(html, "", xmlParser()).toString();
    } catch (Exception e) {
        // Deliberate fallback: hand the caller back exactly what it passed in.
        formatted = html;
    }
    return formatted;
}
public HtmlParser(String baseUri, String content) { long beginTime = System.currentTimeMillis(); log = LogFactory.getLog(HtmlParser.class); this.baseUri = baseUri; if (isTable(content)) { this.document = Jsoup.parse(content, baseUri, Parser.xmlParser()); } else { this.document = Jsoup.parse(content, baseUri); } long endTime = System.currentTimeMillis(); if (log.isTraceEnabled()) { log.trace("init html parser : " + (endTime - beginTime) + "ms"); } }
void insert(Token.Comment commentToken) { Comment comment = new Comment(commentToken.getData()); Node insert = comment; if (commentToken.bogus) { // xml declarations are emitted as bogus comments (which is right for html, but not xml) // so we do a bit of a hack and parse the data as an element to pull the attributes out String data = comment.getData(); if (data.length() > 1 && (data.startsWith("!") || data.startsWith("?"))) { Document doc = Jsoup.parse("<" + data.substring(1, data.length() -1) + ">", baseUri, Parser.xmlParser()); if (doc.childNodeSize() > 0) { Element el = doc.child(0); insert = new XmlDeclaration(settings.normalizeTag(el.tagName()), data.startsWith("!")); insert.attributes().addAll(el.attributes()); } // else, we couldn't parse it as a decl, so leave as a comment } } insertNode(insert); }
// Parse `str` with jsoup's XML parser, using an empty base URI.
Document document = Jsoup.parse(str, "", Parser.xmlParser());
// Configure `req` to use the XML parser (presumably a jsoup connection request; confirm at the declaration).
req.parser(Parser.xmlParser());
/**
 * Parses {@code content} with the XML parser (empty base URI) and switches pretty-printing off
 * on the resulting document's output settings.
 *
 * @param content the markup to parse
 * @return the parsed document with pretty-printing disabled
 */
@Override
public Document parse(String content) {
    Document parsed = Jsoup.parse(content, "", Parser.xmlParser());
    // Disable pretty-printing on the document's output settings before handing it out.
    parsed.outputSettings().prettyPrint(false);
    return parsed;
}
/**
 * Re-serializes the given markup by parsing it with the XML parser (empty base URI) and
 * rendering the parsed result via {@code toString()}.
 *
 * <p>Best-effort: if parsing or rendering throws any {@link Exception}, the input is returned
 * unchanged.
 *
 * @param html the markup to format
 * @return the re-serialized markup, or {@code html} itself when formatting fails
 */
public static String formatHTML(String html) {
    String formatted;
    try {
        formatted = parse(html, "", xmlParser()).toString();
    } catch (Exception e) {
        // Deliberate fallback: hand the caller back exactly what it passed in.
        formatted = html;
    }
    return formatted;
}
/**
 * Re-serializes the given markup by parsing it with the XML parser (empty base URI) and
 * rendering the parsed result via {@code toString()}.
 *
 * <p>Best-effort: if parsing or rendering throws any {@link Exception}, the input is returned
 * unchanged.
 *
 * @param html the markup to format
 * @return the re-serialized markup, or {@code html} itself when formatting fails
 */
public static String formatHTML(String html) {
    String formatted;
    try {
        formatted = parse(html, "", xmlParser()).toString();
    } catch (Exception e) {
        // Deliberate fallback: hand the caller back exactly what it passed in.
        formatted = html;
    }
    return formatted;
}
/**
 * Collects the inner HTML of every {@code p} element found in the result of
 * {@code getSimpleHtml(rawText)}, parsed with the XML parser.
 *
 * @param rawText the raw text to split into paragraphs
 * @return the inner HTML of each selected {@code p} element, in selection order
 */
@Override
public List<String> getParagraphText(String rawText) {
    List<String> result = new ArrayList<String>();
    String markedUp = getSimpleHtml(rawText);
    Document parsed = Jsoup.parse(markedUp, "", Parser.xmlParser());
    for (Element paragraph : parsed.select("p")) {
        String innerHtml = paragraph.html();
        result.add(innerHtml);
    }
    return result;
}
}
/**
 * Collects the inner HTML of every {@code p} element found in the result of
 * {@code getSimpleHtml(rawText)}, parsed with the XML parser.
 *
 * @param rawText the raw text to split into paragraphs
 * @return the inner HTML of each selected {@code p} element, in selection order
 */
@Override
public List<String> getParagraphText(String rawText) {
    List<String> result = new ArrayList<String>();
    String markedUp = getSimpleHtml(rawText);
    Document parsed = Jsoup.parse(markedUp, "", Parser.xmlParser());
    for (Element paragraph : parsed.select("p")) {
        String innerHtml = paragraph.html();
        result.add(innerHtml);
    }
    return result;
}
}
/**
 * Collects the inner HTML of every {@code p} element found in the result of
 * {@code getSimpleHtml(rawText)}, parsed with the XML parser.
 *
 * @param rawText the raw text to split into paragraphs
 * @return the inner HTML of each selected {@code p} element, in selection order
 */
@Override
public List<String> getParagraphText(String rawText) {
    List<String> result = new ArrayList<String>();
    String markedUp = getSimpleHtml(rawText);
    Document parsed = Jsoup.parse(markedUp, "", Parser.xmlParser());
    for (Element paragraph : parsed.select("p")) {
        String innerHtml = paragraph.html();
        result.add(innerHtml);
    }
    return result;
}
}
/**
 * Builds one {@code _inlineHeader('tag', content);} line per top-level element of
 * {@code content}, where the second argument is the element's inner HTML passed through
 * {@code makeJsString}.
 *
 * @param content markup whose top-level elements are turned into calls
 * @return the accumulated call lines, each terminated by a newline
 */
private static StringBuilder createHeaderInjectionCall(String content) {
    StringBuilder calls = new StringBuilder();
    Document parsed = Jsoup.parse(content, "", Parser.xmlParser());
    for (Element child : parsed.children()) {
        calls.append("_inlineHeader('")
                .append(child.tagName())
                .append("',")
                .append(makeJsString(child.html()))
                .append(");\n");
    }
    return calls;
}
/**
 * Fetches the DASH document behind {@code dashUrl} and extracts the track formats from it.
 *
 * @param dashUrl       the DASH URL, passed to the cipher manager's {@code getValidDashUrl}
 *                      before fetching
 * @param httpInterface the HTTP interface used to execute the request
 * @param playerScript  the player script value passed to {@code getValidDashUrl}
 * @return the formats produced by {@code loadTrackFormatsFromDashDocument}
 * @throws IOException if the response status code is not 200
 * @throws Exception   if resolving, fetching or parsing fails
 */
private List<YoutubeTrackFormat> loadTrackFormatsFromDash(String dashUrl, HttpInterface httpInterface, String playerScript) throws Exception {
    String validDashUrl = sourceManager.getCipherManager().getValidDashUrl(httpInterface, playerScript, dashUrl);
    HttpGet request = new HttpGet(validDashUrl);

    try (CloseableHttpResponse dashResponse = httpInterface.execute(request)) {
        int status = dashResponse.getStatusLine().getStatusCode();
        if (status == 200) {
            // Parse the response body stream with the XML parser.
            Document manifest = Jsoup.parse(dashResponse.getEntity().getContent(), CHARSET, "", Parser.xmlParser());
            return loadTrackFormatsFromDashDocument(manifest);
        }
        throw new IOException("Invalid status code for track info page response: " + status);
    }
}
/**
 * Parses {@code content} with the XML parser (empty base URI) and switches pretty-printing off
 * on the resulting document's output settings.
 *
 * @param content the markup to parse
 * @return the parsed document with pretty-printing disabled
 */
@Override
public Document parse(String content) {
    Document parsed = Jsoup.parse(content, "", Parser.xmlParser());
    // Disable pretty-printing on the document's output settings before handing it out.
    parsed.outputSettings().prettyPrint(false);
    return parsed;
}
/**
 * Converts {@code rawText} to plain text: the result of {@code getSimpleHtml(rawText)} is
 * parsed with the XML parser and handed to an {@code HtmlToPlainText} built from
 * {@code textConfig}.
 *
 * @param rawText    the raw text to convert
 * @param textConfig configuration passed to the {@code HtmlToPlainText} converter
 * @return the plain text produced by the converter
 */
@Override
public String getPlainText(String rawText, FreetextConfig textConfig) {
    Document parsedHtml = Jsoup.parse(getSimpleHtml(rawText), "", Parser.xmlParser());
    return new HtmlToPlainText(textConfig).getPlainText(parsedHtml);
}
/**
 * Finds an assignment of {@code inputAssociation} whose "from" side is a
 * {@code FormalExpression}, parses that expression's body with the XML parser and returns the
 * parsed result's string form.
 *
 * @param inputAssociation the association whose assignments are searched
 * @return the string form of the parsed body, or an empty string when no assignment has a
 *         {@code FormalExpression} as its "from" side
 */
private String readTaskFormName(DataInputAssociation inputAssociation) {
    return inputAssociation.getAssignment()
            .stream()
            .map(assignment -> assignment.getFrom())
            // instanceof is false for null, so missing "from" values are dropped here as well
            .filter(from -> from instanceof FormalExpression)
            .map(from -> (FormalExpression) from)
            .findAny()
            .map(expression -> Parser.xmlParser().parseInput(expression.getBody(), "").toString())
            .orElse("");
}
/**
 * Fetches the document at {@code mUrl} with the XML parser (ignoring the response content
 * type) and stores its {@code item} elements in {@code mItems}.
 *
 * @param strings not read by this method
 * @return {@code "success"} when the fetch completes, {@code "failure"} on an
 *         {@link IOException}
 */
@Override
protected String doInBackground(String... strings) {
    try {
        Document feed = Jsoup.connect(mUrl)
                .ignoreContentType(true)
                .parser(Parser.xmlParser())
                .get();
        mItems = feed.select("item");
        return "success";
    } catch (IOException e) {
        e.printStackTrace();
        return "failure";
    }
}
/**
 * Parses {@code content} with the XML parser, empties its first child element and fills that
 * element with copies of the child nodes of {@code scriptContentDocument}.
 *
 * @param content               markup whose first element acts as the wrapper tag
 * @param scriptContentDocument element whose child nodes are copied into the wrapper
 * @return the HTML of the resulting document
 */
private String getFragmentContent(String content, Element scriptContentDocument) {
    // The second argument of Jsoup.parse(String, String, Parser) is the base URI, not a
    // charset; this String overload takes no charset, so pass an empty base URI.
    Document resultDocument = Jsoup.parse(content, "", Parser.xmlParser());
    Element scriptTag = resultDocument.child(0).empty();
    scriptContentDocument.childNodesCopy().forEach(scriptTag::appendChild);
    return resultDocument.html();
}
/**
 * Creates the element for a dependency described as JSON. When the JSON's type is one that
 * {@code Dependency.Type.contains} accepts, the work is delegated to the four-argument overload
 * with {@code LoadMode.INLINE}; otherwise the JSON's contents are parsed with the XML parser.
 *
 * @param context        bootstrap context supplying the URI resolver
 * @param dependencyJson JSON description of the dependency
 * @return the element created for the dependency
 */
private static Element createDependencyElement(BootstrapContext context, JsonObject dependencyJson) {
    String type = dependencyJson.getString(Dependency.KEY_TYPE);

    if (!Dependency.Type.contains(type)) {
        // Not a known dependency type: treat the contents as raw markup.
        String contents = dependencyJson.getString(Dependency.KEY_CONTENTS);
        return Jsoup.parse(contents, "", Parser.xmlParser());
    }

    Dependency.Type dependencyType = Dependency.Type.valueOf(type);
    return createDependencyElement(context.getUriResolver(), LoadMode.INLINE, dependencyJson, dependencyType);
}
/**
 * Parses {@code inputXhtml} with the XML parser and configures the document's output settings:
 * pretty-printing off, {@code xhtml} escape mode, {@code UTF-8} output charset.
 *
 * @param inputXhtml the XHTML markup to parse
 * @return the parsed document with the output settings applied
 */
private static Document parseXhtml(final String inputXhtml) {
    // NOTE(review): the second argument of Jsoup.parse(String, String, Parser) is the base URI,
    // so "utf-8" here is not used as a charset - confirm whether "" was intended.
    final Document parsed = Jsoup.parse(inputXhtml, "utf-8", Parser.xmlParser());
    parsed.outputSettings()
            .prettyPrint(false)
            .escapeMode(xhtml)
            .charset("UTF-8");
    return parsed;
}