/**
 * Loads the page at {@code url} (sending {@code USER_AGENT}) and extracts a video URL
 * from its markup.
 *
 * <p>The serialized page is searched for the escaped key {@code url<quality>\":\"},
 * trying the qualities 1080, 720, 480 and 240 in that order. The value runs from the
 * end of the key up to the next double quote; every backslash is then removed from it.
 *
 * @param url address of the page to load
 * @return the value found for the first quality that yields a complete entry
 * @throws IOException if the page cannot be loaded, or if no quality key with a
 *         terminated value is present in the page
 */
public static String getVideoURLAtPage(String url) throws IOException {
    Document doc = Http.url(url)
                       .userAgent(USER_AGENT)
                       .get();
    String html = doc.outerHtml();
    for (String quality : new String[] {"1080", "720", "480", "240"}) {
        String marker = "url" + quality + "\\\":\\\"";
        int markerAt = html.indexOf(marker);
        if (markerAt < 0) {
            continue;
        }
        int valueStart = markerAt + marker.length();
        int valueEnd = html.indexOf('"', valueStart);
        if (valueEnd < 0) {
            // The value has no closing quote (truncated page). Try the next quality
            // instead of failing with an unchecked StringIndexOutOfBoundsException.
            continue;
        }
        return html.substring(valueStart, valueEnd).replace("\\", "");
    }
    throw new IOException("Could not find video URL at " + url);
}
}
/**
 * Loads this task's page (using the page address as its own referrer), reads the
 * {@code __fileurl} value out of the serialized markup and queues that file for download.
 *
 * <p>Returns immediately when {@code isStopped()} is true and {@code isThisATest()} is
 * false. When the {@code download.save_order} setting is enabled (default {@code true})
 * the download is prefixed with the zero-padded {@code index}. A missing
 * {@code __fileurl} is logged as a warning; any {@link IOException} is logged as an error.
 */
@Override
public void run() {
    try {
        final boolean stoppedOutsideTest = isStopped() && !isThisATest();
        if (stoppedOutsideTest) {
            return;
        }

        final String pageAddress = this.url.toExternalForm();
        final Document page = Http.url(pageAddress)
                                  .referrer(pageAddress)
                                  .get();

        // Whole-input match with DOTALL: the leading greedy ".*" makes the LAST
        // "__fileurl = '...';" assignment in the page the one that is captured.
        final Matcher fileUrlMatcher =
                Pattern.compile("^.*__fileurl = '([^']+)';.*$", Pattern.DOTALL)
                       .matcher(page.outerHtml());

        if (!fileUrlMatcher.matches()) {
            LOGGER.warn("[!] could not find '__fileurl' at " + url);
            return;
        }

        final String fileAddress = fileUrlMatcher.group(1);
        final String prefix = Utils.getConfigBoolean("download.save_order", true)
                ? String.format("%03d_", index)
                : "";
        addURLToDownload(new URL(fileAddress), prefix);
    } catch (IOException e) {
        LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
    }
}
}
return parse.outerHtml();
// Insert a jQuery <script> tag as the first child of the parsed document's <head>.
parse.head().prepend("<script src=\"/static/components/jquery/dist/jquery.min.js\"></script>");
// Serialize the modified document and encode it with `charset`.
byte[] bytes = parse.outerHtml().getBytes(charset);
// Build a new full response whose body is the rewritten markup, reusing the protocol
// version and status of `resp`.
DefaultFullHttpResponse copy = new DefaultFullHttpResponse(
        resp.getProtocolVersion(),
        resp.getStatus(),
        Unpooled.wrappedBuffer(bytes));
return document.outerHtml(); } else { StringBuilder sb = new StringBuilder();
/**
 * Returns the outer HTML of the backing document.
 *
 * @return the string produced by {@code mDocument.outerHtml()}
 */
public final String outerHtml() {
    final String markup = mDocument.outerHtml();
    return markup;
}
/**
 * Returns the outer HTML of the original document.
 *
 * @return the string produced by {@code originalDoc.outerHtml()}
 */
@SuppressWarnings("unused")
public String getOuterHtml() {
    final String markup = originalDoc.outerHtml();
    return markup;
}
/**
 * Returns the outer HTML of the original document.
 *
 * @return the string produced by {@code originalDoc.outerHtml()}
 */
@SuppressWarnings("unused")
public String getOuterHtml() {
    final String markup = originalDoc.outerHtml();
    return markup;
}
import org.apache.commons.io.FileUtils;

/**
 * Requests {@code http://www.example.net}, parses the response and writes the parsed
 * document's outer HTML to {@code filename.html} as UTF-8.
 *
 * @throws Exception if the request, the parsing or the file write fails
 */
public void downloadPage() throws Exception {
    final Document page = Jsoup.connect("http://www.example.net")
                               .execute()
                               .parse();
    final File target = new File("filename.html");
    FileUtils.writeStringToFile(target, page.outerHtml(), "UTF-8");
}
/**
 * Parses {@code code} with jsoup, serializes the resulting document again and passes
 * that markup through {@code removeEmptyLines}.
 *
 * @param code markup to reformat
 * @return the result of {@code removeEmptyLines} applied to the re-serialized document
 */
private String formatCodeAllFormatted(String code) {
    final String reserialized = Jsoup.parse(code).outerHtml();
    return removeEmptyLines(reserialized);
}
/**
 * Parses {@code code} with jsoup, serializes the resulting document again and passes
 * that markup through {@code removeEmptyLines}.
 *
 * @param code markup to reformat
 * @return the result of {@code removeEmptyLines} applied to the re-serialized document
 */
private String formatCodeAllFormatted(String code) {
    final String reserialized = Jsoup.parse(code).outerHtml();
    return removeEmptyLines(reserialized);
}
// Parse the markup held in `html` into a jsoup document.
Document doc = Jsoup.parse(html);
// Configure serialization: indent amount of 4 and outline mode switched on.
doc.outputSettings().indentAmount(4).outline(true);
// Serialize the whole document using the settings above.
String result = doc.outerHtml();
/**
 * Embeds the given images directly into the HTML.
 *
 * <p>For each image resource, the elements chosen by {@code getImagesToInline} get their
 * {@code SRC_ATTR} replaced by {@code BASE64_URI} formatted with the resource's MIME type
 * and its Base64-encoded content, and are flagged with {@code INLINED_ATTR}.
 *
 * @param htmlContent the HTML to process
 * @param images the image resources to embed
 * @return the serialized document together with an empty attachment list
 */
@Override
public ContentWithImages inline(String htmlContent, List<ImageResource> images) {
    Document doc = Jsoup.parse(htmlContent);
    for (ImageResource image : images) {
        Elements imgs = getImagesToInline(doc, image);
        if (imgs.isEmpty()) {
            // Nothing references this resource: skip the encoding work entirely.
            continue;
        }
        // The data URI depends only on the resource, so encode it once rather than
        // once per matching element.
        String dataUri = MessageFormat.format(BASE64_URI, image.getMimetype(),
                Base64Utils.encodeToString(image.getContent()));
        for (Element img : imgs) {
            img.attr(SRC_ATTR, dataUri);
            img.attr(INLINED_ATTR, true);
        }
    }
    return new ContentWithImages(doc.outerHtml(), new ArrayList<Attachment>(0));
}
/**
 * Pre-processes HTML by parsing it with jsoup and re-serializing it with XHTML escaping.
 *
 * <p>The stream is returned untouched when {@code request} or {@code is} is
 * {@code null}, or when the request parameter {@code pre-parse-html} does not parse to
 * {@code true}. Otherwise the stream is decoded as ISO-8859-9, re-serialized, and closed;
 * the returned stream is an in-memory copy of the new markup.
 *
 * @param request the request
 * @param is the input stream
 * @return the original stream, or a new in-memory stream holding the re-serialized markup
 * @throws IOException if reading or parsing the input stream fails
 */
protected InputStream preProcessHtml(Request request, InputStream is) throws IOException {
    if (request == null || is == null || !Boolean.parseBoolean(request.getParameter("pre-parse-html"))) {
        return is;
    }
    try {
        org.jsoup.nodes.Document doc = Jsoup.parse(is, "ISO-8859-9", "/");
        doc.outputSettings().escapeMode(EscapeMode.xhtml);
        // Encode with the document's own output charset. jsoup escapes characters
        // against that charset, so the platform default (plain getBytes()) could
        // corrupt non-ASCII text.
        return new ByteArrayInputStream(doc.outerHtml().getBytes(doc.outputSettings().charset()));
    } finally {
        IOUtils.closeQuietly(is);
    }
}
/**
 * Converts raw XML text to plain text.
 *
 * <p>The text is parsed with jsoup's XML parser. Every {@code PARA} element gets
 * {@code LVL + 1} copies of the padding string prepended, where a missing or blank
 * {@code LVL} attribute counts as level 0. The result is passed through
 * {@code getSimpleHtml}, parsed again as XML and rendered by {@code HtmlToPlainText}.
 *
 * @param rawText the XML text to convert
 * @param textConfig configuration handed to {@code HtmlToPlainText}
 * @return the plain-text rendering
 * @throws NumberFormatException if a non-blank {@code LVL} attribute is not an integer
 */
@Override
public String getPlainText(String rawText, FreetextConfig textConfig) {
    Document jsoupDoc = Jsoup.parse(rawText, "", Parser.xmlParser());
    for (Element paragraph : jsoupDoc.select("PARA")) {
        // jsoup's attr() returns "" (never null) for an absent attribute, so test for
        // emptiness; a null check would let Integer parsing fail on "".
        String rawLevel = paragraph.attr("LVL").trim();
        int level = rawLevel.isEmpty() ? 0 : Integer.parseInt(rawLevel);
        StringBuilder stb = new StringBuilder();
        for (int i = 0; i <= level; i++) {
            stb.append(" ");
        }
        paragraph.prepend(stb.toString());
    }
    String simpleHtml = getSimpleHtml(jsoupDoc.outerHtml());
    Document simpleDoc = Jsoup.parse(simpleHtml, "", Parser.xmlParser());
    HtmlToPlainText htmlConvert = new HtmlToPlainText(textConfig);
    return htmlConvert.getPlainText(simpleDoc);
}
/**
 * Converts raw XML text to plain text.
 *
 * <p>The text is parsed with jsoup's XML parser. For each depth {@code 0..3}, every
 * {@code PA<depth>} element gets {@code depth + 1} copies of the padding string
 * prepended. The result is passed through {@code getSimpleHtml}, parsed again as XML
 * and rendered by {@code HtmlToPlainText}.
 *
 * @param rawText the XML text to convert
 * @param textConfig configuration handed to {@code HtmlToPlainText}
 * @return the plain-text rendering
 */
@Override
public String getPlainText(String rawText, FreetextConfig textConfig) {
    final Document source = Jsoup.parse(rawText, "", Parser.xmlParser());

    for (int depth = 0; depth < 4; depth++) {
        // Padding for this depth: depth + 1 repetitions of the same string.
        final StringBuilder padding = new StringBuilder();
        int remaining = depth + 1;
        while (remaining > 0) {
            padding.append(" ");
            remaining--;
        }
        final String indent = padding.toString();

        for (Element paragraph : source.select("PA" + depth)) {
            paragraph.prepend(indent);
        }
    }

    final Document simplified =
            Jsoup.parse(getSimpleHtml(source.outerHtml()), "", Parser.xmlParser());
    return new HtmlToPlainText(textConfig).getPlainText(simplified);
}
/**
 * Inlines CSS into the given HTML.
 *
 * <p>Runs, in order: {@code internStyles} with the external CSS, {@code fetchStyles},
 * {@code extractStyles} with the fetched stylesheet, and {@code applyStyles}; then
 * serializes the document.
 *
 * @param htmlContent the HTML to process
 * @param cssContents the external CSS handed to {@code internStyles}
 * @return the outer HTML of the processed document
 */
@Override
public String inline(String htmlContent, List<ExternalCss> cssContents) {
    final Document page = Jsoup.parse(htmlContent);

    internStyles(page, cssContents);
    extractStyles(page, fetchStyles(page));
    applyStyles(page);

    final String inlined = page.outerHtml();
    return inlined;
}
public String output() { // body only attr on body if (renderedDocument.body().hasAttr(ExtNodeConstants.ATTR_BODY_ONLY_WITH_NS)) { return renderedDocument.body().html(); } // body only meta Elements bodyonlyMeta = renderedDocument.head().select(SelectorUtil.attr("meta", ExtNodeConstants.ATTR_BODY_ONLY_WITH_NS, null)); if (bodyonlyMeta.size() > 0) { return renderedDocument.body().html(); } // full page return renderedDocument.outerHtml(); }
@Override public boolean synchronizedHandleRequest(VaadinSession session, VaadinRequest request, VaadinResponse response) throws IOException { // Find UI class Class<? extends UI> uiClass = getUIClass(request); BootstrapContext context = createAndInitUI(uiClass, request, response, session); ServletHelper.setResponseNoCacheHeaders(response::setHeader, response::setDateHeader); Document document = getBootstrapPage(context); writeBootstrapPage(response, document.outerHtml()); return true; }
/** * Takes an input string representing an html document and processes it with * the Css Inliner. * * @param input the html document * @return the processed html document */ public String process(String input) { Document doc = Jsoup.parse(input); // check if the user wants to inline the data Elements elements = doc.getElementsByAttributeValue(DATA_INLINE_ATTR, "true"); if (elements.isEmpty()) { return input; } extractStyles(doc); applyStyles(doc); inlineImages(doc); doc.outputSettings(doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml).prettyPrint(false).escapeMode(Entities.EscapeMode.extended)); return StringEscapeUtils.unescapeHtml(doc.outerHtml()); }