/**
 * Converts an HTTP response body into a {@code Page}.
 *
 * <p>The body is read as a string and parsed as HTML; the resulting page carries the
 * document title and the {@code href} value of every anchor that has one, in document
 * order, as an unmodifiable list.
 *
 * @param responseBody the response body to read
 * @return the page built from the parsed document
 * @throws IOException if the body cannot be read
 */
@Override
public Page convert(ResponseBody responseBody) throws IOException {
    Document parsed = Jsoup.parse(responseBody.string());

    List<String> hrefs = new ArrayList<>();
    for (Element anchor : parsed.select("a[href]")) {
        String href = anchor.attr("href");
        hrefs.add(href);
    }

    List<String> readOnlyHrefs = Collections.unmodifiableList(hrefs);
    return new Page(parsed.title(), readOnlyHrefs);
}
}
@Override public String getAlbumTitle(URL url) throws MalformedURLException { try { // Attempt to use album title as GID String title = getFirstPage().title(); Pattern p = Pattern.compile("^Porn pics of (.*) \\(Page 1\\)$"); Matcher m = p.matcher(title); if (m.matches()) { return getHost() + "_" + m.group(1) + "_" + getGID(url); } } catch (IOException e) { // Fall back to default album naming convention } return super.getAlbumTitle(url); }
@Override public void rip() throws IOException { LOGGER.info("Retrieving " + this.url); Document doc = Http.url(url).get(); //Get user friendly filename from page title String title = doc.title(); Elements script = doc.select("script"); if (script.isEmpty()) { throw new IOException("Could not find script code at " + url); } //Regex assumes highest quality source is listed first Pattern p = Pattern.compile("\"source\":\"(.*?)\""); for (Element element : script) { Matcher m = p.matcher(element.data()); if (m.find()){ String vidUrl = m.group(1); addURLToDownload(new URL(vidUrl), HOST + "_" + title); } } waitForThreads(); } }
// fin is a String array of URLs: print each page's title, skipping URLs that time out.
for (String pageUrl : fin) {
    try {
        Document finaldoc = Jsoup.connect(pageUrl).get();
        out.println(finaldoc.title());
    } catch (SocketTimeoutException exception) {
        // Timed out: move on to the next URL.
    }
}
// Fetch the page while sending "Mozilla" as the User-Agent, then print its title.
String url = "http://www.google.com/search?hl=en&btnI=1&q=balusc";
Document document = Jsoup.connect(url)
        .userAgent("Mozilla")
        .get();
String pageTitle = document.title();
System.out.println(pageTitle);
// Fetch the page with jsoup's default connection settings, then print its title.
String url = "http://www.google.com/search?hl=en&btnI=1&q=balusc";
Document document = Jsoup.connect(url)
        .get();
String pageTitle = document.title();
System.out.println(pageTitle);
// Fetch and parse the page with a three-second timeout, then read its title.
final int timeoutMillis = 3 * 1000;
URL url = new URL("http://example.com/");
Document doc = Jsoup.parse(url, timeoutMillis);
String title = doc.title();
private class MyTask extends AsyncTask<Void, Void, String> { @Override protected String doInBackground(Void... params) { String title =""; Document doc; try { doc = Jsoup.connect("http://google.com/").get(); title = doc.title(); System.out.print(title); } catch (IOException e) { e.printStackTrace(); } return title; } @Override protected void onPostExecute(String result) { //if you had a ui element, you could display the title ((TextView)findViewById (R.id.myTextView)).setText (result); } }
// Fetch the page on a separate thread and print its title to standard output.
Thread downloadThread = new Thread() {
    @Override
    public void run() {
        try {
            String pageTitle = Jsoup.connect("http://google.ca/").get().title();
            System.out.print(pageTitle);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
};
downloadThread.start();
class JsoupTask extends AsyncTask<String, Void, Void> { private Exception exception; protected void doInBackground(String... url) { Document doc = Jsoup.connect(url).get(); String title = doc.title(); ... } protected void onPostExecute(RSSFeed feed) { // here you can update your UI thread through Handler, for example } }
// fin is a String array of URLs: print each page's title on a best-effort basis.
for (String pageUrl : fin) {
    try {
        Document finaldoc = Jsoup.connect(pageUrl).get();
        out.println(finaldoc.title());
    } catch (Exception e) {
        // Keep going with the remaining URLs, but say which one failed and why
        // instead of discarding the failure silently.
        System.err.println("Skipping " + pageUrl + ": " + e);
    }
}
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

/** Parses a fixed HTML string with jsoup and prints the document title. */
public class SoGetTitleFromString {

    /**
     * Parses the sample markup and writes its title to standard output.
     *
     * @param args unused
     * @throws IOException declared by the original signature
     */
    public static void main(String[] args) throws IOException {
        final String markup = "<html><head><title>First parse</title></head>"
                + "<body><p>Parsed HTML into a doc.</p></body></html>";
        final Document parsed = Jsoup.parse(markup);
        System.out.println("Title is: " + parsed.title());
    }
}
/**
 * Reads the title of {@code doc}, stores it in {@code title}, passes it to
 * {@code setTitle} and prints it to standard output.
 *
 * <p>NOTE(review): {@code doc} is declared here but never assigned before it is used.
 * A document has to be obtained (fetched or parsed) and assigned to it first; where it
 * should come from is not visible in this block.
 */
public void crawler() {
    Document doc;
    // Failure site flagged by the original author ("null pointer here"):
    // doc was never instantiated before this dereference.
    title=doc.title();
    setTitle(title);
    System.out.println("Title : " + title);
}
// Open the URL with a Cookie header and a 10-second read timeout.
URL form = new URL(Your_url);
connection1 = (HttpURLConnection) form.openConnection();
connection1.setRequestProperty("Cookie", your_cookie);
connection1.setReadTimeout(10000);

// Parse straight from the response stream:
//  - try-with-resources closes the stream even when reading or parsing fails;
//  - the markup reaches the parser unmodified (reading line by line and appending
//    without separators would glue adjacent lines together);
//  - a null charset lets jsoup pick the encoding itself instead of decoding
//    with the platform default.
// The empty base URI matches what parsing from a plain string would use.
Document doc;
try (BufferedInputStream in = new BufferedInputStream(connection1.getInputStream())) {
    doc = Jsoup.parse(in, null, "");
}
String title = doc.title();
/**
 * Extracts the title from a piece of HTML.
 *
 * @param htmlContent
 *            the HTML content that may contain a title
 * @return the document title, or {@code null} when the parsed document has no
 *         {@code title} element directly under {@code head}
 */
public static String getTitle(String htmlContent) {
    final Document parsed = Jsoup.parse(htmlContent);
    if (parsed.select("head > title").isEmpty()) {
        return null;
    }
    return parsed.title();
}
/**
 * Derives a title for the document.
 *
 * <p>Uses the document title, or the combined text of the {@code h1} elements when the
 * title is null or empty. If the chosen text contains {@code ":: "}, only the part after
 * the last occurrence is returned, trimmed.
 *
 * @param document the document to inspect
 * @return the extracted title
 */
private static String extractTitle(final Document document) {
    String candidate = document.title();
    if (candidate == null || candidate.isEmpty()) {
        candidate = document.getElementsByTag("h1").text();
    }

    final int separatorAt = candidate.lastIndexOf(":: ");
    if (separatorAt < 0) {
        return candidate;
    }
    // Skip the two colons; the space that follows is removed by trim().
    return candidate.substring(separatorAt + 2).trim();
}
/**
 * Fetches the page at {@code params[0]} and stores its title in {@code mTitle}.
 *
 * @param params the task arguments; the first element is the URL to fetch
 * @return {@code "OK"} when the title was read, {@code "KO"} when fetching failed
 */
@Override
protected String doInBackground(String... params) {
    String url = params[0];
    try {
        Document doc = Jsoup.connect(url).get();
        mTitle = doc.title();
        return "OK";
    } catch (Exception e) {
        // Log a fixed message plus the throwable: an exception's own message may be
        // null, and passing the throwable keeps the stack trace in the log.
        Log.e(TAG, "Failed to fetch title from " + url, e);
        return "KO";
    }
}
/**
 * Builds an {@code Article} from a parsed document.
 *
 * @param document the parsed page
 * @param url the address to record on the article
 * @return an article holding the document title, the trimmed body text and {@code url}
 */
public Article get(final Document document, final String url) {
    final String headline = document.title();
    final String bodyText = document.body().text().trim();
    return new Article(headline, bodyText, url);
}
}
/**
 * Get the article title as an H1. Currently just uses document.title, we
 * might want to be smarter in the future.
 *
 * @return a newly created {@code h1} element whose text is the document title
 */
protected Element getArticleTitle() {
    Element articleTitle = mDocument.createElement("h1");
    // The title is plain text, so set it as text: setting it as HTML would re-parse
    // it and turn any '<' or '&' sequences in the title into markup.
    articleTitle.text(mDocument.title());
    return articleTitle;
}
/**
 * Picks a title for the document from several candidates and cleans it.
 *
 * <p>Candidates are offered in this order: the document title, the text of
 * {@code head title}, then the {@code content} of the {@code title}, {@code og:title}
 * and {@code twitter:title} meta tags. When the heuristic signals a match by throwing
 * {@code CandidateFound}, that candidate is cleaned and returned instead.
 *
 * @param doc the document to inspect
 * @return the cleaned title
 */
static String extractTitle(Document doc) {
    try {
        // Keep the candidates inside the chain so that later selectors are only
        // evaluated if the earlier steps did not already throw.
        final String chosen = new HeuristicString(doc.title())
                .or(StringUtils.innerTrim(doc.select("head title").text()))
                .or(StringUtils.innerTrim(doc.select("head meta[name=title]").attr("content")))
                .or(StringUtils.innerTrim(doc.select("head meta[property=og:title]").attr("content")))
                .or(StringUtils.innerTrim(doc.select("head meta[name=twitter:title]").attr("content")))
                .toString();
        return StringUtils.cleanTitle(chosen);
    } catch (HeuristicString.CandidateFound found) {
        return StringUtils.cleanTitle(found.candidate);
    }
}