org.jsoup.select.Elements.first java code examples

/**
 * Collects the description links found on a page.
 *
 * @param page the parsed page to scan
 * @return one entry per anchor matching {@code figure.t-image > b > u > a},
 *         each formed by prefixing the anchor's {@code href} with {@code urlBase}
 */
@Override
public List<String> getDescriptionsFromPage(Document page) {
  List<String> descriptionUrls = new ArrayList<>();
  for (Element anchor : page.select("figure.t-image > b > u > a")) {
    // Read the href once and reuse it for both the result and the debug log.
    String href = anchor.select("a").first().attr("href");
    descriptionUrls.add(urlBase + href);
    LOGGER.debug("Desc2 " + urlBase + href);
  }
  return descriptionUrls;
}
/**
 * Extracts the comic image URL from a page.
 *
 * @param doc the parsed comic page
 * @return a list holding the image's {@code src} with spaces percent-encoded,
 *         or an empty list when the page has no matching image element
 */
@Override
public List<String> getURLsFromPage(Document doc) {
  List<String> result = new ArrayList<>();
  Element elem = doc.select("div[id=cc-comicbody] > a > img[id=cc-comic]").first();
  if (elem == null) {
    // first() yields null when nothing matches; report "no URLs" instead of an NPE.
    return result;
  }
  // The site doesn't return properly encoded URLs, so replace every space with %20.
  // String.replace is a literal replacement; no regex is needed here.
  result.add(elem.attr("src").replace(" ", "%20"));
  return result;
}

/**
 * Loads the page referenced by the {@code a[data-page=next]} anchor.
 *
 * @param doc the current page
 * @return the document fetched from the anchor's {@code href}
 * @throws IOException if there is no such anchor, if its {@code href} does not
 *         start with {@code http}, or if the fetch itself fails
 */
@Override
public Document getNextPage(Document doc) throws IOException {
  String nextSelector = "a[data-page=next]";
  // Guard first: bail out unless a next-link exists and its href starts with "http".
  if (doc.select(nextSelector).first() == null
      || !doc.select(nextSelector).first().attr("href").startsWith("http")) {
    throw new IOException("No more pages");
  }
  return Http.url(doc.select(nextSelector).first().attr("href")).get();
}

/**
 * Read the form element value of the first element in this list.
 * @return the first element's value, or an empty string when the list has no elements.
 * @see Element#val()
 */
public String val() {
  return size() > 0 ? first().val() : "";
}

/**
 * Looks up the first element matching a selector.
 *
 * @param selector the selector passed to {@code $(selector)}
 * @return the first match, or {@code null} when the lookup returns null or no elements
 */
public Element $element(String selector) {
  Elements matches = $(selector);
  if (matches == null || matches.size() == 0) {
    return null;
  }
  return matches.first();
}

/**
 * Fetches the page linked from {@code div#next_page > div > a}.
 *
 * @param doc the current page
 * @return the document fetched from the link's absolute {@code href}
 * @throws IOException if no next-page link is present or the fetch fails
 */
@Override
public Document getNextPage(Document doc) throws IOException {
  // In a browser, luscious loads nextPageUrl via XHR and appends the new images to the
  // current page. A plain GET of the same URL works as well, so that is what is done here.
  Element nextLink = doc.select("div#next_page > div > a").first();
  if (nextLink != null) {
    return Http.url(nextLink.attr("abs:href")).get();
  }
  throw new IOException("No next page found.");
}

/**
 Read the text of the document's first {@code title} element.
 @return The whitespace-normalised, trimmed title text, or an empty string when there is no title element.
 */
public String title() {
  Element titleEl = getElementsByTag("title").first();
  if (titleEl == null) {
    return "";
  }
  // title is a preserve-whitespace tag for document output, so normalise it here
  return StringUtil.normaliseWhitespace(titleEl.text()).trim();
}

/**
 * Returns the {@code src} attribute of the first {@code img} element in an HTML fragment.
 *
 * @param html the HTML fragment to parse
 * @return the first image's {@code src}, or {@code null} when the input is blank,
 *         no {@code img} element is found, or the {@code src} is blank
 */
public static String getFirstImageSrc(String html) {
  if (StrUtils.isBlank(html)) {
    return null;
  }
  Elements images = Jsoup.parseBodyFragment(html).select("img");
  if (images == null || images.size() == 0) {
    return null;
  }
  String src = images.first().attr("src");
  if (StrUtils.isBlank(src)) {
    return null;
  }
  return src;
}

/**
 * Fetches the next batch of posts based on the {@code offset} and {@code count}
 * attributes of the first {@code posts} element.
 *
 * @param doc the current response document
 * @return the next page, or {@code null} when {@code offset + 100} exceeds {@code count}
 * @throws IOException if fetching the next page fails
 */
@Override
public Document getNextPage(Document doc) throws IOException {
  String postsTag = "posts";
  int pageSize = 100;
  int currentOffset = Integer.parseInt(doc.getElementsByTag(postsTag).first().attr("offset"));
  int totalCount = Integer.parseInt(doc.getElementsByTag(postsTag).first().attr("count"));
  if (currentOffset + pageSize <= totalCount) {
    // Page index passed to getPage is the current offset in pages, plus one.
    return Http.url(getPage(currentOffset / pageSize + 1)).get();
  }
  return null;
}

/**
 * Builds an album title from the first {@code .albumName} element of the first page.
 *
 * @param url the album URL (used for logging and for the fallback name)
 * @return {@code getHost() + "_" + albumName}, or the superclass title if anything fails
 * @throws MalformedURLException if the superclass fallback rejects the URL
 */
public String getAlbumTitle(URL url) throws MalformedURLException {
  try {
    // Prefer the album name shown on the page as the identifier.
    Elements albumNames = getFirstPage().select(".albumName");
    String host = getHost();
    String albumName = albumNames.first().text();
    return host + "_" + albumName;
  } catch (Exception e) {
    // Any failure drops back to the default album naming convention.
    LOGGER.warn("Failed to get album title from " + url, e);
    return super.getAlbumTitle(url);
  }
}

/**
 * Waits, then fetches the page linked by the {@code a.cc-next} navigation anchor.
 *
 * @param doc the current page
 * @return the document fetched from the anchor's {@code href}
 * @throws IOException if there is no next anchor or the fetch fails
 */
@Override
public Document getNextPage(Document doc) throws IOException {
  sleep(1000);
  Element nextLink = doc.select("div[id=topnav] > nav.cc-nav > a.cc-next").first();
  if (nextLink != null) {
    return Http.url(nextLink.attr("href")).get();
  }
  throw new IOException("No more pages");
}

/**
 * Builds an album title from the host, the GID and the first page's {@code title} text
 * (spaces replaced by underscores).
 *
 * @param url the album URL
 * @return the composed title, or the superclass title when the page title cannot be read
 * @throws MalformedURLException if the superclass fallback rejects the URL
 */
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
  try {
    // Attempt to use album title as GID
    return getHost() + "_" + getGID(url) + "_"
        + getFirstPage().select("title").first().text().replace(" ", "_");
  } catch (Exception e) {
    // first() returns null when the page has no <title>; catching only IOException let
    // that NullPointerException escape instead of falling back to the default naming
    // convention. Any failure while reading the title now takes the fallback path.
    LOGGER.info("Unable to find title at " + url);
  }
  return super.getAlbumTitle(url);
}

/**
 * Builds an album title from the first {@code td.listLong} cell of the main list table.
 *
 * @param url the album URL
 * @return {@code host_title_gid}, or the superclass title if anything fails
 * @throws MalformedURLException if the superclass fallback rejects the URL
 */
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
  try {
    String listTitle = getFirstPage()
        .select("div[id=main] > table.listTable > tbody > tr > td.listLong")
        .first()
        .text();
    return getHost() + "_" + listTitle + "_" + getGID(url);
  } catch (Exception e) {
    // Any failure drops back to the default album naming convention.
    LOGGER.warn("Failed to get album title from " + url, e);
    return super.getAlbumTitle(url);
  }
}

/**
 * Follows the {@code a.next} link to the next page, pausing before the request.
 *
 * @param doc the current page
 * @return the next page's document
 * @throws IOException if no {@code a.next} link exists or the fetch fails
 */
@Override
public Document getNextPage(Document doc) throws IOException {
  Elements nextLinks = doc.select("a.next");
  if (!nextLinks.isEmpty()) {
    // The href is appended to the site root to form the request URL.
    String target = "http://www.bcfakes.com" + nextLinks.first().attr("href");
    sleep(500);
    return Http.url(target).get();
  }
  throw new IOException("No more pages");
}

/**
 * Follows the pagination link that comes right after the current-page marker.
 *
 * @param doc the current page
 * @return the next page's document
 * @throws IOException if no such pagination link exists or the fetch fails
 */
@Override
public Document getNextPage(Document doc) throws IOException {
  // The "+" combinator picks the pagination link immediately following the current one.
  Elements followingLinks = doc.select("a.pagination_current + a.pagination_link");
  if (followingLinks.isEmpty()) {
    throw new IOException("No more pages");
  }
  String href = followingLinks.first().attr("href");
  String target = "http://www.imagebam.com" + href;
  sleep(500);
  return Http.url(target).get();
}

/**
 * Builds an album title from the first {@code h3 > strong} element (the profile name).
 *
 * @param url the album URL
 * @return {@code host_profileName_gid}, or the superclass title if anything fails
 * @throws MalformedURLException if the superclass fallback rejects the URL
 */
@Override
public String getAlbumTitle(URL url) throws MalformedURLException {
  try {
    Document firstPage = getFirstPage();
    String profileName = firstPage.select("h3 > strong").first().text();
    return getHost() + "_" + profileName + "_" + getGID(url);
  } catch (Exception e) {
    // Any failure drops back to the default album naming convention.
    LOGGER.warn("Failed to get album title from " + url, e);
    return super.getAlbumTitle(url);
  }
}

/**
 * Follows the anchor titled "Next page", pausing before the request.
 *
 * @param doc the current page
 * @return the next page's document
 * @throws IOException if no such anchor exists or the fetch fails
 */
@Override
public Document getNextPage(Document doc) throws IOException {
  Elements nextAnchors = doc.select("a[title=\"Next page\"]");
  if (!nextAnchors.isEmpty()) {
    // The href is appended to the site root to form the request URL.
    String target = "https://jabarchives.com" + nextAnchors.first().attr("href");
    sleep(500);
    return Http.url(target).get();
  }
  throw new IOException("No more pages");
}

/**
 * Reads the page count from the first {@code ptt} element of a gallery list.
 *
 * @param d the parsed document
 * @param body the raw body, attached to the exception on failure
 * @return the integer in the second-to-last child of the element's first grandchild
 * @throws ParseException if the structure is missing or the text is not an integer
 */
private static int parsePages(Document d, String body) throws ParseException {
  try {
    Elements cells = d.getElementsByClass("ptt").first().child(0).child(0).children();
    // The number is read from the second-to-last cell.
    int secondToLast = cells.size() - 2;
    String pageText = cells.get(secondToLast).text().trim();
    return Integer.parseInt(pageText);
  } catch (Throwable e) {
    ExceptionUtils.throwIfFatal(e);
    throw new ParseException("Can't parse gallery list pages", body);
  }
}

/**
 * Reads the number of preview pages from the first {@code ptt} element, using the HTML parser.
 *
 * @param document the parsed document
 * @param body the raw body, attached to the exception on failure
 * @return the integer in the second-to-last child of the element's first grandchild
 * @throws ParseException if the structure is missing or the text is not an integer
 */
public static int parsePreviewPages(Document document, String body) throws ParseException {
  try {
    Elements cells = document.getElementsByClass("ptt").first().child(0).child(0).children();
    // The number is read from the second-to-last cell.
    String pageText = cells.get(cells.size() - 2).text();
    return Integer.parseInt(pageText);
  } catch (Throwable e) {
    ExceptionUtils.throwIfFatal(e);
    e.printStackTrace();
    throw new ParseException("Can't parse preview pages", body);
  }
}

/**
 * Follows the {@code a.right} link to the next page, sending the stored cookies.
 *
 * @param doc the current page
 * @return the next page's document
 * @throws IOException if no {@code a.right} link exists or the fetch fails
 */
@Override
public Document getNextPage(Document doc) throws IOException {
  Elements rightLinks = doc.select("a.right");
  if (!rightLinks.isEmpty()) {
    // The href is appended to urlBase to form the request URL.
    String target = urlBase + rightLinks.first().attr("href");
    sleep(500);
    return Http.url(target).cookies(cookies).get();
  }
  throw new IOException("No more pages");
}

Javadoc

Get the first matched element.

Popular methods of Elements

get
size
text
Get the combined text of all the matched elements. Note that it is possible to get repeats if the ma
isEmpty
attr
Set an attribute on all matched elements.
select
Find matching elements within this element list.
remove
stream
html
Set the inner HTML of each matched element.
last
Get the last matched element.
iterator
<init>

Popular in Java

Parsing JSON documents to java classes using gson
getSharedPreferences (Context)
getContentResolver (Context)
notifyDataSetChanged (ArrayAdapter)
EOFException (java.io)
Thrown when a program encounters the end of a file or stream during an input operation.
FileReader (java.io)
A specialized Reader that reads from a file in the file system. All read requests made by calling me
Hashtable (java.util)
A plug-in replacement for JDK1.5 java.util.Hashtable. This version is based on org.cliffc.high_scale
ConcurrentHashMap (java.util.concurrent)
A plug-in replacement for JDK1.5 java.util.concurrent.ConcurrentHashMap. This version is based on or
Servlet (javax.servlet)
Defines methods that all servlets must implement. A servlet is a small Java program that runs within
Color (java.awt)
The Color class is used to encapsulate colors in the default sRGB color space or colors in arbitrary
Top plugins for WebStorm

How to use the first method in org.jsoup.select.Elements

Best Java code snippets using org.jsoup.select.Elements.first (Showing top 20 results out of 1,233)

How to use
first
method
in
org.jsoup.select.Elements