/**
 * Checks that {@code topPrivateDomain()} throws {@link IllegalStateException}
 * for each of "co.uk", "foo" and "com"; the test fails, naming the offending
 * input, if any of them is accepted.
 */
public void testInvalidTopPrivateDomain() {
  for (String candidate : ImmutableSet.of("co.uk", "foo", "com")) {
    boolean rejected = false;
    try {
      InternetDomainName.from(candidate).topPrivateDomain();
    } catch (IllegalStateException expected) {
      // This is the outcome the test is looking for.
      rejected = true;
    }
    if (!rejected) {
      fail(candidate);
    }
  }
}
/**
 * Checks that "google.com" is its own top private domain, and that the
 * children built with {@code child("mail")} and {@code child("foo.bar")}
 * resolve back to it.
 */
public void testValidTopPrivateDomain() {
  InternetDomainName expected = InternetDomainName.from("google.com");

  assertEquals(expected, expected.topPrivateDomain());

  String[] leftParts = {"mail", "foo.bar"};
  for (String prefix : leftParts) {
    InternetDomainName descendant = expected.child(prefix);
    assertEquals(expected, descendant.topPrivateDomain());
  }
}
public void testPublicSuffixMultipleUnders() { // PSL has both *.uk and *.sch.uk; the latter should win. // See http://code.google.com/p/guava-libraries/issues/detail?id=1176 InternetDomainName domain = InternetDomainName.from("www.essex.sch.uk"); assertTrue(domain.hasPublicSuffix()); assertEquals("essex.sch.uk", domain.publicSuffix().toString()); assertEquals("www.essex.sch.uk", domain.topPrivateDomain().toString()); }
/** * Adds outlinks to whois:{domain} and whois:{ipAddress} */ protected void addWhoisLinks(CrawlURI curi) throws InterruptedException { CrawlHost ch = serverCache.getHostFor(curi.getUURI()); if (ch == null) { return; } if (ch.getIP() != null) { // do a whois lookup on the ip address addWhoisLink(curi, ch.getIP().getHostAddress()); } if (InternetDomainName.isValid(ch.getHostName())) { // do a whois lookup on the domain try { String topmostAssigned = InternetDomainName.from(ch.getHostName()).topPrivateDomain().toString(); addWhoisLink(curi, topmostAssigned); } catch (IllegalStateException e) { // java.lang.IllegalStateException: Not under a public suffix: mod.uk logger.warning("problem resolving topmost assigned domain, will try whois lookup on the plain hostname " + ch.getHostName() + " - " + e); addWhoisLink(curi, ch.getHostName()); } } }
/**
 * Small demo that prints the top private domain of a few sample URLs.
 */
public class Test {

    /**
     * Prints one line per sample URL in the form {@code <url> -> <top private domain>}.
     *
     * @param args unused
     * @throws URISyntaxException if a sample URL cannot be parsed as a URI
     */
    public static void main(String[] args) throws URISyntaxException {
        ImmutableList<String> samples = ImmutableList.of(
                "http://example.google.com",
                "http://google.com",
                "http://bing.bing.bing.com",
                "http://www.amazon.co.jp/");

        for (String sample : samples) {
            String line = sample + " -> " + getTopPrivateDomain(sample);
            System.out.println(line);
        }
    }

    /**
     * Returns the name of the top private domain of the host part of {@code url}.
     *
     * @param url the URL to inspect
     * @return the top private domain's name
     * @throws URISyntaxException if {@code url} is not a valid URI
     */
    private static String getTopPrivateDomain(String url) throws URISyntaxException {
        URI parsed = new URI(url);
        return InternetDomainName.from(parsed.getHost()).topPrivateDomain().name();
    }
}
/**
 * Returns the parts of the top private domain of {@code uri}, joined with dots.
 *
 * @param uri the text handed to {@code InternetDomainName.from}
 * @return the dot-separated parts of the top private domain
 */
public static String getTopLevelDomain(String uri) {
    InternetDomainName topPrivate = InternetDomainName.from(uri).topPrivateDomain();

    StringBuilder joined = new StringBuilder();
    for (String label : topPrivate.parts()) {
        // Separator goes before every label except the first one written.
        if (joined.length() > 0) {
            joined.append(".");
        }
        joined.append(label);
    }
    return joined.toString();
}
/**
 * Returns the top private domain of {@code inputDomain} as a string.
 *
 * @param inputDomain the domain name to reduce
 * @return the top private domain, or {@code null} if any exception is thrown
 *         while parsing or resolving it
 */
public static String getRootDomain(String inputDomain) {
    try {
        InternetDomainName parsed = InternetDomainName.from(inputDomain);
        return parsed.topPrivateDomain().toString();
    } catch (Exception ignored) {
        // Every failure is reported to the caller as "no root domain".
        return null;
    }
}
/**
 * Derives a brand string from the first part of the top private domain of
 * {@code hostname}, passed through {@code Normalize.brand}.
 *
 * @param hostname the host name to inspect
 * @return the value produced by {@code Normalize.brand}, or {@code null} if
 *         any {@link RuntimeException} is thrown along the way
 */
private String extractCompanyFromHostName(String hostname) {
    try {
        InternetDomainName topPrivate = InternetDomainName.from(hostname).topPrivateDomain();
        String leadingLabel = topPrivate.parts().get(0);
        return Normalize.brand(leadingLabel);
    } catch (RuntimeException ignored) {
        return null;
    }
}
/**
 * Prompts for a root domain, reduces the input to its top private domain and
 * records it together with a keyword (the text before the first dot) in
 * {@code domainResult}, then refreshes the UI.
 *
 * <p>Nothing happens when the dialog is cancelled or the trimmed input is
 * empty. Input that is not a valid domain name, or has no top private
 * domain, still raises the exception thrown by {@code InternetDomainName}.
 *
 * @param e the triggering event (unused)
 */
public void actionPerformed(ActionEvent e) {
    String input = JOptionPane.showInputDialog("Enter Root Domain", null);
    if (input == null) {
        // showInputDialog returns null when the user cancels the dialog.
        return;
    }

    input = input.trim();
    if (input.isEmpty()) {
        return;
    }

    String rootDomain = InternetDomainName.from(input).topPrivateDomain().toString();
    String keyword = rootDomain.substring(0, rootDomain.indexOf("."));

    domainResult.AddToRootDomainMap(rootDomain, keyword);
    showToUI(domainResult);
}
});
private String extractSlugFromHost(String host) { String rootDomain; String siteName = siteSettings.getWebDomainName().or(siteSettings.getDomainName()); if (Strings.emptyToNull(siteName) == null) { InternetDomainName domainName = InternetDomainName.from(host); if (domainName.hasPublicSuffix()) { // Domain is under a valid TLD, extract the TLD + first child rootDomain = domainName.topPrivateDomain().name(); } else if (host.indexOf(".") > 0 && host.indexOf(".") < host.length()) { // Otherwise, best guess : strip everything before the first dot. rootDomain = host.substring(host.indexOf(".") + 1); } else { rootDomain = host; } } else { rootDomain = StringUtils.substringBefore(siteSettings.getDomainName(), ":"); } if (host.indexOf("." + rootDomain) > 0) { return host.substring(0, host.indexOf("." + rootDomain)); } else { return host; } }
suffix = domainName.topPrivateDomain(); } else { suffix = domainName;
domain = ".localhost"; } else { domain = ".".concat(InternetDomainName.from(new URL(screenshotContext.url).getHost()).topPrivateDomain().toString());
/**
 * Checks that {@code topPrivateDomain()} throws {@link IllegalStateException}
 * for each of "co.uk", "foo" and "com"; the test fails, naming the offending
 * input, if any of them is accepted.
 */
public void testInvalidTopPrivateDomain() {
  for (String candidate : ImmutableSet.of("co.uk", "foo", "com")) {
    boolean rejected = false;
    try {
      InternetDomainName.from(candidate).topPrivateDomain();
    } catch (IllegalStateException expected) {
      // This is the outcome the test is looking for.
      rejected = true;
    }
    if (!rejected) {
      fail(candidate);
    }
  }
}
public static String getTopLevelDomain(String host) { InternetDomainName domain = null; try { domain = getDomainName(host); if(domain.isUnderPublicSuffix()) { return domain.topPrivateDomain().toString(); } else { // if the domain is a public suffix, just use it as top level domain return domain.toString(); } } catch (IllegalArgumentException e) { // when host is an IP address, use it as TLD if(InetAddresses.isInetAddress(host)) { return host; } throw new IllegalStateException("Invalid top private domain name=["+domain+"] in URL=["+host+"]", e); } }
/**
 * Checks that "google.com" is its own top private domain, and that the
 * children built with {@code child("mail")} and {@code child("foo.bar")}
 * resolve back to it.
 */
public void testValidTopPrivateDomain() {
  InternetDomainName expected = InternetDomainName.from("google.com");

  assertEquals(expected, expected.topPrivateDomain());

  String[] leftParts = {"mail", "foo.bar"};
  for (String prefix : leftParts) {
    InternetDomainName descendant = expected.child(prefix);
    assertEquals(expected, descendant.topPrivateDomain());
  }
}
if (! DOTTED_ADDRESS.matcher(name).matches()) { final InternetDomainName idn = InternetDomainName.from(name); if (idn.isUnderPublicSuffix()) name = idn.topPrivateDomain().toString();
@Override public GeneralisedWebAddress getGeneralisedHostName(URL url) { String host = url.getHost(); // if (! url.getHost().contains(".")) { // return GeneralisedWebAddress.build(host, GeneralisedWebAddressType.HOST_ADDRESS); // } else InetAddress ip = extractInetAddress(host); if (ip != null) { if (ip instanceof Inet4Address ) { return GeneralisedWebAddress.build(host.replaceFirst("\\d+$", ""), GeneralisedWebAddressType.IPV4_ADDRESS); } else if (ip instanceof Inet6Address) { return GeneralisedWebAddress.build(ip.getHostAddress(), GeneralisedWebAddressType.IPV6_ADDRESS); } } else if (InternetDomainName.isValid(host)) { InternetDomainName domainName = InternetDomainName.from(host); if (domainName.isUnderPublicSuffix()) { return GeneralisedWebAddress.build(domainName.topPrivateDomain().toString(), GeneralisedWebAddressType.DOMAIN_NAME); } else if (domainName.hasParent()) { return GeneralisedWebAddress.build(domainName.parent().toString(), GeneralisedWebAddressType.DOMAIN_NAME); } return GeneralisedWebAddress.build(host, GeneralisedWebAddressType.HOST_ADDRESS); } return null; }
/** * Adds outlinks to whois:{domain} and whois:{ipAddress} */ protected void addWhoisLinks(CrawlURI curi) throws InterruptedException { CrawlHost ch = serverCache.getHostFor(curi.getUURI()); if (ch == null) { return; } if (ch.getIP() != null) { // do a whois lookup on the ip address addWhoisLink(curi, ch.getIP().getHostAddress()); } if (InternetDomainName.isValid(ch.getHostName())) { // do a whois lookup on the domain try { String topmostAssigned = InternetDomainName.from(ch.getHostName()).topPrivateDomain().toString(); addWhoisLink(curi, topmostAssigned); } catch (IllegalStateException e) { // java.lang.IllegalStateException: Not under a public suffix: mod.uk logger.warning("problem resolving topmost assigned domain, will try whois lookup on the plain hostname " + ch.getHostName() + " - " + e); addWhoisLink(curi, ch.getHostName()); } } }
/**
 * Builds the document from a CBOR target model: parses the response body
 * as a page and copies the URL, retrieval time, words, title, host, extracted
 * text and top private domain into this object's fields.
 *
 * @param model the source model; its {@code url}, {@code timestamp} and the
 *              "body" entry of {@code response} are read
 */
public TargetModelElasticSearch(TargetModelCbor model) {
    URL pageUrl = Urls.toJavaURL(model.url);
    String body = (String) model.response.get("body");

    Page page = new Page(pageUrl, body);
    page.setParsedData(new ParsedData(new PaginaURL(pageUrl, body)));

    this.html = body;
    this.url = model.url;
    this.retrieved = new Date(model.timestamp * 1000);
    this.words = page.getParsedData().getWords();
    this.wordsMeta = page.getParsedData().getWordsMeta();
    this.title = page.getParsedData().getTitle();
    this.domain = pageUrl.getHost();

    String extractedText;
    try {
        extractedText = DefaultExtractor.getInstance().getText(page.getContentAsString());
    } catch (Exception e) {
        // Text extraction is best-effort: any failure leaves the text empty.
        extractedText = "";
    }
    this.text = extractedText;

    InternetDomainName domainName = InternetDomainName.from(page.getDomainName());
    // Without an enclosing public suffix, the domain name itself is stored.
    this.topPrivateDomain = domainName.isUnderPublicSuffix()
            ? domainName.topPrivateDomain().toString()
            : domainName.toString();
}
public void testPublicSuffixMultipleUnders() { // PSL has both *.uk and *.sch.uk; the latter should win. // See http://code.google.com/p/guava-libraries/issues/detail?id=1176 InternetDomainName domain = InternetDomainName.from("www.essex.sch.uk"); assertTrue(domain.hasPublicSuffix()); assertEquals("essex.sch.uk", domain.publicSuffix().toString()); assertEquals("www.essex.sch.uk", domain.topPrivateDomain().toString()); }