/**
 * Finds the ancestor of this domain name that sits exactly one label below its {@linkplain
 * #isPublicSuffix() public suffix}. Given {@code x.adwords.google.co.uk}, the result is {@code
 * google.co.uk} because {@code co.uk} is a public suffix; given {@code myblog.blogspot.com}, the
 * result is {@code myblog.blogspot.com} itself because {@code blogspot.com} is a public suffix.
 *
 * <p>When {@link #isTopPrivateDomain()} already holds, this very instance is returned.
 *
 * <p>The result is a reasonable guess at the highest-level parent domain on which cookies can be
 * set, although the final say rests with each browser's cookie-control implementation.
 *
 * @return this instance if it is a top private domain, otherwise the matching ancestor
 * @throws IllegalStateException if this domain is not a top private domain and {@link
 *     #isUnderPublicSuffix()} is false
 * @since 6.0
 */
public InternetDomainName topPrivateDomain() {
  if (!isTopPrivateDomain()) {
    // Anything that is not below a public suffix has no private domain to report.
    checkState(isUnderPublicSuffix(), "Not under a public suffix: %s", name);
    final int levelsAbove = publicSuffixIndex - 1;
    return ancestor(levelsAbove);
  }
  return this;
}
/**
 * Finds the ancestor of this domain name that sits exactly one label below its {@linkplain
 * #isPublicSuffix() public suffix}. Given {@code x.adwords.google.co.uk}, the result is {@code
 * google.co.uk} because {@code co.uk} is a public suffix; given {@code myblog.blogspot.com}, the
 * result is {@code myblog.blogspot.com} itself because {@code blogspot.com} is a public suffix.
 *
 * <p>When {@link #isTopPrivateDomain()} already holds, this very instance is returned.
 *
 * <p>The result is a reasonable guess at the highest-level parent domain on which cookies can be
 * set, although the final say rests with each browser's cookie-control implementation.
 *
 * @return this instance if it is a top private domain, otherwise the matching ancestor
 * @throws IllegalStateException if this domain is not a top private domain and {@link
 *     #isUnderPublicSuffix()} is false
 * @since 6.0
 */
public InternetDomainName topPrivateDomain() {
  if (!isTopPrivateDomain()) {
    // Anything that is not below a public suffix has no private domain to report.
    checkState(isUnderPublicSuffix(), "Not under a public suffix: %s", name);
    final int levelsAbove = publicSuffixIndex - 1;
    return ancestor(levelsAbove);
  }
  return this;
}
/**
 * Finds the ancestor of this domain name that sits exactly one label below its {@linkplain
 * #isPublicSuffix() public suffix}. Given {@code x.adwords.google.co.uk}, the result is {@code
 * google.co.uk} because {@code co.uk} is a public suffix; given {@code myblog.blogspot.com}, the
 * result is {@code myblog.blogspot.com} itself because {@code blogspot.com} is a public suffix.
 *
 * <p>When {@link #isTopPrivateDomain()} already holds, this very instance is returned.
 *
 * <p>The result is a reasonable guess at the highest-level parent domain on which cookies can be
 * set, although the final say rests with each browser's cookie-control implementation.
 *
 * @return this instance if it is a top private domain, otherwise the matching ancestor
 * @throws IllegalStateException if this domain is not a top private domain and {@link
 *     #isUnderPublicSuffix()} is false
 * @since 6.0
 */
public InternetDomainName topPrivateDomain() {
  if (!isTopPrivateDomain()) {
    // Anything that is not below a public suffix has no private domain to report.
    checkState(isUnderPublicSuffix(), "Not under a public suffix: %s", name);
    final int levelsAbove = publicSuffixIndex - 1;
    return ancestor(levelsAbove);
  }
  return this;
}
/**
 * Checks every entry of {@code SOMEWHERE_UNDER_PS}: the parsed domain must not be a public suffix
 * itself, must have a public suffix, and must lie under one.
 */
public void testUnderPublicSuffix() {
  for (String candidate : SOMEWHERE_UNDER_PS) {
    InternetDomainName parsed = InternetDomainName.from(candidate);

    // The candidate string doubles as the failure message for each assertion.
    assertFalse(candidate, parsed.isPublicSuffix());
    assertTrue(candidate, parsed.hasPublicSuffix());
    assertTrue(candidate, parsed.isUnderPublicSuffix());
  }
}
/**
 * Checks every entry of {@code UNDER_PRIVATE_DOMAIN}: the parsed domain must be under a public
 * suffix without being a public suffix or a top private domain.
 */
public void testUnderPrivateDomain() {
  for (String candidate : UNDER_PRIVATE_DOMAIN) {
    InternetDomainName parsed = InternetDomainName.from(candidate);

    // The candidate string doubles as the failure message for each assertion.
    assertFalse(candidate, parsed.isPublicSuffix());
    assertTrue(candidate, parsed.hasPublicSuffix());
    assertTrue(candidate, parsed.isUnderPublicSuffix());
    assertFalse(candidate, parsed.isTopPrivateDomain());
  }
}
/**
 * Checks every entry of {@code TOP_PRIVATE_DOMAIN}: the parsed domain must be a top private
 * domain under a public suffix, and its parent must equal its public suffix.
 */
public void testTopPrivateDomain() {
  for (String candidate : TOP_PRIVATE_DOMAIN) {
    InternetDomainName parsed = InternetDomainName.from(candidate);

    // The candidate string doubles as the failure message for each assertion.
    assertFalse(candidate, parsed.isPublicSuffix());
    assertTrue(candidate, parsed.hasPublicSuffix());
    assertTrue(candidate, parsed.isUnderPublicSuffix());
    assertTrue(candidate, parsed.isTopPrivateDomain());

    // A top private domain is one label below the suffix, so the two must coincide.
    assertEquals(parsed.parent(), parsed.publicSuffix());
  }
}
/**
 * Exercises the public-suffix predicates against three fixtures: {@code PS} (names that are
 * public suffixes), {@code NO_PS} (names with no public suffix at all) and {@code NON_PS} (names
 * that have a public suffix but are not one themselves).
 */
public void testPublicSuffix() {
  // Names that are themselves public suffixes: they are their own suffix and not "under" one.
  for (String suffixName : PS) {
    InternetDomainName parsed = InternetDomainName.from(suffixName);
    assertTrue(suffixName, parsed.isPublicSuffix());
    assertTrue(suffixName, parsed.hasPublicSuffix());
    assertFalse(suffixName, parsed.isUnderPublicSuffix());
    assertFalse(suffixName, parsed.isTopPrivateDomain());
    assertEquals(parsed, parsed.publicSuffix());
  }

  // Names without any public suffix: every predicate is false and the suffix is null.
  for (String suffixless : NO_PS) {
    InternetDomainName parsed = InternetDomainName.from(suffixless);
    assertFalse(suffixless, parsed.isPublicSuffix());
    assertFalse(suffixless, parsed.hasPublicSuffix());
    assertFalse(suffixless, parsed.isUnderPublicSuffix());
    assertFalse(suffixless, parsed.isTopPrivateDomain());
    assertNull(parsed.publicSuffix());
  }

  // Names below a public suffix: they have one, but are not one.
  for (String belowSuffix : NON_PS) {
    InternetDomainName parsed = InternetDomainName.from(belowSuffix);
    assertFalse(belowSuffix, parsed.isPublicSuffix());
    assertTrue(belowSuffix, parsed.hasPublicSuffix());
    assertTrue(belowSuffix, parsed.isUnderPublicSuffix());
  }
}
/**
 * Finds the ancestor of this domain name that sits exactly one label below its {@linkplain
 * #isPublicSuffix() public suffix}. Given {@code x.adwords.google.co.uk}, the result is {@code
 * google.co.uk} because {@code co.uk} is a public suffix; given {@code myblog.blogspot.com}, the
 * result is {@code myblog.blogspot.com} itself because {@code blogspot.com} is a public suffix.
 *
 * <p>When {@link #isTopPrivateDomain()} already holds, this very instance is returned.
 *
 * <p>The result is a reasonable guess at the highest-level parent domain on which cookies can be
 * set, although the final say rests with each browser's cookie-control implementation.
 *
 * @return this instance if it is a top private domain, otherwise the matching ancestor
 * @throws IllegalStateException if this domain is not a top private domain and {@link
 *     #isUnderPublicSuffix()} is false
 * @since 6.0
 */
public InternetDomainName topPrivateDomain() {
  if (!isTopPrivateDomain()) {
    // Anything that is not below a public suffix has no private domain to report.
    checkState(isUnderPublicSuffix(), "Not under a public suffix: %s", name);
    final int levelsAbove = publicSuffixIndex - 1;
    return ancestor(levelsAbove);
  }
  return this;
}
/**
 * Finds the ancestor of this domain name that sits exactly one label below its {@linkplain
 * #isPublicSuffix() public suffix}. Given {@code x.adwords.google.co.uk}, the result is {@code
 * google.co.uk} because {@code co.uk} is a public suffix; given {@code myblog.blogspot.com}, the
 * result is {@code myblog.blogspot.com} itself because {@code blogspot.com} is a public suffix.
 *
 * <p>When {@link #isTopPrivateDomain()} already holds, this very instance is returned.
 *
 * <p>The result is a reasonable guess at the highest-level parent domain on which cookies can be
 * set, although the final say rests with each browser's cookie-control implementation.
 *
 * @return this instance if it is a top private domain, otherwise the matching ancestor
 * @throws IllegalStateException if this domain is not a top private domain and {@link
 *     #isUnderPublicSuffix()} is false
 * @since 6.0
 */
public InternetDomainName topPrivateDomain() {
  if (!isTopPrivateDomain()) {
    // Anything that is not below a public suffix has no private domain to report.
    checkState(isUnderPublicSuffix(), "Not under a public suffix: %s", name);
    final int levelsAbove = publicSuffixIndex - 1;
    return ancestor(levelsAbove);
  }
  return this;
}
/**
 * Finds the ancestor of this domain name that sits exactly one label below
 * its public suffix. Given {@code x.adwords.google.co.uk}, the result is
 * {@code google.co.uk} because {@code co.uk} is a public suffix.
 *
 * <p>When {@link #isTopPrivateDomain()} already holds, this very instance is
 * returned.
 *
 * <p>Do not rely on this method to find the topmost parent domain that is
 * addressable as a host: many public suffixes are addressable hosts too.
 * For instance {@code foo.bar.uk.com} has the public suffix {@code uk.com},
 * so this method yields {@code bar.uk.com}, yet {@code uk.com} is itself an
 * addressable host.
 *
 * <p>The result is a reasonable guess at the highest-level parent domain on
 * which cookies can be set, although the final say rests with each browser's
 * cookie-control implementation.
 *
 * @return this instance if it is a top private domain, otherwise the
 *     matching ancestor
 * @throws IllegalStateException if this domain is not a top private domain
 *     and {@link #isUnderPublicSuffix()} is false
 * @since 6.0
 */
public InternetDomainName topPrivateDomain() {
  if (!isTopPrivateDomain()) {
    // Anything that is not below a public suffix has no private domain to report.
    checkState(isUnderPublicSuffix(), "Not under a public suffix: %s", name);
    final int levelsAbove = publicSuffixIndex - 1;
    return ancestor(levelsAbove);
  }
  return this;
}
/**
 * Finds the ancestor of this domain name that sits exactly one label below
 * its public suffix. Given {@code x.adwords.google.co.uk}, the result is
 * {@code google.co.uk} because {@code co.uk} is a public suffix.
 *
 * <p>When {@link #isTopPrivateDomain()} already holds, this very instance is
 * returned.
 *
 * <p>Do not rely on this method to find the topmost parent domain that is
 * addressable as a host: many public suffixes are addressable hosts too.
 * For instance {@code foo.bar.uk.com} has the public suffix {@code uk.com},
 * so this method yields {@code bar.uk.com}, yet {@code uk.com} is itself an
 * addressable host.
 *
 * <p>The result is a reasonable guess at the highest-level parent domain on
 * which cookies can be set, although the final say rests with each browser's
 * cookie-control implementation.
 *
 * @return this instance if it is a top private domain, otherwise the
 *     matching ancestor
 * @throws IllegalStateException if this domain is not a top private domain
 *     and {@link #isUnderPublicSuffix()} is false
 * @since 6.0
 */
public InternetDomainName topPrivateDomain() {
  if (!isTopPrivateDomain()) {
    // Anything that is not below a public suffix has no private domain to report.
    checkState(isUnderPublicSuffix(), "Not under a public suffix: %s", name);
    final int levelsAbove = publicSuffixIndex - 1;
    return ancestor(levelsAbove);
  }
  return this;
}
/**
 * Finds the ancestor of this domain name that sits exactly one label below
 * its public suffix. Given {@code x.adwords.google.co.uk}, the result is
 * {@code google.co.uk} because {@code co.uk} is a public suffix.
 *
 * <p>When {@link #isTopPrivateDomain()} already holds, this very instance is
 * returned.
 *
 * <p>Do not rely on this method to find the topmost parent domain that is
 * addressable as a host: many public suffixes are addressable hosts too.
 * For instance {@code foo.bar.uk.com} has the public suffix {@code uk.com},
 * so this method yields {@code bar.uk.com}, yet {@code uk.com} is itself an
 * addressable host.
 *
 * <p>The result is a reasonable guess at the highest-level parent domain on
 * which cookies can be set, although the final say rests with each browser's
 * cookie-control implementation.
 *
 * @return this instance if it is a top private domain, otherwise the
 *     matching ancestor
 * @throws IllegalStateException if this domain is not a top private domain
 *     and {@link #isUnderPublicSuffix()} is false
 * @since 6.0
 */
public InternetDomainName topPrivateDomain() {
  if (!isTopPrivateDomain()) {
    // Anything that is not below a public suffix has no private domain to report.
    checkState(isUnderPublicSuffix(), "Not under a public suffix: %s", name);
    final int levelsAbove = publicSuffixIndex - 1;
    return ancestor(levelsAbove);
  }
  return this;
}
/**
 * Finds the ancestor of this domain name that sits exactly one label below
 * its public suffix. Given {@code x.adwords.google.co.uk}, the result is
 * {@code google.co.uk} because {@code co.uk} is a public suffix.
 *
 * <p>When {@link #isTopPrivateDomain()} already holds, this very instance is
 * returned.
 *
 * <p>Do not rely on this method to find the topmost parent domain that is
 * addressable as a host: many public suffixes are addressable hosts too.
 * For instance {@code foo.bar.uk.com} has the public suffix {@code uk.com},
 * so this method yields {@code bar.uk.com}, yet {@code uk.com} is itself an
 * addressable host.
 *
 * <p>The result is a reasonable guess at the highest-level parent domain on
 * which cookies can be set, although the final say rests with each browser's
 * cookie-control implementation.
 *
 * @return this instance if it is a top private domain, otherwise the
 *     matching ancestor
 * @throws IllegalStateException if this domain is not a top private domain
 *     and {@link #isUnderPublicSuffix()} is false
 * @since 6.0
 */
public InternetDomainName topPrivateDomain() {
  if (!isTopPrivateDomain()) {
    // Anything that is not below a public suffix has no private domain to report.
    checkState(isUnderPublicSuffix(), "Not under a public suffix: %s", name);
    final int levelsAbove = publicSuffixIndex - 1;
    return ancestor(levelsAbove);
  }
  return this;
}
public static String getTopLevelDomain(String host) { InternetDomainName domain = null; try { domain = getDomainName(host); if(domain.isUnderPublicSuffix()) { return domain.topPrivateDomain().toString(); } else { // if the domain is a public suffix, just use it as top level domain return domain.toString(); } } catch (IllegalArgumentException e) { // when host is an IP address, use it as TLD if(InetAddresses.isInetAddress(host)) { return host; } throw new IllegalStateException("Invalid top private domain name=["+domain+"] in URL=["+host+"]", e); } }
@Override public GeneralisedWebAddress getGeneralisedHostName(URL url) { String host = url.getHost(); // if (! url.getHost().contains(".")) { // return GeneralisedWebAddress.build(host, GeneralisedWebAddressType.HOST_ADDRESS); // } else InetAddress ip = extractInetAddress(host); if (ip != null) { if (ip instanceof Inet4Address ) { return GeneralisedWebAddress.build(host.replaceFirst("\\d+$", ""), GeneralisedWebAddressType.IPV4_ADDRESS); } else if (ip instanceof Inet6Address) { return GeneralisedWebAddress.build(ip.getHostAddress(), GeneralisedWebAddressType.IPV6_ADDRESS); } } else if (InternetDomainName.isValid(host)) { InternetDomainName domainName = InternetDomainName.from(host); if (domainName.isUnderPublicSuffix()) { return GeneralisedWebAddress.build(domainName.topPrivateDomain().toString(), GeneralisedWebAddressType.DOMAIN_NAME); } else if (domainName.hasParent()) { return GeneralisedWebAddress.build(domainName.parent().toString(), GeneralisedWebAddressType.DOMAIN_NAME); } return GeneralisedWebAddress.build(host, GeneralisedWebAddressType.HOST_ADDRESS); } return null; }
/**
 * Builds an Elasticsearch document model from a CBOR target model.
 *
 * <p>The response entry stored under {@code "body"} is used as the raw HTML. It is wrapped in a
 * {@code Page} together with parsed data, from which the words, meta words and title are read.
 * Plain text is extracted with {@code DefaultExtractor}; if extraction throws, the text is set
 * to the empty string. The top private domain is derived from the page's domain name when that
 * name is under a public suffix, and is the domain name itself otherwise.
 *
 * @param model the source model; its {@code url}, {@code response} and {@code timestamp}
 *     fields are read
 */
public TargetModelElasticSearch(TargetModelCbor model) {
  final URL pageUrl = Urls.toJavaURL(model.url);
  final String body = (String) model.response.get("body");

  final Page fetched = new Page(pageUrl, body);
  fetched.setParsedData(new ParsedData(new PaginaURL(pageUrl, body)));

  this.html = body;
  this.url = model.url;
  // NOTE(review): the multiplication suggests the timestamp is in seconds - confirm at the source.
  this.retrieved = new Date(model.timestamp * 1000);
  this.words = fetched.getParsedData().getWords();
  this.wordsMeta = fetched.getParsedData().getWordsMeta();
  this.title = fetched.getParsedData().getTitle();
  this.domain = pageUrl.getHost();

  try {
    this.text = DefaultExtractor.getInstance().getText(fetched.getContentAsString());
  } catch (Exception ignored) {
    // Best effort: a page whose text cannot be extracted is stored with empty text.
    this.text = "";
  }

  final InternetDomainName hostDomain = InternetDomainName.from(fetched.getDomainName());
  // Names that are not under a public suffix are stored unchanged.
  this.topPrivateDomain =
      (hostDomain.isUnderPublicSuffix() ? hostDomain.topPrivateDomain() : hostDomain).toString();
}
/**
 * Checks every entry of {@code SOMEWHERE_UNDER_PS}: the parsed domain must not be a public suffix
 * itself, must have a public suffix, and must lie under one.
 */
public void testUnderPublicSuffix() {
  for (String candidate : SOMEWHERE_UNDER_PS) {
    InternetDomainName parsed = InternetDomainName.from(candidate);

    // The candidate string doubles as the failure message for each assertion.
    assertFalse(candidate, parsed.isPublicSuffix());
    assertTrue(candidate, parsed.hasPublicSuffix());
    assertTrue(candidate, parsed.isUnderPublicSuffix());
  }
}
/**
 * Checks every entry of {@code UNDER_PRIVATE_DOMAIN}: the parsed domain must be under a public
 * suffix without being a public suffix or a top private domain.
 */
public void testUnderPrivateDomain() {
  for (String candidate : UNDER_PRIVATE_DOMAIN) {
    InternetDomainName parsed = InternetDomainName.from(candidate);

    // The candidate string doubles as the failure message for each assertion.
    assertFalse(candidate, parsed.isPublicSuffix());
    assertTrue(candidate, parsed.hasPublicSuffix());
    assertTrue(candidate, parsed.isUnderPublicSuffix());
    assertFalse(candidate, parsed.isTopPrivateDomain());
  }
}
/**
 * Checks every entry of {@code TOP_PRIVATE_DOMAIN}: the parsed domain must be a top private
 * domain under a public suffix, and its parent must equal its public suffix.
 */
public void testTopPrivateDomain() {
  for (String candidate : TOP_PRIVATE_DOMAIN) {
    InternetDomainName parsed = InternetDomainName.from(candidate);

    // The candidate string doubles as the failure message for each assertion.
    assertFalse(candidate, parsed.isPublicSuffix());
    assertTrue(candidate, parsed.hasPublicSuffix());
    assertTrue(candidate, parsed.isUnderPublicSuffix());
    assertTrue(candidate, parsed.isTopPrivateDomain());

    // A top private domain is one label below the suffix, so the two must coincide.
    assertEquals(parsed.parent(), parsed.publicSuffix());
  }
}
/**
 * Exercises the public-suffix predicates against three fixtures: {@code PS} (names that are
 * public suffixes), {@code NO_PS} (names with no public suffix at all) and {@code NON_PS} (names
 * that have a public suffix but are not one themselves).
 */
public void testPublicSuffix() {
  // Names that are themselves public suffixes: they are their own suffix and not "under" one.
  for (String suffixName : PS) {
    InternetDomainName parsed = InternetDomainName.from(suffixName);
    assertTrue(suffixName, parsed.isPublicSuffix());
    assertTrue(suffixName, parsed.hasPublicSuffix());
    assertFalse(suffixName, parsed.isUnderPublicSuffix());
    assertFalse(suffixName, parsed.isTopPrivateDomain());
    assertEquals(parsed, parsed.publicSuffix());
  }

  // Names without any public suffix: every predicate is false and the suffix is null.
  for (String suffixless : NO_PS) {
    InternetDomainName parsed = InternetDomainName.from(suffixless);
    assertFalse(suffixless, parsed.isPublicSuffix());
    assertFalse(suffixless, parsed.hasPublicSuffix());
    assertFalse(suffixless, parsed.isUnderPublicSuffix());
    assertFalse(suffixless, parsed.isTopPrivateDomain());
    assertNull(parsed.publicSuffix());
  }

  // Names below a public suffix: they have one, but are not one.
  for (String belowSuffix : NON_PS) {
    InternetDomainName parsed = InternetDomainName.from(belowSuffix);
    assertFalse(belowSuffix, parsed.isPublicSuffix());
    assertTrue(belowSuffix, parsed.hasPublicSuffix());
    assertTrue(belowSuffix, parsed.isUnderPublicSuffix());
  }
}