/**
 * Stores the supplied field value on the result as its original URL.
 *
 * @param field  value handed to {@code setOriginalUrl}
 * @param result capture record that receives the value
 * @throws CDXFormatException declared by the method signature
 */
public void apply(String field, CaptureSearchResult result)
        throws CDXFormatException {
    result.setOriginalUrl(field);
}
/**
 * Stores the supplied field value on the result as its original URL.
 *
 * @param field  value handed to {@code setOriginalUrl}
 * @param result capture record that receives the value
 * @throws CDXFormatException declared by the method signature
 */
public void apply(String field, CaptureSearchResult result)
        throws CDXFormatException {
    result.setOriginalUrl(field);
}
@Override public String getOriginalUrl() { String url = get(CAPTURE_ORIGINAL_URL); if (url == null) { // convert from ORIG_HOST to ORIG_URL here: url = getUrlKey(); String host = get(CAPTURE_ORIGINAL_HOST); if (url != null && host != null) { StringBuilder sb = new StringBuilder(url.length()); sb.append(UrlOperations.DEFAULT_SCHEME); sb.append(host); sb.append(UrlOperations.getURLPath(url)); url = sb.toString(); // cache it for next time...? setOriginalUrl(url); } } return url; }
@Override public String getOriginalUrl() { String url = get(CAPTURE_ORIGINAL_URL); if (url == null) { // convert from ORIG_HOST to ORIG_URL here: url = getUrlKey(); String host = get(CAPTURE_ORIGINAL_HOST); if (url != null && host != null) { StringBuilder sb = new StringBuilder(url.length()); sb.append(UrlOperations.DEFAULT_SCHEME); sb.append(host); sb.append(UrlOperations.getURLPath(url)); url = sb.toString(); // cache it for next time...? setOriginalUrl(url); } } return url; }
/**
 * Reports whether the filter excludes a capture that carries the given URL.
 * <p>
 * A fresh {@code CaptureSearchResult} with only its original URL set is
 * passed to the filter.
 *
 * @param filter filter to consult
 * @param url    URL placed on the probe capture as its original URL
 * @return {@code true} only when the filter answers
 *         {@code ObjectFilter.FILTER_EXCLUDE}
 */
private boolean isBlocked(ObjectFilter<CaptureSearchResult> filter, String url) {
    CaptureSearchResult probe = new CaptureSearchResult();
    probe.setOriginalUrl(url);
    return filter.filterObject(probe) == ObjectFilter.FILTER_EXCLUDE;
}
/**
 * Asks the configured filter whether a URL/timestamp pair must be hidden.
 * <p>
 * Any ruling other than {@code ObjectFilter.FILTER_INCLUDE} counts as
 * excluded.
 *
 * @param urlString String URL that should be checked for blocking.
 * @param timestamp String 14-digit timestamp to check for blocking.
 * @return true if the url-timestamp should not be shown to end users
 */
public boolean isExcluded(String urlString, String timestamp) {
    CaptureSearchResult probe = new CaptureSearchResult();
    probe.setOriginalUrl(urlString);

    String captureTimestamp = Timestamp.parseBefore(timestamp).getDateStr();
    probe.setCaptureTimestamp(captureTimestamp);

    return filter.filterObject(probe) != ObjectFilter.FILTER_INCLUDE;
}
/**
 * Asks the configured filter whether a URL/timestamp pair must be hidden.
 * <p>
 * Any ruling other than {@code ObjectFilter.FILTER_INCLUDE} counts as
 * excluded.
 *
 * @param urlString String URL that should be checked for blocking.
 * @param timestamp String 14-digit timestamp to check for blocking.
 * @return true if the url-timestamp should not be shown to end users
 */
public boolean isExcluded(String urlString, String timestamp) {
    CaptureSearchResult probe = new CaptureSearchResult();
    probe.setOriginalUrl(urlString);

    String captureTimestamp = Timestamp.parseBefore(timestamp).getDateStr();
    probe.setCaptureTimestamp(captureTimestamp);

    return filter.filterObject(probe) != ObjectFilter.FILTER_INCLUDE;
}
@Override public boolean includeUrl(String urlKey, String originalUrl) { // return include(urlKey, originalUrl, true); if (UrlOperations.urlToScheme(originalUrl) == null) { originalUrl = UrlOperations.HTTP_SCHEME + originalUrl; } CaptureSearchResult resultTester = new FastCaptureSearchResult(); resultTester.setUrlKey(urlKey); resultTester.setOriginalUrl(originalUrl); // null captureTimestamp signifies per-URL access-check. resultTester.setCaptureTimestamp(null); return include(resultTester, true); }
@Override public boolean includeUrl(String urlKey, String originalUrl) { // return include(urlKey, originalUrl, true); if (UrlOperations.urlToScheme(originalUrl) == null) { originalUrl = UrlOperations.HTTP_SCHEME + originalUrl; } CaptureSearchResult resultTester = new FastCaptureSearchResult(); resultTester.setUrlKey(urlKey); resultTester.setOriginalUrl(originalUrl); // null captureTimestamp signifies per-URL access-check. resultTester.setCaptureTimestamp(null); return include(resultTester, true); }
@Override public CaptureSearchResult next() { String line = it.next(); String[] splits = line.split("\\s+"); CaptureSearchResult r = new CaptureSearchResult(); try { r.setCaptureDate(ArchiveUtils.parse14DigitDate(splits[0])); } catch (ParseException e) { e.printStackTrace(); } r.setOriginalUrl(url); r.setUrlKey(urlKey); // doesn't matter, or we get NPE r.setMimeType(splits[1]); r.setFile("foo"); // needed, or otherwise we'll get a NPE in CalendarResults.jsp r.setRedirectUrl("-"); r.setHttpCode("200"); r.setOffset(0); return r; }
/**
 * Prepares the fixture: the renderer under test, mocked collaborators, a
 * request with frame-wrapper context disabled, and a capture for
 * {@code http://www.example.com/} at {@code 20100101123456}.
 *
 * @throws Exception propagated from {@code super.setUp()} or the mock setup
 */
protected void setUp() throws Exception {
    super.setUp();

    // Renderer under test, configured with the archived-header prefix.
    RedirectRewritingHttpHeaderProcessor headerProcessor =
            new RedirectRewritingHttpHeaderProcessor();
    headerProcessor.setPrefix("X-Archive-Orig-");
    cut = new ArchivalUrlCSSReplayRenderer(headerProcessor);

    // Mocked collaborators; the response hands out the fixture's output stream.
    uriConverter = EasyMock.createMock(ResultURIConverter.class);
    response = EasyMock.createMock(HttpServletResponse.class);
    EasyMock.expect(response.getOutputStream()).andReturn(servletOutput);

    WaybackRequest request = new WaybackRequest();
    request.setFrameWrapperContext(false);
    wbRequest = request;

    CaptureSearchResult capture = new CaptureSearchResult();
    capture.setOriginalUrl("http://www.example.com/");
    capture.setCaptureTimestamp("20100101123456");
    result = capture;
}
// Set the offset, redirect URL, original URL and URL key on the result, then add it to results.
result.setOffset(offset); result.setRedirectUrl(redirectUrl); result.setOriginalUrl(originalUrl); result.setUrlKey(urlKey); results.add(result);
public void testAllow() { // object properties are not really used except for originalUrl. CaptureSearchResult capture = new FastCaptureSearchResult(); capture.setOriginalUrl("http://www.example.com/"); int rv = cut.filterObject(capture); assertEquals(CustomPolicyOracleFilter.FILTER_INCLUDE, rv); }
public void testRobots() { // AccessControlClient translates "robots" policy into either // "allow" or "block" when robotLookupsEnable is true - which // are tested above. "robots" policy is considered as "allow". acClient.policyToReturn = "robots"; // object properties are not really used except for originalUrl. CaptureSearchResult capture = new FastCaptureSearchResult(); capture.setOriginalUrl("http://www.example.com/"); int rv = cut.filterObject(capture); assertEquals(CustomPolicyOracleFilter.FILTER_INCLUDE, rv); } }
public void testBlockMessage() { acClient.policyToReturn = "block-message"; // object properties are not really used except for originalUrl. CaptureSearchResult capture = new FastCaptureSearchResult(); capture.setOriginalUrl("http://www.example.com/"); int rv = cut.filterObject(capture); // Now "block" returns FILTER_INCLUDE, "X" flag in robotflags. assertEquals(CustomPolicyOracleFilter.FILTER_EXCLUDE, rv); }
/**
 * Reports whether the filter excludes a capture that carries the given URL.
 * <p>
 * The probe capture gets the URL as its original URL and the canonicalized
 * form of that URL as its URL key.
 *
 * @param filter filter to consult
 * @param url    URL to test
 * @return {@code true} only when the filter answers
 *         {@code ObjectFilter.FILTER_EXCLUDE}
 * @throws URIException declared by the signature; presumably raised by the
 *                      canonicalizer when the URL cannot be turned into a key
 */
private boolean isBlocked(ObjectFilter<CaptureSearchResult> filter, String url)
        throws URIException {
    CaptureSearchResult probe = new CaptureSearchResult();
    probe.setOriginalUrl(url);
    probe.setUrlKey(canonicalizer.urlStringToKey(url));
    return filter.filterObject(probe) == ObjectFilter.FILTER_EXCLUDE;
}
// Use the fixed URL http://www.example.com/ as the result's original URL.
result.setOriginalUrl("http://www.example.com/");
public void testBlock() { acClient.policyToReturn = "block"; // object properties are not really used except for originalUrl. CaptureSearchResult capture = new FastCaptureSearchResult(); capture.setOriginalUrl("http://www.example.com/"); int rv = cut.filterObject(capture); // Now "block" returns FILTER_INCLUDE, "X" flag in robotflags. assertEquals(CustomPolicyOracleFilter.FILTER_INCLUDE, rv); assertEquals( Character.toString(CaptureSearchResult.CAPTURE_ROBOT_BLOCKED), capture.getRobotFlags()); }
// Use the fixed URL http://example.com/ as the result's original URL.
result.setOriginalUrl("http://example.com/");
@Override protected void setUp() throws Exception { uriConverter = new ArchivalUrlResultURIConverter(); uriConverter.setReplayURIPrefix("http://replay.archive.org/"); fact = new ArchivalUrlContextResultURIConverterFactory( (ArchivalUrlResultURIConverter)uriConverter); // The URL of the page, for resolving in-page relative URLs: CaptureSearchResult capture = new CaptureSearchResult(); capture.setCaptureTimestamp(timestamp); capture.setOriginalUrl(baseUrl); // urlKey is not set as it is unused // set up the context: context = new ReplayParseContext(fact, capture); context.setOutputCharset(outputCharset); delegator = new FastArchivalUrlReplayParseEventHandler(); delegator.setEndJsp(null); delegator.setJspInsertPath(null); delegator.init(); }