@Override public CaptureSearchResult next() { String line = it.next(); String[] splits = line.split("\\s+"); CaptureSearchResult r = new CaptureSearchResult(); try { r.setCaptureDate(ArchiveUtils.parse14DigitDate(splits[0])); } catch (ParseException e) { e.printStackTrace(); } r.setOriginalUrl(url); r.setUrlKey(urlKey); // doesn't matter, or we get NPE r.setMimeType(splits[1]); r.setFile("foo"); // needed, or otherwise we'll get a NPE in CalendarResults.jsp r.setRedirectUrl("-"); r.setHttpCode("200"); r.setOffset(0); return r; }
/**
 * Initializes with the {@code originalUrl} and {@code captureTimestamp}
 * taken from the given {@link CaptureSearchResult}.
 * @param capture capture to redirect to (source of URL and timestamp)
 * @param captures other captures for the target URI
 */
public BetterReplayRequestException(CaptureSearchResult capture,
		CaptureSearchResults captures) {
	this(capture.getOriginalUrl(), capture.getCaptureTimestamp(), captures);
}
/**
 * Marks this capture as a revisit of the earlier capture {@code payload},
 * matched by content digest.
 * <p>The payload's record location (file, offset, and compressed length when
 * known) is copied into this capture so the content can be loaded from the
 * original record later.</p>
 * <p>{@link ResourceIndex} implementations should call this before handing
 * {@code CaptureSearchResult}s back to {@code AccessPoint}.</p>
 * @param payload the capture being revisited
 * @see #getDuplicateDigestStoredTimestamp()
 * @see #getDuplicateDigestStoredDate()
 * @see #getDuplicatePayloadFile()
 * @see #getDuplicatePayloadOffset()
 * @see #getDuplicatePayloadCompressedLength()
 */
public void flagDuplicateDigest(CaptureSearchResult payload) {
	flagDuplicateDigest();
	put(CAPTURE_DUPLICATE_STORED_TS, payload.getCaptureTimestamp());
	put(CAPTURE_DUPLICATE_PAYLOAD_FILE, payload.getFile());
	put(CAPTURE_DUPLICATE_PAYLOAD_OFFSET, String.valueOf(payload.getOffset()));
	long compressedLength = payload.getCompressedLength();
	// compressed length is optional; only record it when actually known
	if (compressedLength > 0) {
		put(CAPTURE_DUPLICATE_PAYLOAD_COMPRESSED_LENGTH,
			String.valueOf(compressedLength));
	}
}
/**
 * Overrides the deprecated date-based variant to make sure the backing map
 * exists before the superclass writes revisit fields into it.
 * @param storedDate date of the capture whose payload this record revisits
 */
@SuppressWarnings("deprecation")
@Override
public void flagDuplicateDigest(Date storedDate) {
	// lazily create the field map before super stores into it
	super.ensureMap();
	super.flagDuplicateDigest(storedDate);
}
/**
 * Marks this capture as a revisit, recording only the stored capture's
 * timestamp.
 * @param storedDate date of the capture being revisited
 * @deprecated use {@code flagDuplicateDigest(CaptureSearchResult)}, which
 *             also copies the payload record's location
 */
public void flagDuplicateDigest(Date storedDate) {
	flagDuplicateDigest();
	put(CAPTURE_DUPLICATE_STORED_TS, dateToTS(storedDate));
}
// Builds one capture record from previously-parsed field values and appends
// it to the result set. (Fragment: the enclosing method and the declarations
// of fileName, captureDate, etc. are outside this view.)
CaptureSearchResult result = new CaptureSearchResult();
result.setFile(fileName);
result.setCaptureTimestamp(captureDate);
result.setHttpCode(httpCode);
result.setDigest(digest);
result.setMimeType(mimeType);
result.setOffset(offset);
result.setRedirectUrl(redirectUrl);
result.setOriginalUrl(originalUrl);
result.setUrlKey(urlKey);
results.add(result);
private int annotate(CaptureSearchResult o) { if(lastSeen == null) { // TODO: log missing record digest reference return FILTER_EXCLUDE; } o.setFile(lastSeen.getFile()); o.setOffset(lastSeen.getOffset()); o.setDigest(lastSeen.getDigest()); o.setHttpCode(lastSeen.getHttpCode()); o.setMimeType(lastSeen.getMimeType()); o.setRedirectUrl(lastSeen.getRedirectUrl()); o.flagDuplicateHTTP(lastSeen.getCaptureTimestamp()); return FILTER_INCLUDE; }
// NOTE(review): fragment — several closing braces appear to have been lost in
// extraction; token sequence is preserved exactly as found.
ResourceNotAvailableException {
	// not a digest-based revisit: nothing to resolve
	if (!closest.isRevisitDigest()) {
		LOGGER.warning("Revisit: record is not a revisit by identical content digest "
			+ closest.getCaptureTimestamp() + " " + closest.getOriginalUrl());
		return null;
	// When the revisit record carries the payload's location, build a
	// CaptureSearchResult pointing at the stored payload record.
	if (closest.getDuplicatePayloadFile() != null
		&& closest.getDuplicatePayloadOffset() != null) {
		payloadLocation = new CaptureSearchResult();
		payloadLocation.setFile(closest.getDuplicatePayloadFile());
		payloadLocation.setOffset(closest.getDuplicatePayloadOffset());
		payloadLocation.setCompressedLength(closest.getDuplicatePayloadCompressedLength());
		return payloadLocation;
	String captureTimestamp = payloadLocation.getCaptureTimestamp();
// NOTE(review): fragment — closing braces and intervening code appear lost in
// extraction; token sequence preserved exactly as found.
CaptureSearchResult result = new FastCaptureSearchResult();
// link the new result into the doubly-linked capture chain
if (prev != null) {
	prev.setNextResult(result);
	result.setPrevResult(prev);
	result.setOriginalUrl(originalUrl);
	result.setCaptureTimestamp(ts);
	result.setOffset(0);
	// synthetic file name derived from the timestamp — presumably for tests;
	// TODO confirm
	result.setFile(ts + ".warc.gz");
	// refers-to date present: try to match the referenced capture by timestamp
	if (res.getRefersToDate() != null) {
		if (r.getCaptureTimestamp().equals(refTimestamp)) {
			result.flagDuplicateDigest(r);
			refTimestamp = null;
			break;
	result.flagDuplicateDigest();
	// NOTE(review): the leading ".get(...)" below lost its receiver in
	// extraction
	.get(ArchiveFileConstants.URL_FIELD_KEY);
	String ts = meta.get(ArchiveFileConstants.DATE_FIELD_KEY);
	result.setOriginalUrl(originalUrl);
	result.setCaptureTimestamp(ts);
} else {
	throw new AssertionError("unexpected Resource type: " + res.getClass());
result.setHttpCode(Integer.toString(res.getStatusCode()));
/**
 * Decides whether a URL/timestamp pair is blocked from end users.
 * @param urlString URL that should be checked for blocking
 * @param timestamp 14-digit timestamp to check for blocking
 * @return true if the url-timestamp should not be shown to end users
 */
public boolean isExcluded(String urlString, String timestamp) {
	CaptureSearchResult query = new CaptureSearchResult();
	query.setOriginalUrl(urlString);
	query.setCaptureTimestamp(Timestamp.parseBefore(timestamp).getDateStr());
	// anything other than an explicit INCLUDE ruling counts as excluded
	return filter.filterObject(query) != ObjectFilter.FILTER_INCLUDE;
}
/**
 * Verifies that a capture whose index MIME type is {@code text/javascript}
 * is routed to the "js" replay renderer.
 */
public void testMimeTypeFromIndex() throws Exception {
	WaybackRequest request = new WaybackRequest();
	CaptureSearchResult capture = new CaptureSearchResult();
	capture.setMimeType("text/javascript");
	Resource payload = createTestResource("text/javascript",
		"var i=1;".getBytes("UTF-8"));
	ReplayRenderer renderer = cut.getRenderer(request, capture, payload);
	assertEquals("js", ((TestReplayRenderer) renderer).name);
}
/**
 * Checks whether the given URL is ruled out by the exclusion filter.
 * The URL is canonicalized into a url-key before filtering.
 * @param filter exclusion filter to consult
 * @param url original URL to test
 * @return true when the filter explicitly excludes the URL; any other ruling
 *         is treated as not blocked
 * @throws URIException when the URL cannot be canonicalized
 */
private boolean isBlocked(ObjectFilter<CaptureSearchResult> filter, String url)
		throws URIException {
	CaptureSearchResult result = new CaptureSearchResult();
	result.setOriginalUrl(url);
	result.setUrlKey(canonicalizer.urlStringToKey(url));
	// return the comparison directly instead of if(x) return true/false
	return filter.filterObject(result) == ObjectFilter.FILTER_EXCLUDE;
}
// NOTE(review): fragment — enclosing method and the declarations of ar,
// canonUrl, and r are outside this view; token sequence preserved as found.
result.setOriginalUrl(urlString);
result.setUrlKey(canonUrl);
int status = ar.getStatusCode();
// only successful (200) and redirect (3xx) responses populate date/MIME
if ((status == 200) || ((status >= 300) && (status < 400))) {
	result.setCaptureTimestamp(ar.getMetaData().getDate());
	result.setMimeType(ar.getMetaData().getMimetype());
return r;
@Override public boolean includeUrl(String urlKey, String originalUrl) { // return include(urlKey, originalUrl, true); if (UrlOperations.urlToScheme(originalUrl) == null) { originalUrl = UrlOperations.HTTP_SCHEME + originalUrl; } CaptureSearchResult resultTester = new FastCaptureSearchResult(); resultTester.setUrlKey(urlKey); resultTester.setOriginalUrl(originalUrl); // null captureTimestamp signifies per-URL access-check. resultTester.setCaptureTimestamp(null); return include(resultTester, true); }
/**
 * Flags the capture as a digest duplicate and, when an earlier capture with
 * the same digest was remembered, links the two via
 * {@code flagDuplicateDigest(CaptureSearchResult)}.
 * @param capture capture to annotate in place
 * @return always FILTER_INCLUDE
 */
private int annotate(CaptureSearchResult capture) {
	capture.flagDuplicateDigest();
	CaptureSearchResult previous = memory.get(capture.getDigest());
	if (previous == null) {
		if (LOGGER.isLoggable(Level.FINER)) {
			LOGGER.finer("did not find matching digest in previous fetch of url, hopefully it's a new-style revisit - "
				+ capture.getCaptureTimestamp() + " " + capture.getOriginalUrl());
		}
	} else {
		capture.flagDuplicateDigest(previous);
	}
	return FILTER_INCLUDE;
}
private boolean checkExclusionFilter( String uri ) { // Default to no exclusions: if( smef == null ) return true; // Otherwise: ExclusionFilter ef = smef.get(); CaptureSearchResult r = new CaptureSearchResult(); // r.setOriginalUrl(uri); r.setUrlKey( uri ); try { if( ef.filterObject( r ) == ExclusionFilter.FILTER_INCLUDE ) { return true; } } catch( Exception e ) { log.error( "Exclusion filtering failed with exception: " + e ); e.printStackTrace(); } log.debug( "EXCLUDING this URL due to filter: " + uri ); // Exclude: return false; }
// NOTE(review): fragment — the start of this condition and the enclosing
// method are outside this view; token sequence preserved as found.
// Same capture seen again (matching timestamp, URL, length and offset):
// update the previous result's file, then flag accumulated revisits as
// digest duplicates of this capture.
prevResult.getCaptureTimestamp().equals(timestamp) &&
	prevResult.getOriginalUrl().equals(originalUrl) &&
	prevLine.getLength().equals(line.getLength()) &&
	prevLine.getOffset().equals(line.getOffset())) {
	prevResult.setFile(currFile);
	if (revisits != null) {
		for (CaptureSearchResult revisit : revisits) {
			revisit.flagDuplicateDigest(result);
/**
 * Asks the rule oracle for the raw access policy governing a capture.
 * The capture's URL gets an implied http:// scheme added when missing, and
 * "now" is used as the retrieval date.
 * @param accessGroup access group the policy is evaluated for
 * @param capture capture whose URL and capture date are checked
 * @return raw policy string returned by the oracle client
 * @throws RobotsUnavailableException per client contract
 * @throws RuleOracleUnavailableException when the oracle cannot be reached
 */
protected String getRawPolicy(String accessGroup, CaptureSearchResult capture)
		throws RobotsUnavailableException, RuleOracleUnavailableException {
	return client.getPolicy(
		ArchiveUtils.addImpliedHttpIfNecessary(capture.getOriginalUrl()),
		capture.getCaptureDate(), new Date(), accessGroup);
}
/**
 * Converts one search-result node into a {@link CaptureSearchResult} by
 * delegating field extraction to {@code addNodeDataToSearchResult}.
 * @param e node holding the capture's fields
 * @return freshly populated CaptureSearchResult
 */
private CaptureSearchResult searchElementToCaptureSearchResult(Node e) {
	CaptureSearchResult capture = new CaptureSearchResult();
	addNodeDataToSearchResult(e, capture);
	return capture;
}
/**
 * Checks whether the given URL is ruled out by the exclusion filter.
 * Unlike the canonicalizing overload, only the original URL is set on the
 * probe result.
 * @param filter exclusion filter to consult
 * @param url original URL to test
 * @return true when the filter explicitly excludes the URL; any other ruling
 *         is treated as not blocked
 */
private boolean isBlocked(ObjectFilter<CaptureSearchResult> filter, String url) {
	CaptureSearchResult result = new CaptureSearchResult();
	result.setOriginalUrl(url);
	// return the comparison directly instead of if(x) return true/false
	return filter.filterObject(result) == ObjectFilter.FILTER_EXCLUDE;
}