/**
 * Produces the serialized form of a capture's digest.
 *
 * @param result capture whose digest is read through {@code getDigest()}
 * @return the digest, or {@code DEFAULT_VALUE} when the digest is {@code null}
 */
public String serialize(CaptureSearchResult result) {
    final String digest = result.getDigest();
    if (digest != null) {
        return digest;
    }
    return DEFAULT_VALUE;
}
} // closes an enclosing scope whose opening brace is not visible here
/**
 * Produces the serialized form of a capture's digest.
 *
 * @param result capture whose digest is read through {@code getDigest()}
 * @return the digest, or {@code DEFAULT_VALUE} when the digest is {@code null}
 */
public String serialize(CaptureSearchResult result) {
    final String digest = result.getDigest();
    if (digest != null) {
        return digest;
    }
    return DEFAULT_VALUE;
}
} // closes an enclosing scope whose opening brace is not visible here
/**
 * Records the capture in {@code memory} under its digest.
 *
 * @param o capture to record; its {@code getDigest()} value is used as the key
 * @return always {@code FILTER_INCLUDE}
 */
private int remember(CaptureSearchResult o) {
    final String digestKey = o.getDigest();
    memory.put(digestKey, o);
    return FILTER_INCLUDE;
}
/**
 * Records the capture in {@code memory} under its digest.
 *
 * @param o capture to record; its {@code getDigest()} value is used as the key
 * @return always {@code FILTER_INCLUDE}
 */
private int remember(CaptureSearchResult o) {
    final String digestKey = o.getDigest();
    memory.put(digestKey, o);
    return FILTER_INCLUDE;
}
/**
 * Routes a capture to {@code annotate} or {@code remember} based on its file
 * and digest fields.
 *
 * <ul>
 *   <li>file equals {@code EMPTY_VALUE} and digest equals {@code EMPTY_SHA1}:
 *       the result of {@code annotate(o)} is returned;</li>
 *   <li>file equals {@code EMPTY_VALUE} and digest differs:
 *       {@code FILTER_INCLUDE} is returned without further processing;</li>
 *   <li>any other file value: the result of {@code remember(o)} is returned.</li>
 * </ul>
 *
 * <p>Comparisons are written constant-first so that a {@code null} file or
 * digest is treated as "not equal" instead of raising a
 * {@code NullPointerException}.
 *
 * @param o capture to inspect
 * @return the filter verdict as described above
 */
public int filterObject(CaptureSearchResult o) {
    // Constant-first equals: a null file simply falls through to remember(o).
    if (!EMPTY_VALUE.equals(o.getFile())) {
        return remember(o);
    }
    // Constant-first equals: a null digest is treated as "not EMPTY_SHA1".
    if (EMPTY_SHA1.equals(o.getDigest())) {
        return annotate(o);
    }
    return FILTER_INCLUDE;
}
/**
 * Routes a capture to {@code annotate} or {@code remember} based on its file
 * and digest fields.
 *
 * <ul>
 *   <li>file equals {@code EMPTY_VALUE} and digest equals {@code EMPTY_SHA1}:
 *       the result of {@code annotate(o)} is returned;</li>
 *   <li>file equals {@code EMPTY_VALUE} and digest differs:
 *       {@code FILTER_INCLUDE} is returned without further processing;</li>
 *   <li>any other file value: the result of {@code remember(o)} is returned.</li>
 * </ul>
 *
 * <p>Comparisons are written constant-first so that a {@code null} file or
 * digest is treated as "not equal" instead of raising a
 * {@code NullPointerException}.
 *
 * @param o capture to inspect
 * @return the filter verdict as described above
 */
public int filterObject(CaptureSearchResult o) {
    // Constant-first equals: a null file simply falls through to remember(o).
    if (!EMPTY_VALUE.equals(o.getFile())) {
        return remember(o);
    }
    // Constant-first equals: a null digest is treated as "not EMPTY_SHA1".
    if (EMPTY_SHA1.equals(o.getDigest())) {
        return annotate(o);
    }
    return FILTER_INCLUDE;
}
/**
 * Flags the capture as a duplicate digest and, when a capture with the same
 * digest is present in {@code memory}, flags it again with that earlier
 * capture as the argument.
 *
 * <p>When no earlier capture is found, a FINER-level message is logged (if
 * that level is enabled) and the capture keeps only the first flag.
 *
 * @param o capture to annotate
 * @return always {@code FILTER_INCLUDE}
 */
private int annotate(CaptureSearchResult o) {
    o.flagDuplicateDigest();

    final CaptureSearchResult previous = memory.get(o.getDigest());
    if (previous != null) {
        o.flagDuplicateDigest(previous);
    } else if (LOGGER.isLoggable(Level.FINER)) {
        LOGGER.finer("did not find matching digest in previous fetch of url, hopefully it's a new-style revisit - "
                + o.getCaptureTimestamp() + " " + o.getOriginalUrl());
    }
    return FILTER_INCLUDE;
}
/**
 * Flags the capture as a duplicate digest and, when a capture with the same
 * digest is present in {@code memory}, flags it again with that earlier
 * capture as the argument.
 *
 * <p>When no earlier capture is found, a FINER-level message is logged (if
 * that level is enabled) and the capture keeps only the first flag.
 *
 * @param o capture to annotate
 * @return always {@code FILTER_INCLUDE}
 */
private int annotate(CaptureSearchResult o) {
    o.flagDuplicateDigest();

    final CaptureSearchResult previous = memory.get(o.getDigest());
    if (previous != null) {
        o.flagDuplicateDigest(previous);
    } else if (LOGGER.isLoggable(Level.FINER)) {
        LOGGER.finer("did not find matching digest in previous fetch of url, hopefully it's a new-style revisit - "
                + o.getCaptureTimestamp() + " " + o.getOriginalUrl());
    }
    return FILTER_INCLUDE;
}
// Appends three fields of result to sb in order: HTTP code, digest, redirect URL,
// writing DELIMITER between consecutive fields (none after the redirect URL).
// NOTE(review): no null check on the fields — if sb is a StringBuilder/StringBuffer,
// a null digest would be written as the text "null"; confirm against the declaration of sb.
sb.append(result.getHttpCode()); sb.append(DELIMITER); sb.append(result.getDigest()); sb.append(DELIMITER); sb.append(result.getRedirectUrl());
// Appends three fields of result to sb in order: HTTP code, digest, redirect URL,
// writing DELIMITER between consecutive fields (none after the redirect URL).
// NOTE(review): no null check on the fields — if sb is a StringBuilder/StringBuffer,
// a null digest would be written as the text "null"; confirm against the declaration of sb.
sb.append(result.getHttpCode()); sb.append(DELIMITER); sb.append(result.getDigest()); sb.append(DELIMITER); sb.append(result.getRedirectUrl());
// Appends three fields of result to valSB in order: HTTP code, digest, redirect URL,
// writing DELIMITER between consecutive fields (none after the redirect URL).
// NOTE(review): no null check on the fields — if valSB is a StringBuilder/StringBuffer,
// a null digest would be written as the text "null"; confirm against the declaration of valSB.
valSB.append(result.getHttpCode()); valSB.append(DELIMITER); valSB.append(result.getDigest()); valSB.append(DELIMITER); valSB.append(result.getRedirectUrl());
// Appends three fields of result to valSB in order: HTTP code, digest, redirect URL,
// writing DELIMITER between consecutive fields (none after the redirect URL).
// NOTE(review): no null check on the fields — if valSB is a StringBuilder/StringBuffer,
// a null digest would be written as the text "null"; confirm against the declaration of valSB.
valSB.append(result.getHttpCode()); valSB.append(DELIMITER); valSB.append(result.getDigest()); valSB.append(DELIMITER); valSB.append(result.getRedirectUrl());
@Override public int filterObject(CaptureSearchResult o) { String thisHash = o.getDigest(); int result = FILTER_INCLUDE; // Only start filtering after minThreshold captures if (++numCaptures <= minThreshold) { return result; } Integer count = cache.remove(thisHash); if (count == null) { cache.put(thisHash, 1); } else { if (count >= maxDupeHashes) { result = FILTER_EXCLUDE; cache.put(thisHash, count); } else { cache.put(thisHash, count + 1); } } return result; }
@Override public int filterObject(CaptureSearchResult o) { String thisHash = o.getDigest(); int result = FILTER_INCLUDE; // Only start filtering after minThreshold captures if (++numCaptures <= minThreshold) { return result; } Integer count = cache.remove(thisHash); if (count == null) { cache.put(thisHash, 1); } else { if (count >= maxDupeHashes) { result = FILTER_EXCLUDE; cache.put(thisHash, count); } else { cache.put(thisHash, count + 1); } } return result; }
private int annotate(CaptureSearchResult o) { if(lastSeen == null) { // TODO: log missing record digest reference return FILTER_EXCLUDE; } o.setFile(lastSeen.getFile()); o.setOffset(lastSeen.getOffset()); o.setDigest(lastSeen.getDigest()); o.setHttpCode(lastSeen.getHttpCode()); o.setMimeType(lastSeen.getMimeType()); o.setRedirectUrl(lastSeen.getRedirectUrl()); o.flagDuplicateHTTP(lastSeen.getCaptureTimestamp()); return FILTER_INCLUDE; }
private int annotate(CaptureSearchResult o) { if(lastSeen == null) { // TODO: log missing record digest reference return FILTER_EXCLUDE; } o.setFile(lastSeen.getFile()); o.setOffset(lastSeen.getOffset()); o.setDigest(lastSeen.getDigest()); o.setHttpCode(lastSeen.getHttpCode()); o.setMimeType(lastSeen.getMimeType()); o.setRedirectUrl(lastSeen.getRedirectUrl()); o.flagDuplicateHTTP(lastSeen.getCaptureTimestamp()); return FILTER_INCLUDE; }