/**
 * Builds the normalized form of a document's main title.
 * <p>
 * The main title is fetched with {@code DocumentWrapperUtils.getMainTitle} and then
 * passed, in this order, through {@code StringTools.normalize},
 * {@code StringTools.replaceNumbersToDecimal} and
 * {@code StringTools.normalizePartQualifiers}.
 *
 * @param doc the document metadata whose main title is processed
 * @return the result of the last transformation step
 */
private String getNormalizedTitle(DocumentProtos.DocumentMetadata doc) {
    final String mainTitle = DocumentWrapperUtils.getMainTitle(doc);
    final String normalized = StringTools.normalize(mainTitle);
    final String withDecimalNumbers = StringTools.replaceNumbersToDecimal(normalized);
    return StringTools.normalizePartQualifiers(withDecimalNumbers);
}
/**
 * Returns the document's main title after it has been run through
 * {@code StringTools.normalize}.
 *
 * @param doc the document metadata whose main title is read
 * @return the normalized main title
 */
private static String compactTitle(DocumentProtos.DocumentMetadata doc) {
    return StringTools.normalize(DocumentWrapperUtils.getMainTitle(doc));
}
/**
 * Generates a key for the given document from its main title.
 * <p>
 * Steps, in order:
 * <ol>
 *   <li>the main title is normalized with {@code StringTools.normalize};</li>
 *   <li>it is passed through {@code StringTools.removeStopWords};</li>
 *   <li>every whitespace character is removed;</li>
 *   <li>only every second character is kept, starting with the first one;</li>
 *   <li>the result is cut to at most {@link #KEY_PART_LENGTH} characters.</li>
 * </ol>
 *
 * @param doc the document metadata to build the key for
 * @return the generated key, never longer than {@link #KEY_PART_LENGTH}
 */
@Override
public String generateKey(DocumentProtos.DocumentMetadata doc) {
    final String normalizedTitle = StringTools.normalize(DocumentWrapperUtils.getMainTitle(doc));
    final String condensed = StringTools.removeStopWords(normalizedTitle).replaceAll("\\s", "");

    // Sample the characters at indices 0, 2, 4, ...
    final StringBuilder sampled = new StringBuilder();
    int index = 0;
    while (index < condensed.length()) {
        sampled.append(condensed.charAt(index));
        index += 2;
    }

    final String key = sampled.toString();
    final int maxLength = KEY_PART_LENGTH;
    return key.length() > maxLength ? key.substring(0, maxLength) : key;
}
}
@Override public Vote vote(DocumentProtos.DocumentMetadata doc1, DocumentProtos.DocumentMetadata doc2) { String issn1 = extractIssn(doc1); String issn2 = extractIssn(doc2); if (issn1 != null && !issn1.isEmpty() && issn1.equals(issn2)) { return new Vote(Vote.VoteStatus.PROBABILITY, 1.0f); } String journal1 = extractJournal(doc1); String journal2 = extractJournal(doc2); if (journal1 == null || journal2 == null) { return new Vote(Vote.VoteStatus.ABSTAIN); } journal1 = StringTools.normalize(journal1); journal2 = StringTools.normalize(journal2); //SimilarityCalculator calculator = new LCSSimilarity(); SimilarityCalculator calculator = new EditDistanceSimilarity(approveLevel, disapproveLevel); float similarity = calculator.calculateSimilarity(journal1, journal2); if (similarity > 0) { return new Vote(Vote.VoteStatus.PROBABILITY, similarity); } else { return new Vote(Vote.VoteStatus.NOT_EQUALS); } }
surname = StringTools.normalize(surname);