/**
 * Returns the main title of the given document metadata after passing it
 * through {@code StringTools.normalize}.
 *
 * @param doc document metadata whose main title is read
 * @return the result of {@code StringTools.normalize} applied to the main title
 */
private static String compactTitle(DocumentProtos.DocumentMetadata doc) {
    return StringTools.normalize(DocumentWrapperUtils.getMainTitle(doc));
}
/**
 * Builds a multi-line, human-readable dump of one document entry.
 * <p>
 * The output starts with a dashed separator line, followed by the key, the
 * row id, the main title and the publication year (one per line), then one
 * line per author in the form {@code <position>. <name> <surname>}, and is
 * terminated by an empty line.
 *
 * @param key             key printed on the {@code key :} line
 * @param documentWrapper document whose row id, metadata and authors are printed
 * @return the formatted text block
 */
private static String format(Writable key, DocumentWrapper documentWrapper) {
    final String newline = "\n";
    StringBuilder out = new StringBuilder("-------------------------------------------\n");

    out.append("key : ");
    out.append(key);
    out.append(newline);

    out.append("rowid : ");
    out.append(documentWrapper.getRowId());
    out.append(newline);

    out.append("title0 : ");
    out.append(DocumentWrapperUtils.getMainTitle(documentWrapper.getDocumentMetadata()));
    out.append(newline);

    out.append("year : ");
    out.append(DocumentWrapperUtils.getPublicationYear(documentWrapper));
    out.append(newline);

    // One line per author: "<position>. <name> <surname>"
    for (Author contributor : documentWrapper.getDocumentMetadata().getBasicMetadata().getAuthorList()) {
        out.append(contributor.getPositionNumber());
        out.append(". ");
        out.append(contributor.getName());
        out.append(" ");
        out.append(contributor.getSurname());
        out.append(newline);
    }

    out.append(newline);
    return out.toString();
}
/**
 * Generates a key for the given document metadata, derived from its main title.
 * <p>
 * The main title is passed through {@code StringTools.normalize} and
 * {@code StringTools.removeStopWords}, all whitespace is stripped, and then only
 * every second character is kept (the 1st, 3rd, 5th, ... i.e. indices 0, 2, 4, ...).
 * The result is truncated to at most {@link #KEY_PART_LENGTH} characters.
 *
 * @param doc document metadata whose main title is the source of the key
 * @return the generated key, at most {@link #KEY_PART_LENGTH} characters long
 */
@Override
public String generateKey(DocumentProtos.DocumentMetadata doc) {
    String docKey = DocumentWrapperUtils.getMainTitle(doc);
    docKey = StringTools.normalize(docKey);
    docKey = StringTools.removeStopWords(docKey);
    docKey = docKey.replaceAll("\\s", "");

    // Keep only the characters at even indices (0, 2, 4, ...).
    StringBuilder oddCharsSB = new StringBuilder((docKey.length() + 1) / 2);
    for (int i = 0; i < docKey.length(); i += 2) {
        oddCharsSB.append(docKey.charAt(i));
    }

    // Truncate to the configured key length; shorter keys are returned unchanged.
    int keyLength = Math.min(oddCharsSB.length(), KEY_PART_LENGTH);
    return oddCharsSB.substring(0, keyLength);
}
}
/**
 * Returns the main title of the given document metadata after applying, in
 * order, {@code StringTools.normalize},
 * {@code StringTools.replaceNumbersToDecimal} and
 * {@code StringTools.normalizePartQualifiers}.
 *
 * @param doc document metadata whose main title is read
 * @return the main title after the three transformations above
 */
private String getNormalizedTitle(DocumentProtos.DocumentMetadata doc) {
    final String mainTitle = DocumentWrapperUtils.getMainTitle(doc);
    final String normalized = StringTools.normalize(mainTitle);
    final String withDecimalNumbers = StringTools.replaceNumbersToDecimal(normalized);
    return StringTools.normalizePartQualifiers(withDecimalNumbers);
}