/**
 * Renders a key and a document wrapper as a multi-line text block.
 *
 * The block starts with a dashed separator line, followed by the key, the row id,
 * the main title, the publication year, and one line per author
 * (position number, name, surname). It ends with an empty line.
 *
 * @param key             key printed on the "key" line
 * @param documentWrapper document whose row id, metadata and authors are printed
 * @return the assembled text
 */
private static String format(Writable key, DocumentWrapper documentWrapper) {
    StringBuilder text = new StringBuilder();

    text.append("-------------------------------------------\n");

    text.append("key : ");
    text.append(key);
    text.append("\n");

    text.append("rowid : ");
    text.append(documentWrapper.getRowId());
    text.append("\n");

    text.append("title0 : ");
    text.append(DocumentWrapperUtils.getMainTitle(documentWrapper.getDocumentMetadata()));
    text.append("\n");

    text.append("year : ");
    text.append(DocumentWrapperUtils.getPublicationYear(documentWrapper));
    text.append("\n");

    // One line per author: "<position>. <name> <surname>"
    for (Author person : documentWrapper.getDocumentMetadata().getBasicMetadata().getAuthorList()) {
        text.append(person.getPositionNumber());
        text.append(". ");
        text.append(person.getName());
        text.append(" ");
        text.append(person.getSurname());
        text.append("\n");
    }

    text.append("\n");
    return text.toString();
}
/**
 * Reads the issue from the document's basic metadata and parses it as an integer.
 *
 * @param doc document metadata to read the issue from
 * @return the parsed issue, or {@code null} when no issue is set or the value
 *         is not a valid integer (the latter case is logged as a warning)
 */
private static Integer extractIssue(DocumentProtos.DocumentMetadata doc) {
    DocumentProtos.BasicMetadata basic = doc.getBasicMetadata();
    if (!basic.hasIssue()) {
        return null;
    }

    String rawIssue = basic.getIssue();
    try {
        return Integer.parseInt(rawIssue);
    } catch (NumberFormatException ex) {
        // A non-numeric issue is reported and then treated like a missing one.
        log.warn("Cannot parse issue: " + rawIssue);
        return null;
    }
}
}
/**
 * Convenience accessor for
 * {@code documentWrapper.getDocumentMetadata().getBasicMetadata().getAuthorList()}.
 *
 * @param documentWrapper document to read the authors from
 * @return the author list from the basic metadata; a new empty list if that list
 *         is {@code null}, so the result is never {@code null}
 */
public static List<Author> getAuthors(DocumentWrapper documentWrapper) {
    List<Author> fromMetadata =
            documentWrapper.getDocumentMetadata().getBasicMetadata().getAuthorList();
    if (fromMetadata != null) {
        return fromMetadata;
    }
    return Lists.newArrayList();
}
/**
 * Copies selected document metadata into the output tuple.
 *
 * Written fields, in order: the document key, the titles, the abstracts, the
 * keywords (each passed through {@code removeAllPigUnfriendlySigns}) and the
 * contributors (author keys paired with author names). Field positions are
 * looked up in {@code fieldNumberMap}.
 *
 * @param metadata source document metadata
 * @param output   tuple to fill
 * @return the same {@code output} tuple that was passed in
 * @throws ExecException propagated from the tuple and helper calls
 */
private Tuple addDocumentMetatdataFields(DocumentMetadata metadata, Tuple output) throws ExecException {
    output.set(fieldNumberMap.get(C.KEY), metadata.getKey());
    appendToOutput(output, C.TITLE, metadata.getBasicMetadata().getTitleList());
    appendToOutput(output, C.ABSTRACT_TEXT, metadata.getDocumentAbstractList());

    // Flatten every keyword group into a single cleaned list.
    List<String> cleanedKeywords = new ArrayList<String>();
    for (KeywordsList group : metadata.getKeywordsList()) {
        for (String keyword : group.getKeywordsList()) {
            String cleaned = removeAllPigUnfriendlySigns(keyword);
            cleanedKeywords.add(cleaned);
        }
    }
    output.set(fieldNumberMap.get(C.KEYWORDS), listToDataBag(cleanedKeywords));

    // Two parallel lists: index i holds the key and the name of the same author.
    List<String> contributorKeys = new ArrayList<String>();
    List<String> contributorNames = new ArrayList<String>();
    for (Author contributor : metadata.getBasicMetadata().getAuthorList()) {
        contributorKeys.add(contributor.getKey());
        contributorNames.add(contributor.getName());
    }
    output.set(fieldNumberMap.get(C.CONTRIBUTORS), listToDataBag(contributorKeys, contributorNames));

    return output;
}
/**
 * Adds a person-id field value to the Solr document for every author external
 * id whose key equals
 * {@code CoansysModulesConstants.COANSYS_MODULE_ID_DISAMBIGUATION_AUTHOR}.
 *
 * @param dw  document whose authors are scanned
 * @param doc Solr document that receives the matching id values
 */
private void convertPersonIds(DocumentWrapper dw, SolrInputDocument doc) {
    for (Author person : dw.getDocumentMetadata().getBasicMetadata().getAuthorList()) {
        for (KeyValue externalId : person.getExtIdList()) {
            boolean isDisambiguationId = externalId.getKey()
                    .equals(CoansysModulesConstants.COANSYS_MODULE_ID_DISAMBIGUATION_AUTHOR);
            if (!isDisambiguationId) {
                continue;
            }
            doc.addField(SolrIndexConstants.DOC_PERSONID_FIELD_NAME, externalId.getValue());
        }
    }
}
}
/**
 * Returns the ISSN stored in the document's basic metadata.
 *
 * @param doc document metadata to read from
 * @return the ISSN, or {@code null} when the basic metadata has no ISSN set
 */
private static String extractIssn(DocumentProtos.DocumentMetadata doc) {
    DocumentProtos.BasicMetadata basic = doc.getBasicMetadata();
    if (basic.hasIssn()) {
        return basic.getIssn();
    }
    return null;
}
/**
 * Adds the document's ISSN and ISBN to the Solr parent-external-id field.
 *
 * Each non-blank identifier is written as
 * {@code <key> + FIELD_VALUES_SEPARATOR + <identifier>}; blank identifiers are skipped.
 * The ISSN, when present, is added before the ISBN.
 *
 * @param dw  document providing the identifiers
 * @param doc Solr document that receives the field values
 */
private void convertParentExtIds(DocumentWrapper dw, SolrInputDocument doc) {
    String issn = dw.getDocumentMetadata().getBasicMetadata().getIssn();
    if (StringUtils.isNotBlank(issn)) {
        String issnValue = SolrIndexConstants.DOC_PARENT_EXTID_KEY_ISSN
                + SolrIndexConstants.FIELD_VALUES_SEPARATOR
                + issn;
        doc.addField(SolrIndexConstants.DOC_PARENT_EXTID_FIELD_NAME, issnValue);
    }

    String isbn = dw.getDocumentMetadata().getBasicMetadata().getIsbn();
    if (StringUtils.isNotBlank(isbn)) {
        String isbnValue = SolrIndexConstants.DOC_PARENT_EXTID_KEY_ISBN
                + SolrIndexConstants.FIELD_VALUES_SEPARATOR
                + isbn;
        doc.addField(SolrIndexConstants.DOC_PARENT_EXTID_FIELD_NAME, isbnValue);
    }
}
/**
 * Returns the journal stored in the document's basic metadata.
 *
 * @param doc document metadata to read from
 * @return the journal, or {@code null} when the basic metadata has no journal set
 */
private static String extractJournal(DocumentProtos.DocumentMetadata doc) {
    DocumentProtos.BasicMetadata basic = doc.getBasicMetadata();
    if (basic.hasJournal()) {
        return basic.getJournal();
    }
    return null;
}
/**
 * Sets the Solr year field to the year from the document's basic metadata.
 * The value is written unconditionally, without any blank check.
 *
 * @param dw  document providing the year
 * @param doc Solr document whose year field is set
 */
private void convertYear(DocumentWrapper dw, SolrInputDocument doc) {
    String publicationYear = dw.getDocumentMetadata()
            .getBasicMetadata()
            .getYear();
    doc.setField(SolrIndexConstants.DOC_YEAR_FIELD_NAME, publicationYear);
}
/**
 * Supplies the pages value of the document's basic metadata as the numbers string.
 *
 * @param doc document metadata to read from
 * @return the pages string from the basic metadata
 */
@Override
protected String extractNumbersString(DocumentProtos.DocumentMetadata doc) {
    DocumentProtos.BasicMetadata basic = doc.getBasicMetadata();
    String pages = basic.getPages();
    return pages;
}
}
/**
 * Sets the contribution's document-year field to the year from the document's
 * basic metadata. The value is written unconditionally, without any blank check.
 *
 * @param dw  document providing the year
 * @param doc Solr document whose document-year field is set
 */
private void convertDocYear(DocumentWrapper dw, SolrInputDocument doc) {
    String publicationYear = dw.getDocumentMetadata()
            .getBasicMetadata()
            .getYear();
    doc.setField(SolrIndexConstants.CONTRIBUTION_DOCYEAR_FIELD_NAME, publicationYear);
}
}
/**
 * Supplies the year value of the document's basic metadata as the numbers string.
 *
 * @param doc document metadata to read from
 * @return the year string from the basic metadata
 */
@Override
protected String extractNumbersString(DocumentProtos.DocumentMetadata doc) {
    DocumentProtos.BasicMetadata basic = doc.getBasicMetadata();
    String year = basic.getYear();
    return year;
}
}
/**
 * Parses the serialized {@code DocumentWrapper} held in field 1 of the input
 * tuple and returns a two-field tuple of (document key, first title text).
 *
 * @param tuple input tuple; field 1 must hold the serialized document bytes
 * @return a tuple {@code (id, title)}; {@code null} when field 1 is null.
 *         The title is an empty string when the document has no titles.
 * @throws IOException if the field cannot be read or the bytes cannot be parsed
 */
@Override
public Tuple exec(Tuple tuple) throws IOException {
    DataByteArray dba = (DataByteArray) tuple.get(1);
    if (dba == null) {
        // A null field cannot be parsed; return null instead of failing with an NPE.
        return null;
    }

    DocumentProtos.DocumentWrapper docWrapper = DocumentProtos.DocumentWrapper.parseFrom(dba.get());
    DocumentProtos.DocumentMetadata metadata = docWrapper.getDocumentMetadata();
    DocumentProtos.BasicMetadata basicMetadata = metadata.getBasicMetadata();

    String id = metadata.getKey();
    // getTitle(0) throws IndexOutOfBoundsException on an empty title list,
    // so documents without a title get an empty title instead.
    String title = basicMetadata.getTitleCount() > 0
            ? basicMetadata.getTitle(0).getText()
            : "";

    return TupleFactory.getInstance().newTuple(Arrays.asList(id, title));
}
}
/**
 * Produces one Solr document per author listed in the document's basic metadata.
 *
 * Each document is created by {@code convertContribution} and then passed to
 * {@code convertDocExtIds} and {@code convertDocYear} together with the wrapper.
 *
 * @param dw document whose authors are converted
 * @return the created Solr documents in author-list order; empty when there are no authors
 */
public Collection<SolrInputDocument> convert(DocumentWrapper dw) {
    Collection<SolrInputDocument> result = new ArrayList<SolrInputDocument>(0);
    for (Author author : dw.getDocumentMetadata().getBasicMetadata().getAuthorList()) {
        SolrInputDocument contributionDoc = convertContribution(author);
        convertDocExtIds(dw, contributionDoc);
        convertDocYear(dw, contributionDoc);
        result.add(contributionDoc);
    }
    return result;
}
/**
 * Shortcut for
 * {@code documentWrapper.getDocumentMetadata().getBasicMetadata().getYear()}.
 *
 * @param documentWrapper document to read the year from
 * @return the year string from the document's basic metadata
 */
public static String getPublicationYear(DocumentWrapper documentWrapper) {
    String year = documentWrapper.getDocumentMetadata()
            .getBasicMetadata()
            .getYear();
    return year;
}
/**
 * Adds the journal from the document's basic metadata to the Solr
 * parent-title field, provided the journal is not blank.
 *
 * @param dw  document providing the journal
 * @param doc Solr document that receives the parent title
 */
private void convertParentTitles(DocumentWrapper dw, SolrInputDocument doc) {
    String journal = dw.getDocumentMetadata().getBasicMetadata().getJournal();
    boolean hasJournal = StringUtils.isNotBlank(journal);
    if (hasJournal) {
        doc.addField(SolrIndexConstants.DOC_PARENT_TITLE_FIELD_NAME, journal);
    }
}
/**
 * Supplies the volume and the issue of the document's basic metadata,
 * separated by a single space, as the numbers string.
 *
 * @param doc document metadata to read from
 * @return {@code volume + " " + issue}
 */
@Override
protected String extractNumbersString(DocumentProtos.DocumentMetadata doc) {
    DocumentProtos.BasicMetadata basic = doc.getBasicMetadata();
    StringBuilder numbers = new StringBuilder();
    numbers.append(basic.getVolume());
    numbers.append(" ");
    numbers.append(basic.getIssue());
    return numbers.toString();
}
}
/**
 * Adds the name of every author in the document's basic metadata to the
 * Solr author field, one field value per author.
 *
 * @param dw  document whose authors are read
 * @param doc Solr document that receives the author names
 */
private void convertAuthors(DocumentWrapper dw, SolrInputDocument doc) {
    for (Author person : dw.getDocumentMetadata().getBasicMetadata().getAuthorList()) {
        String authorName = person.getName();
        doc.addField(SolrIndexConstants.DOC_AUTHOR_FIELD_NAME, authorName);
    }
}
/**
 * Adds the text of every title in the document's basic metadata to the
 * Solr title field, one field value per title, in list order.
 *
 * @param dw  document whose titles are read
 * @param doc Solr document that receives the title texts
 */
private void convertTitles(DocumentWrapper dw, SolrInputDocument doc) {
    List<TextWithLanguage> allTitles = dw.getDocumentMetadata().getBasicMetadata().getTitleList();
    for (int i = 0; i < allTitles.size(); i++) {
        String titleText = allTitles.get(i).getText();
        doc.addField(SolrIndexConstants.DOC_TITLE_FIELD_NAME, titleText);
    }
}
/**
 * Returns the text of the first title in the document's basic metadata, i.e.
 * {@code documentMetadata.getBasicMetadata().getTitle(0).getText()}.
 *
 * @param documentMetadata metadata to read the title from
 * @return the first title's text, or an empty string when there are no titles
 */
public static String getMainTitle(DocumentProtos.DocumentMetadata documentMetadata) {
    BasicMetadata basic = documentMetadata.getBasicMetadata();
    if (basic.getTitleCount() <= 0) {
        return "";
    }
    return basic.getTitle(0).getText();
}