/**
 * Builds the title of a document from the titles whose language matches
 * {@code language} (a value defined outside this method; compared
 * case-insensitively).
 *
 * <p>When several titles match they are joined with a single space.
 *
 * @param dm document metadata whose basic-metadata title list is scanned
 * @return the matching title text, or {@code null} when no title matches or
 *         the resulting text is empty after trimming
 */
private String extractLangTitle(DocumentMetadata dm) {
    List<String> matching = new ArrayList<String>();
    for (TextWithLanguage candidate : dm.getBasicMetadata().getTitleList()) {
        if (!language.equalsIgnoreCase(candidate.getLanguage())) {
            continue;
        }
        matching.add(candidate.getText());
    }

    // Nothing in the requested language: the record is dropped by the caller.
    if (matching.isEmpty()) {
        logger.info("No title IN GIVEN LANG (" + language + ") out of "
                + dm.getBasicMetadata().getTitleCount() + " titles. Ignoring record!");
        return null;
    }

    final String docTitle;
    if (matching.size() == 1) {
        docTitle = matching.get(0);
    } else {
        logger.info("Number of titles IN GIVEN LANGUAGE (" + language + ") is more then one. "
                + "Titles will be concatenated");
        docTitle = Joiner.on(" ").join(matching);
    }

    // A whitespace-only title is treated the same as a missing one.
    return docTitle.trim().isEmpty() ? null : docTitle;
}
for (TextWithLanguage title : dm.getBasicMetadata().getTitleList()) { titles.add(title.getText());
/**
 * Converts one input row holding a serialized {@code DocumentMetadata}
 * message into a map of text fields.
 *
 * <p>The resulting map contains the entries {@code "key"}, {@code "title"},
 * {@code "keywords"}, {@code "abstract"} and {@code "categories"}. Titles and
 * abstracts are each joined into one string separated by single spaces.
 *
 * @param input tuple whose first field is a {@code DataByteArray} with the
 *              serialized document metadata
 * @return map with the extracted fields
 * @throws IOException if the row cannot be parsed or processed; the original
 *                     exception is attached as the cause
 */
@Override
public Map exec(Tuple input) throws IOException {
    try {
        DataByteArray protoMetadata = (DataByteArray) input.get(0);
        DocumentMetadata metadata = DocumentMetadata.parseFrom(protoMetadata.get());

        List<String> titleList = new ArrayList<String>();
        for (TextWithLanguage title : metadata.getBasicMetadata().getTitleList()) {
            titleList.add(title.getText());
        }
        String titles = Joiner.on(" ").join(titleList);

        // Abstracts must come from the document's abstract list. Iterating the
        // title list here (as before) made "abstract" a copy of "title".
        List<String> abstractsList = new ArrayList<String>();
        for (TextWithLanguage documentAbstract : metadata.getDocumentAbstractList()) {
            abstractsList.add(documentAbstract.getText());
        }
        String abstracts = Joiner.on(" ").join(abstractsList);

        Map<String, Object> map = new HashMap<String, Object>();
        map.put("key", metadata.getKey());
        map.put("title", titles);
        map.put("keywords", getConcatenated(metadata.getKeywordsList()));
        map.put("abstract", abstracts);
        map.put("categories", getCategories(metadata.getBasicMetadata().getClassifCodeList()));
        return map;
    } catch (Exception e) {
        logger.error("Error in processing input row:", e);
        // Keep the textual stack trace in the message for existing log consumers,
        // and also chain the cause so it is not lost to callers.
        throw new IOException("Caught exception processing input row:\n"
                + StackTraceExtractor.getStackTrace(e), e);
    }
}
for(TextWithLanguage twl : dm.getBasicMetadata().getTitleList()){ if(twl.getLanguage().toLowerCase().startsWith("en")){ title=twl.getText();
for (TextWithLanguage title : dm.getBasicMetadata().getTitleList()) { titleList.add(title.getText()); for (TextWithLanguage documentAbstract : dm.getBasicMetadata().getTitleList()) { abstractsList.add(documentAbstract.getText());
for (TextWithLanguage title : dm.getBasicMetadata().getTitleList()) { titleList.add(title.getText()); for (TextWithLanguage documentAbstract : dm.getBasicMetadata().getTitleList()) { abstractsList.add(documentAbstract.getText());
for (TextWithLanguage title : dm.getBasicMetadata().getTitleList()) { titleList.add(title.getText()); for (TextWithLanguage documentAbstract : dm.getBasicMetadata().getTitleList()) { abstractsList.add(documentAbstract.getText());
for (TextWithLanguage title : metadata.getBasicMetadata().getTitleList()) { titleList.add(title.getText()); for (TextWithLanguage documentAbstract : metadata.getBasicMetadata().getTitleList()) { abstractsList.add(documentAbstract.getText());
for (TextWithLanguage title : metadata.getBasicMetadata().getTitleList()) { titleList.add(title.getText()); for (TextWithLanguage documentAbstract : metadata.getBasicMetadata().getTitleList()) { abstractsList.add(documentAbstract.getText());
/**
 * Writes fields taken from a document's metadata into the output tuple.
 *
 * <p>Sets the key, appends titles and abstracts through
 * {@code appendToOutput}, then stores the keywords and the authors
 * (keys and names) as data bags. Field positions are looked up in
 * {@code fieldNumberMap}.
 *
 * @param metadata document metadata to read from
 * @param output   tuple to fill; modified in place
 * @return the same {@code output} tuple
 * @throws ExecException declared by this method; presumably raised when
 *                       writing to the tuple fails
 */
private Tuple addDocumentMetatdataFields(DocumentMetadata metadata, Tuple output)
        throws ExecException {
    output.set(fieldNumberMap.get(C.KEY), metadata.getKey());
    appendToOutput(output, C.TITLE, metadata.getBasicMetadata().getTitleList());
    appendToOutput(output, C.ABSTRACT_TEXT, metadata.getDocumentAbstractList());

    List<String> keywords = collectCleanedKeywords(metadata);
    output.set(fieldNumberMap.get(C.KEYWORDS), listToDataBag(keywords));

    // Keys and names are kept in two parallel lists, one entry per author.
    List<String> contributorKeys = new ArrayList<String>();
    List<String> contributorNames = new ArrayList<String>();
    for (Author contributor : metadata.getBasicMetadata().getAuthorList()) {
        contributorKeys.add(contributor.getKey());
        contributorNames.add(contributor.getName());
    }
    output.set(fieldNumberMap.get(C.CONTRIBUTORS),
            listToDataBag(contributorKeys, contributorNames));

    return output;
}

/**
 * Flattens every keyword list of the document into a single list, passing
 * each keyword through {@code removeAllPigUnfriendlySigns}.
 */
private List<String> collectCleanedKeywords(DocumentMetadata metadata) {
    List<String> cleaned = new ArrayList<String>();
    for (KeywordsList group : metadata.getKeywordsList()) {
        for (String keyword : group.getKeywordsList()) {
            cleaned.add(removeAllPigUnfriendlySigns(keyword));
        }
    }
    return cleaned;
}
/**
 * Adds every title of the wrapped document to the Solr input document under
 * {@code SolrIndexConstants.DOC_TITLE_FIELD_NAME}, one field value per title.
 *
 * @param dw  wrapper providing the document metadata
 * @param doc Solr document receiving the title fields
 */
private void convertTitles(DocumentWrapper dw, SolrInputDocument doc) {
    for (TextWithLanguage entry
            : dw.getDocumentMetadata().getBasicMetadata().getTitleList()) {
        doc.addField(SolrIndexConstants.DOC_TITLE_FIELD_NAME, entry.getText());
    }
}