doi = dm.getBasicMetadata().getDoi().replaceAll("\\s++", " ").trim(); if(doi.length()==0){ throw new Exception("Lack of doi"); for (Author a : dm.getBasicMetadata().getAuthorList()) { try { String sname = a.getSurname();
for(TextWithLanguage twl : dm.getBasicMetadata().getTitleList()){ if(twl.getLanguage().toLowerCase().startsWith("en")){ title=twl.getText(); title = dm.getBasicMetadata().getTitle(0).getText(); doi = dm.getBasicMetadata().getDoi().replaceAll("\\s++", " ").trim(); }catch(Exception e){ }finally{ year = dm.getBasicMetadata().getYear().replaceAll("\\s++", " ").trim(); }catch(Exception e){ }finally{
/**
 * Extracts the document title written in the configured language ({@code language} field).
 *
 * @param dm document metadata holding zero or more titles, each tagged with a language
 * @return the matching title (multiple matches are concatenated with a single space),
 *         or {@code null} when no title in the requested language exists or the
 *         resulting title is blank
 */
private String extractLangTitle(DocumentMetadata dm) {
    List<String> titleList = new ArrayList<String>();
    for (TextWithLanguage title : dm.getBasicMetadata().getTitleList()) {
        if (language.equalsIgnoreCase(title.getLanguage())) {
            titleList.add(title.getText());
        }
    }
    String docTitle;
    switch (titleList.size()) {
        case 0:
            logger.info("No title IN GIVEN LANG (" + language + ") out of "
                    + dm.getBasicMetadata().getTitleCount() + " titles. Ignoring record!");
            return null;
        case 1:
            docTitle = titleList.get(0);
            break;
        default:
            // More than one title in the requested language: keep all of them.
            // (Fixed message typo: "more then one" -> "more than one".)
            logger.info("Number of titles IN GIVEN LANGUAGE (" + language + ") is more than one. "
                    + "Titles will be concatenated");
            docTitle = Joiner.on(" ").join(titleList);
            break;
    }
    // A whitespace-only title is as useless as a missing one.
    if (docTitle.trim().isEmpty()) {
        return null;
    }
    return docTitle;
}
for (TextWithLanguage title : dm.getBasicMetadata().getTitleList()) { titleList.add(title.getText()); for (TextWithLanguage documentAbstract : dm.getBasicMetadata().getTitleList()) { abstractsList.add(documentAbstract.getText());
for (ClassifCode code : dm.getBasicMetadata().getClassifCodeList()) { for (String co_str : code.getValueList()) { bagsize++; for (TextWithLanguage title : dm.getBasicMetadata().getTitleList()) { titleList.add(title.getText()); for (TextWithLanguage documentAbstract : dm.getBasicMetadata().getTitleList()) { abstractsList.add(documentAbstract.getText());
if (dm.getBasicMetadata().getClassifCodeCount() > 0) { hasCateg = true; for (TextWithLanguage title : dm.getBasicMetadata().getTitleList()) { titleList.add(title.getText()); for (TextWithLanguage documentAbstract : dm.getBasicMetadata().getTitleList()) { abstractsList.add(documentAbstract.getText());
private static Pair<String[], Boolean> extractSurnames(DocumentProtos.DocumentMetadata doc) { RegexpParser authorParser = new RegexpParser("authorParser.properties", "author"); List<DocumentProtos.Author> authorList = doc.getBasicMetadata().getAuthorList(); String[] resultByPositionNb = new String[authorList.size()]; String[] resultByOrder = new String[authorList.size()]; for (DocumentProtos.Author author : doc.getBasicMetadata().getAuthorList()) { String surname; if (author.hasSurname()) {
doi = dm.getBasicMetadata().getDoi().replaceAll("\\s++", " ").trim(); }catch(Exception e){ }finally{
for (TextWithLanguage title : metadata.getBasicMetadata().getTitleList()) { titleList.add(title.getText()); for (TextWithLanguage documentAbstract : metadata.getBasicMetadata().getTitleList()) { abstractsList.add(documentAbstract.getText()); DataBag db = getCategories(metadata.getBasicMetadata().getClassifCodeList()); if (db.size() > 0) { Map<String, Object> map = new HashMap<String, Object>();
if(commonDocumentMetadata == null){ commonDocumentMetadata = DocumentMetadata.newBuilder(documentMetadata); commonBasicMetadata = BasicMetadata.newBuilder(commonDocumentMetadata.getBasicMetadata()); continue; commonBasicMetadata.addAuthor(documentMetadata.getBasicMetadata().getAuthor(0));
for (TextWithLanguage title : dm.getBasicMetadata().getTitleList()) { titles.add(title.getText());
, Bytes.toBytes(HBaseConstant.FAMILY_METADATA_DOCUMENT_QUALIFIER_PROTO)) != null) { DocumentMetadata dm = DocumentMetadata.parseFrom(scannerResult.value()); for(Author a : dm.getBasicMetadata().getAuthorList()){ names.add(a.getForenames() + " " + a.getSurname());
for (TextWithLanguage title : metadata.getBasicMetadata().getTitleList()) { titleList.add(title.getText()); for (TextWithLanguage documentAbstract : metadata.getBasicMetadata().getTitleList()) { abstractsList.add(documentAbstract.getText()); map.put("keywords", getConcatenated(metadata.getKeywordsList())); map.put("abstract", abstracts); DataBag db = getCategories(metadata.getBasicMetadata().getClassifCodeList()); map.put("categories", db); long num = db.size();
/**
 * Pig UDF entry point: deserializes a protobuf-encoded {@code DocumentMetadata}
 * from the first tuple field and flattens it into a map with the keys
 * {@code key}, {@code title}, {@code keywords}, {@code abstract} and {@code categories}.
 *
 * @param input tuple whose field 0 is a {@link DataByteArray} with serialized metadata
 * @return map of extracted document fields
 * @throws IOException wrapping any parsing/processing failure (original stack trace attached)
 */
@Override
public Map exec(Tuple input) throws IOException {
    try {
        DataByteArray protoMetadata = (DataByteArray) input.get(0);
        DocumentMetadata metadata = DocumentMetadata.parseFrom(protoMetadata.get());

        List<String> titleList = new ArrayList<String>();
        for (TextWithLanguage title : metadata.getBasicMetadata().getTitleList()) {
            titleList.add(title.getText());
        }
        String titles = Joiner.on(" ").join(titleList);

        List<String> abstractsList = new ArrayList<String>();
        // BUG FIX: previously this loop iterated getTitleList() again, so the
        // "abstract" entry silently contained the titles. Abstracts live on
        // DocumentMetadata.getDocumentAbstractList() (cf. sibling UDFs in this codebase).
        for (TextWithLanguage documentAbstract : metadata.getDocumentAbstractList()) {
            abstractsList.add(documentAbstract.getText());
        }
        String abstracts = Joiner.on(" ").join(abstractsList);

        Map<String, Object> map = new HashMap<String, Object>();
        map.put("key", metadata.getKey());
        map.put("title", titles);
        map.put("keywords", getConcatenated(metadata.getKeywordsList()));
        map.put("abstract", abstracts);
        map.put("categories", getCategories(metadata.getBasicMetadata().getClassifCodeList()));
        return map;
    } catch (Exception e) {
        logger.error("Error in processing input row:", e);
        throw new IOException("Caught exception processing input row:\n"
                + StackTraceExtractor.getStackTrace(e));
    }
}
List<Author> aths = dw.getDocumentMetadata().getBasicMetadata().getAuthorList();
documentMetadata.addCollection("orcid"); BasicMetadata.Builder basicMetadata = BasicMetadata.newBuilder(); basicMetadata.setDoi(doi);
/**
 * Pulls a well-formed DOI out of the (possibly noisy) DOI field of the metadata.
 *
 * Handles two known data-corruption patterns before matching:
 * a pipe-separated duplicate ("doi|doi" or "doi|issn...") and a string that is
 * simply the same DOI written twice back-to-back.
 *
 * @param doc document metadata; its basic-metadata DOI field may be absent or dirty
 * @return the first substring matching the DOI pattern (10.NNNN/suffix), or
 *         {@code null} when the field is missing or no DOI can be recognized
 */
private static String extractDOI(DocumentProtos.DocumentMetadata doc) {
    DocumentProtos.BasicMetadata basic = doc.getBasicMetadata();
    if (!basic.hasDoi()) {
        return null;
    }

    String candidate = basic.getDoi().trim();

    // Case 1: "value|value" or "value|issn..." — keep the left part only.
    String[] parts = candidate.split("\\|");
    boolean pipeDuplicate = parts.length == 2
            && (parts[0].equals(parts[1]) || parts[1].startsWith("issn"));
    if (pipeDuplicate) {
        candidate = parts[0];
    } else if (candidate.length() % 2 == 0) {
        // Case 2: the DOI concatenated with itself — keep one half.
        int mid = candidate.length() / 2;
        String left = candidate.substring(0, mid);
        String right = candidate.substring(mid);
        if (left.equals(right)) {
            candidate = left;
        }
    }

    // Standard DOI shape: "10." + 4+ digits + "/" + suffix, trimmed of trailing junk.
    Pattern doiPattern =
            Pattern.compile(".*?(10[.][0-9]{4,}[^\\s\"/<>]*/[^\\s\"]+[^\\s\"\\]\\.;]).*");
    Matcher matcher = doiPattern.matcher(candidate);
    return matcher.matches() ? matcher.group(1) : null;
}
}
/**
 * Pig UDF: replaces the DOI inside a serialized {@code DocumentWrapper}.
 *
 * Expects a 3-field tuple: (row key, protobuf-serialized wrapper, corrected DOI).
 * Returns a 2-field tuple: (row key, re-serialized wrapper with the DOI swapped in),
 * or {@code null} when the input tuple is absent or malformed.
 *
 * @throws IOException wrapping any deserialization/processing failure
 */
public Tuple exec(Tuple input) throws IOException {
    if (input == null || input.size() != 3) {
        return null;
    }
    try {
        String key = (String) input.get(0);
        DocumentWrapper wrapper = DocumentWrapper.parseFrom(((DataByteArray) input.get(1)).get());
        String correctedDoi = (String) input.get(2);

        // Rebuild the nested message chain bottom-up with the corrected DOI.
        DocumentWrapper.Builder wrapperBuilder = DocumentWrapper.newBuilder(wrapper);
        DocumentMetadata.Builder metadataBuilder =
                DocumentMetadata.newBuilder(wrapper.getDocumentMetadata());
        BasicMetadata.Builder basicBuilder =
                BasicMetadata.newBuilder(metadataBuilder.getBasicMetadata());
        basicBuilder.setDoi(correctedDoi);
        metadataBuilder.setBasicMetadata(basicBuilder);
        wrapperBuilder.setDocumentMetadata(metadataBuilder);

        Tuple result = TupleFactory.getInstance().newTuple();
        result.append(key);
        result.append(new DataByteArray(wrapperBuilder.build().toByteArray()));
        return result;
    } catch (Exception e) {
        logger.error("Error in processing input row:" + StackTraceExtractor.getStackTrace(e), e);
        throw new IOException("Caught exception processing input row:\n"
                + StackTraceExtractor.getStackTrace(e));
    }
}
}
/**
 * Copies document-level metadata fields (key, titles, abstracts, keywords,
 * contributors) into the given output tuple at the positions dictated by
 * {@code fieldNumberMap}.
 *
 * @param metadata source document metadata
 * @param output   tuple being populated; mutated in place
 * @return the same {@code output} tuple, for call chaining
 * @throws ExecException when a tuple position cannot be set
 */
private Tuple addDocumentMetatdataFields(DocumentMetadata metadata, Tuple output)
        throws ExecException {
    output.set(fieldNumberMap.get(C.KEY), metadata.getKey());
    appendToOutput(output, C.TITLE, metadata.getBasicMetadata().getTitleList());
    appendToOutput(output, C.ABSTRACT_TEXT, metadata.getDocumentAbstractList());

    // Flatten all keyword lists into one bag, sanitizing each entry for Pig.
    List<String> keywords = new ArrayList<String>();
    for (KeywordsList keywordsList : metadata.getKeywordsList()) {
        for (String keyword : keywordsList.getKeywordsList()) {
            keywords.add(removeAllPigUnfriendlySigns(keyword));
        }
    }
    output.set(fieldNumberMap.get(C.KEYWORDS), listToDataBag(keywords));

    // Contributors are emitted as parallel (key, name) lists.
    List<String> authorKeys = new ArrayList<String>();
    List<String> authorNames = new ArrayList<String>();
    for (Author author : metadata.getBasicMetadata().getAuthorList()) {
        authorKeys.add(author.getKey());
        authorNames.add(author.getName());
    }
    output.set(fieldNumberMap.get(C.CONTRIBUTORS), listToDataBag(authorKeys, authorNames));

    return output;
}
/**
 * Converts a Google Scholar media record into BW2 document metadata.
 *
 * Parses the media payload as a {@code ScholarRecordP} and, on success, fills
 * the supplied {@code DocumentMetadata.Builder} (including a freshly built
 * {@code BasicMetadata}) via {@code translateGoogleScholarToDocumentMetadata}.
 *
 * @param media    record whose content is a serialized ScholarRecordP
 * @param docNewId identifier assigned to the resulting document
 * @param dmBuider builder receiving the translated metadata (mutated on success)
 * @param builder  wrapper builder (unused here; part of the interface contract)
 * @return {@code true} when parsing and translation succeed, {@code false} otherwise
 */
@Override
public boolean transform(Media media, String docNewId,
        DocumentMetadata.Builder dmBuider, DocumentProtos.DocumentWrapper.Builder builder) {
    // toByteArray() already returns a fresh array, so the previous extra
    // Arrays.copyOf() duplicated the payload for no benefit and was removed.
    byte[] content = media.getContent().toByteArray();

    ScholarRecordP record;
    try {
        record = ScholarRecordP.parseFrom(content);
    } catch (InvalidProtocolBufferException ex) {
        java.util.logging.Logger.getLogger(GsMediaToBw2Metadata.class.getName())
                .log(Level.SEVERE, null, ex);
        return false;
    }

    BasicMetadata.Builder bmBuilder = BasicMetadata.newBuilder();
    AuthorData ad = new AuthorData();
    ad.docId = docNewId;
    if (translateGoogleScholarToDocumentMetadata(record, dmBuider, bmBuilder, ad)) {
        dmBuider.setBasicMetadata(bmBuilder);
        return true;
    }
    return false;
}
}