/**
 * Decodes the media content as UTF-8 text, hands it to {@code reader} together with the
 * document id, and adds whatever the reader returns to {@code result}.
 *
 * @param media       media whose content bytes are decoded and parsed
 * @param docNewId    identifier that is logged and passed to the reader
 * @param result      accumulator that receives the reader's output
 * @param mediaToCopy not used by this implementation
 * @return {@code true} once the reader's output has been added to {@code result};
 *         {@code false} if an {@code UnsupportedEncodingException} was caught (and logged)
 */
@Override
public boolean transform(DocumentProtos.Media media, String docNewId, MultiTypeParseResult result,
        DocumentProtos.MediaContainerOrBuilder mediaToCopy) {
    boolean parsed = false;
    try {
        log.info("UMultiRank organization parsing "+docNewId);
        byte[] rawContent = media.getContent().toByteArray();
        InputStreamReader contentReader =
                new InputStreamReader(new ByteArrayInputStream(rawContent), "UTF-8");
        result.add(reader.read(contentReader, docNewId));
        parsed = true;
    } catch (UnsupportedEncodingException ex) {
        log.error(ex.getMessage(), ex);
    }
    return parsed;
}
} // closes the enclosing type, whose header is outside this snippet
// Parse the media content, exposed as an input stream, into a Document.
// NOTE(review): `db` is declared outside this snippet — presumably a javax.xml.parsers.DocumentBuilder;
// if the media can come from untrusted sources, confirm the builder disables DTDs/external entities.
Document doc = db.parse(media.getContent().newInput());
/**
 * Reads the media content as a UTF-8 string and, when {@code reader} yields exactly one
 * document, merges that document's metadata into {@code dmBuider} and then calls
 * {@code clearCollection()} on it.
 *
 * @param media    media whose content is parsed
 * @param docNewId not used by this implementation
 * @param dmBuider metadata builder that receives the merged metadata
 * @param builder  not used by this implementation
 * @return {@code true} only when exactly one document was read and merged; {@code false}
 *         for any other count or when an exception is caught (both cases are logged)
 */
@Override
public boolean transform(DocumentProtos.Media media, String docNewId,
        DocumentProtos.DocumentMetadata.Builder dmBuider, DocumentProtos.DocumentWrapper.Builder builder) {
    try {
        String content = media.getContent().toStringUtf8();
        List<DocumentProtos.DocumentWrapperOrBuilder> parsedDocs = reader.read(content);
        if (parsedDocs.size() != 1) {
            LOGGER.error("There was exactly one record in input string; number of output items: " + parsedDocs.size());
            return false;
        }
        DocumentProtos.DocumentMetadata metadata = parsedDocs.get(0).getDocumentMetadata();
        dmBuider.mergeFrom(metadata);
        dmBuider.clearCollection();
        return true;
    } catch (Exception ex) {
        LOGGER.error("Error: ", ex);
        return false;
    }
}
} // closes the enclosing type, whose header is outside this snippet
InputStream pdfIS = media.getContent().newInput(); try {
NlmToYTransformer jatsReader=new NlmToYTransformer(); String art=new String(media.getContent().toByteArray(),"UTF-8"); List<YExportable> exps=null; if (art.contains(" PUBLIC \"-//NLM//DTD JATS ")) {
if (media.getMediaType().equals(ProtoConstants.mediaTypePdf)) { try { sb.append(extractTextFromPdf(media.getContent().toByteArray(), this.lang)); } catch (Exception ex) { logger.error("Cannot extract text from PDF: " + ex.toString() + " " + media.getSourcePath()); sb.append(filterTextByLang(media.getContent().toStringUtf8(), lang.getLangCode()));
OafId rootId=new OafId(docNewId); rootId.isComacId=true; List<MultiTypeParseResult> readResults=reader.read(new String(media.getContent().toByteArray(),"UTF-8"),rootId,idTranslator); readResults.stream().forEach((MultiTypeParseResult t) -> { t.getDocuments().forEach((DocumentProtos.DocumentWrapperOrBuilder b) -> {
/**
 * Decodes the media content as UTF-8, lets {@code reader} parse it under a {@code PbnId}
 * built from {@code docNewId} (flagged with {@code isComacId = true}), and copies every
 * document, person, project and organization from the parse results into {@code result}.
 *
 * @param media       media whose content bytes are decoded and parsed
 * @param docNewId    identifier that is logged and used to build the root id
 * @param result      accumulator that receives all parsed entities
 * @param mediaToCopy not used by this implementation
 * @return {@code true} when the reader returned at least one parse result; {@code false}
 *         when it returned none or an {@code UnsupportedEncodingException} was caught
 */
@Override
public boolean transform(DocumentProtos.Media media, String docNewId, MultiTypeParseResult result,
        DocumentProtos.MediaContainerOrBuilder mediaToCopy) {
    try {
        log.info("Opeanir records parsing: "+docNewId);
        PbnId rootId = new PbnId(docNewId);
        rootId.isComacId = true;
        String content = new String(media.getContent().toByteArray(), "UTF-8");
        List<MultiTypeParseResult> partialResults = reader.read(content, rootId);
        for (MultiTypeParseResult partial : partialResults) {
            // Lambda parameter types stay explicit so the same result.add(...) overloads are selected.
            partial.getDocuments().forEach(
                    (DocumentProtos.DocumentWrapperOrBuilder doc) -> result.add(doc));
            partial.getPersons().forEach(
                    (PersonProtos.PersonWrapperOrBuilder person) -> result.add(person));
            partial.getProjects().forEach(
                    (ProjectProtos.ProjectWrapperOrBuilder project) -> result.add(project));
            partial.getOrganizations().forEach(
                    (OrganizationProtos.OrganizationWrapperOrBuilder organization) -> result.add(organization));
        }
        return !partialResults.isEmpty();
    } catch (UnsupportedEncodingException ex) {
        log.error(ex.getMessage(), ex);
        return false;
    }
}
} // closes the enclosing type, whose header is outside this snippet
/**
 * Parses the media content as a {@code ScholarRecordP} message and translates it into
 * document metadata via {@code translateGoogleScholarToDocumentMetadata}.
 *
 * @param media    media whose content bytes hold the serialized record
 * @param docNewId identifier stored in the {@code AuthorData} passed to the translation
 * @param dmBuider metadata builder filled by the translation; on success it also receives
 *                 the basic-metadata builder
 * @param builder  not used by this implementation
 * @return {@code true} when the record was parsed and translated; {@code false} when parsing
 *         fails (logged at SEVERE) or the translation reports failure
 */
@Override
public boolean transform(Media media, String docNewId, DocumentMetadata.Builder dmBuider,
        DocumentProtos.DocumentWrapper.Builder builder) {
    final ScholarRecordP record;
    try {
        // toByteArray() already returns a freshly copied array (assuming getContent() is a
        // protobuf ByteString, as its use suggests), so no second defensive copy is made.
        record = ScholarRecordP.parseFrom(media.getContent().toByteArray());
    } catch (InvalidProtocolBufferException ex) {
        java.util.logging.Logger.getLogger(GsMediaToBw2Metadata.class.getName()).log(Level.SEVERE, null, ex);
        return false;
    }

    // Builders are only needed once the record has been parsed successfully.
    BasicMetadata.Builder bmBuilder = BasicMetadata.newBuilder();
    AuthorData ad = new AuthorData();
    ad.docId = docNewId;
    if (!translateGoogleScholarToDocumentMetadata(record, dmBuider, bmBuilder, ad)) {
        return false;
    }
    dmBuider.setBasicMetadata(bmBuilder);
    return true;
}
} // closes the enclosing type, whose header is outside this snippet
/**
 * Reads the media content as a UTF-8 string and, when {@code reader} yields exactly one
 * document, merges its metadata into {@code dmBuider}, calls {@code clearCollection()} on it,
 * then reads the same content with {@code yreader} and passes that list to
 * {@code addBwmetaMedia} together with the document id and {@code builder}.
 *
 * @param media    media whose content is parsed
 * @param docNewId identifier forwarded to {@code addBwmetaMedia}
 * @param dmBuider metadata builder that receives the merged metadata
 * @param builder  wrapper builder forwarded to {@code addBwmetaMedia}
 * @return {@code true} only when exactly one document was read and every step completed;
 *         {@code false} for any other count or when an exception is caught (both are logged)
 */
@Override
public boolean transform(DocumentProtos.Media media, String docNewId,
        DocumentProtos.DocumentMetadata.Builder dmBuider, DocumentProtos.DocumentWrapper.Builder builder) {
    try {
        String content = media.getContent().toStringUtf8();
        List<DocumentProtos.DocumentWrapperOrBuilder> parsedDocs = reader.read(content);
        if (parsedDocs.size() != 1) {
            LOGGER.error("There was exactly one record in input string; number of output items: " + parsedDocs.size());
            return false;
        }
        DocumentProtos.DocumentMetadata metadata = parsedDocs.get(0).getDocumentMetadata();
        dmBuider.mergeFrom(metadata);
        dmBuider.clearCollection();
        List<YExportable> exportables = yreader.read(content);
        addBwmetaMedia(exportables, docNewId, builder);
        return true;
    } catch (Exception ex) {
        LOGGER.error("Error: ", ex);
        return false;
    }
}
/**
 * Reads the media content with the BWMETA 2.1 reader, converts every exported element to
 * {@code DocumentMetadata} through {@code parser.yelementToDocumentMetadata}, merges each
 * into {@code dmBuider}, and finally calls {@code clearCollection()} on it.
 *
 * @param media    media whose content stream is parsed
 * @param docNewId not used by this implementation
 * @param dmBuider metadata builder that receives the merged metadata
 * @param builder  not used by this implementation
 * @return always {@code true}; failures surface as unchecked exceptions from the calls made here
 */
@Override
public boolean transform(DocumentProtos.Media media, String docNewId,
        DocumentProtos.DocumentMetadata.Builder dmBuider, DocumentProtos.DocumentWrapper.Builder builder) {
    // Decode explicitly as UTF-8 instead of the JVM default charset, so the parsed text does not
    // depend on the host's locale settings.
    // NOTE(review): assumes the payload is UTF-8 encoded — confirm.
    InputStreamReader contentReader = new InputStreamReader(
            media.getContent().newInput(), java.nio.charset.StandardCharsets.UTF_8);
    List<YExportable> yExportableList = MetadataTransformers.BTF
            .getReader(BwmetaTransformerConstants.BWMETA_2_1, BwmetaTransformerConstants.Y)
            .read(contentReader);
    for (YExportable yExportable : yExportableList) {
        DocumentProtos.DocumentMetadata dm = parser
                .yelementToDocumentMetadata((YElement) yExportable, null, null, "synat");
        dmBuider.mergeFrom(dm);
    }
    dmBuider.clearCollection();
    return true;
}
/**
 * Wraps the media's content bytes in an {@code InMemoryResource}, described by
 * {@code getDescription(id, media)}, and builds a {@code BWMetaFile} around it.
 *
 * @param id    identifier given to the resulting file and to the description lookup
 * @param media media whose content bytes back the resource
 * @return a new {@code BWMetaFile} created with the id, an empty string, the in-memory
 *         resource and {@code null} as its last constructor argument
 */
private static BWMetaFile fromMedia(String id, Media media) {
    final byte[] payload = media.getContent().toByteArray();
    final Resource inMemory = new InMemoryResource(payload, getDescription(id, media));
    return new BWMetaFile(id, StringUtils.EMPTY, inMemory, null);
}
/**
 * Reads the media content with {@code reader}, using a {@code DataciteOAIId} built from
 * {@code docNewId}, merges the metadata of every returned document wrapper into
 * {@code dmBuider}, and finally calls {@code clearCollection()} on it.
 *
 * @param media    media whose content stream is parsed
 * @param docNewId identifier wrapped in the {@code DataciteOAIId} passed to the reader
 * @param dmBuider metadata builder that receives the merged metadata
 * @param builder  not used by this implementation
 * @return always {@code true}; failures surface as unchecked exceptions from the calls made here
 */
@Override
public boolean transform(DocumentProtos.Media media, String docNewId,
        DocumentProtos.DocumentMetadata.Builder dmBuider, DocumentProtos.DocumentWrapper.Builder builder) {
    // Decode explicitly as UTF-8 instead of the JVM default charset, so the parsed text does not
    // depend on the host's locale settings.
    // NOTE(review): assumes the payload is UTF-8 encoded — confirm.
    InputStreamReader contentReader = new InputStreamReader(
            media.getContent().newInput(), java.nio.charset.StandardCharsets.UTF_8);
    List<DocumentProtos.DocumentWrapperOrBuilder> dwbList =
            reader.read(contentReader, new DataciteOAIId(docNewId));
    for (DocumentProtos.DocumentWrapperOrBuilder dw : dwbList) {
        dmBuider.mergeFrom(dw.getDocumentMetadata());
    }
    dmBuider.clearCollection();
    return true;
}