/**
 * Reads the gzip-compressed tar archive at {@code inputTar} and stores the records
 * parsed from each XML entry.
 *
 * <p>For every regular-file entry whose name ends with {@code "xml"}, the entry bytes are
 * read fully into memory, parsed with {@code orcidReader.parseOrcid}, and each returned
 * builder is built and passed to {@code connector.store}.
 *
 * <p>The archive stream is always closed before this method returns, whether it completes
 * normally or throws.
 *
 * @throws FileNotFoundException if {@code inputTar} cannot be opened
 * @throws IOException if reading the archive (or closing it) fails
 */
public void process() throws FileSystemException, JAXBException, FileNotFoundException, UnsupportedEncodingException, IOException {
    final FileInputStream fileInput = new FileInputStream(inputTar);
    TarArchiveInputStream tais = null;
    try {
        tais = new TarArchiveInputStream(new GZIPInputStream(fileInput));
        TarArchiveEntry entry;
        while ((entry = tais.getNextTarEntry()) != null) {
            if (entry.isFile() && entry.getName().endsWith("xml")) {
                // Buffer the entry so the parser reads from its own stream and the
                // archive stream itself is left open for the following entries.
                byte[] content = IOUtils.toByteArray(tais);
                List<Builder> b = orcidReader.parseOrcid(new ByteArrayInputStream(content));
                for (Builder bo : b) {
                    connector.store(bo.build());
                }
            }
        }
    } finally {
        if (tais != null) {
            // Closing the outermost stream releases the whole wrapper chain.
            tais.close();
        } else {
            // Wrapping failed (e.g. the file is not valid gzip); release the raw file handle.
            fileInput.close();
        }
    }
}
return dw.build();
/**
 * Builds the merged document for one input tuple.
 *
 * <p>The starting builder comes from {@code mainBlockParsing(tuple)}. Every entry of
 * {@code actions} except the one at position {@code mainGroupIndex} is then applied in
 * order: the merge strategy class named by {@code MergeMapping.hm} for that action is
 * instantiated reflectively and executed against tuple field {@code 2 * position + 1}.
 * A failure in a single strategy is logged and that strategy is skipped.
 *
 * @param tuple the input tuple; validated by {@code checkCorrectness} first
 * @return a tuple holding {@code docId} followed by the serialized merged document
 * @throws IOException if an {@code IOException} escapes outside the per-strategy handling
 *         (it is logged and rethrown)
 */
@Override
public Tuple exec(Tuple tuple) throws IOException {
    checkCorrectness(tuple);
    try {
        DocumentWrapper.Builder merged = mainBlockParsing(tuple);

        int nextPosition = 0;
        for (String action : actions) {
            final int position = nextPosition++;
            if (position != mainGroupIndex) {
                try {
                    final String strategyClassName =
                            "pl.edu.icm.coansys.output.merge.all.strategies." + MergeMapping.hm.get(action);
                    final IMerge strategy = (IMerge) Class.forName(strategyClassName).newInstance();
                    merged = strategy.execute(tuple, 2 * position + 1, merged);
                } catch (Exception e) {
                    // Best effort: one broken strategy must not abort the remaining ones.
                    LOGGER.error(ERROR_STRING, e);
                }
            }
        }

        final Tuple output = tupleFactory.newTuple();
        output.append(docId);
        output.append(new DataByteArray(merged.build().toByteArray()));
        return output;
    } catch (IOException e) {
        LOGGER.error(StackTraceExtractor.getStackTrace(e), e);
        throw e;
    }
}
bw=((DocumentProtos.DocumentWrapper) dwo).toByteArray(); } else { bw=((DocumentProtos.DocumentWrapper.Builder) dwo).build().toByteArray();
context.write(outKey, new BytesWritable(builder.build().toByteArray())); return; } else {
dwb.setMediaContainer(mc); return dwb.build();
t.append(new DataByteArray(commonDocumentWrapper.build().toByteArray()));
/**
 * Replaces the DOI stored in a serialized document with a corrected value.
 *
 * <p>Expects a three-field tuple: the key (String), the serialized
 * {@code DocumentWrapper} (DataByteArray) and the corrected DOI (String).
 *
 * @param input the input tuple
 * @return a tuple holding the key followed by the re-serialized document carrying the
 *         corrected DOI, or {@code null} when {@code input} is {@code null} or does not
 *         have exactly three fields
 * @throws IOException if anything fails while processing the row; the original exception
 *         is attached as the cause
 */
public Tuple exec(Tuple input) throws IOException {
    if (input == null || input.size() != 3) {
        return null;
    }
    try {
        String key = (String) input.get(0);
        DocumentWrapper dw = DocumentWrapper.parseFrom(((DataByteArray) input.get(1)).get());
        String correctedDoi = (String) input.get(2);

        // Rebuild the nested messages from the innermost outwards so that only the DOI changes.
        DocumentWrapper.Builder dwb = DocumentWrapper.newBuilder(dw);
        DocumentMetadata.Builder dmb = DocumentMetadata.newBuilder(dw.getDocumentMetadata());
        BasicMetadata.Builder bmb = BasicMetadata.newBuilder(dmb.getBasicMetadata());
        bmb.setDoi(correctedDoi);
        dmb.setBasicMetadata(bmb);
        dwb.setDocumentMetadata(dmb);

        Tuple ret = TupleFactory.getInstance().newTuple();
        ret.append(key);
        ret.append(new DataByteArray(dwb.build().toByteArray()));
        return ret;
    } catch (Exception e) {
        // Render the stack trace once and reuse it for both the log entry and the message.
        final String stackTrace = StackTraceExtractor.getStackTrace(e);
        logger.error("Error in processing input row:" + stackTrace, e);
        // Keep the original exception as the cause so callers can inspect it programmatically.
        throw new IOException("Caught exception processing input row:\n" + stackTrace, e);
    }
}
// Closes the enclosing class, whose declaration lies outside this excerpt.
}
t.append(new DataByteArray(documentWrapper.build().toByteArray())); return t;
/**
 * Wraps a metadata {@code Media} and a list of content {@code Media} in a single
 * {@code DocumentWrapper} and forwards it to {@code put(DocumentWrapper)}.
 *
 * @param rowId    row id set on the resulting wrapper
 * @param metadata media entry added to the container first
 * @param content  media entries handed to {@code addContent} together with the container builder
 */
public void put(String rowId, Media metadata, List<Media> content) {
    final DocumentProtos.DocumentWrapper.Builder wrapper =
            DocumentProtos.DocumentWrapper.newBuilder().setRowId(rowId);
    final DocumentProtos.MediaContainer.Builder container =
            DocumentProtos.MediaContainer.newBuilder().addMedia(metadata);
    addContent(content, container);
    put(wrapper.setMediaContainer(container).build());
}
/**
 * Combines the metadata wrapper and the content wrapper read from {@code rows} into one
 * {@code DocumentWrapper}.
 *
 * <p>The result takes its row id from the metadata wrapper and holds the media of the
 * metadata wrapper followed by the media of the content wrapper.
 *
 * @param rows rows passed to {@code prepareDocumentWrapper} for both column families
 * @return the combined wrapper, or {@code null} if either part could not be prepared
 * @throws IOException declared by this method; presumably raised by
 *         {@code prepareDocumentWrapper} (not visible here)
 */
private DocumentWrapper prepareMetadataAndContent(final List<Row> rows) throws IOException {
    final DocumentWrapper metadataPart = prepareDocumentWrapper(
            rows, HBaseConstant.FAMILY_METADATA, HBaseConstant.FAMILY_METADATA_DOCUMENT_QUALIFIER_PROTO);
    final DocumentWrapper contentPart = prepareDocumentWrapper(
            rows, HBaseConstant.FAMILY_CONTENT, HBaseConstant.FAMILY_CONTENT_QUALIFIER_PROTO);

    if (metadataPart == null || contentPart == null) {
        return null;
    }

    final DocumentProtos.MediaContainer.Builder allMedia = DocumentProtos.MediaContainer.newBuilder()
            .addAllMedia(metadataPart.getMediaContainer().getMediaList())
            .addAllMedia(contentPart.getMediaContainer().getMediaList());

    return DocumentProtos.DocumentWrapper.newBuilder()
            .setRowId(metadataPart.getRowId())
            .setMediaContainer(allMedia)
            .build();
}
/**
 * Creates a {@code DocumentWrapper} whose media container holds a single XML media entry.
 *
 * <p>The entry has key {@code "bwmeta2"}, media type {@code "application/xml"} and the
 * UTF-8 bytes of {@code bwmeta} as its content.
 *
 * @param id     row id of the resulting wrapper
 * @param bwmeta text stored as the media content
 * @return the built wrapper
 */
private DocumentWrapper documentWrapper(String id, String bwmeta) {
    final DocumentProtos.Media.Builder xmlMedia = DocumentProtos.Media.newBuilder();
    xmlMedia.setKey("bwmeta2");
    xmlMedia.setMediaType("application/xml");
    xmlMedia.setContent(ByteString.copyFromUtf8(bwmeta));

    final DocumentProtos.MediaContainer.Builder container = DocumentProtos.MediaContainer.newBuilder();
    container.addMedia(xmlMedia.build());

    final DocumentProtos.DocumentWrapper.Builder wrapper = DocumentProtos.DocumentWrapper.newBuilder();
    wrapper.setRowId(id);
    wrapper.setMediaContainer(container);
    return wrapper.build();
}
/**
 * Builds a {@code DocumentWrapper} with the given row id and the media container
 * produced by {@code addMedia(mediaList)}.
 *
 * @param mediaList   media entries passed to {@code addMedia}
 * @param documentKey row id of the resulting wrapper
 * @return the built wrapper
 */
private DocumentWrapper convertMediaToBw2Proto(List<Media> mediaList, String documentKey) {
    return DocumentProtos.DocumentWrapper.newBuilder()
            .setRowId(documentKey)
            .setMediaContainer(addMedia(mediaList))
            .build();
}
/**
 * Applies {@code merger} to the document carried in the tuple.
 *
 * <p>Field 0 is read as the row id (String) and field 1 as the serialized
 * {@code DocumentWrapper} (DataByteArray). The merger is executed with index {@code 3}
 * against a builder initialised from the parsed document.
 *
 * @param tuple the input tuple; validated by {@code checkCorrectness} first
 * @return a tuple holding the row id followed by the serialized merged document
 * @throws IOException declared by this method; raised by the calls it makes
 */
@Override
public Tuple exec(Tuple tuple) throws IOException {
    checkCorrectness(tuple);

    final String rowId = (String) tuple.get(0);
    final byte[] originalBytes = ((DataByteArray) tuple.get(1)).get();

    final DocumentWrapper.Builder original = DocumentWrapper.newBuilder(DocumentWrapper.parseFrom(originalBytes));
    final DocumentWrapper.Builder merged = merger.execute(tuple, 3, original);
    final DataByteArray mergedBytes = new DataByteArray(merged.build().toByteArray());

    final Tuple output = TupleFactory.getInstance().newTuple();
    output.append(rowId);
    output.append(mergedBytes);
    return output;
}
/**
 * Prepares the key/value pair used to write one document.
 *
 * <p>The value is the {@code Put} produced by {@code DocumentWrapper2HBasePut.translate}
 * for a wrapper built from {@code newId_url} and a media container holding the single
 * media returned by {@code prepareMedia}. The key is {@code newId_url} encoded as UTF-8.
 *
 * @param valueBytes  passed to {@code prepareMedia}
 * @param newId_url   document id; used for the wrapper and, UTF-8 encoded, as the key
 * @param recordKey   passed to {@code prepareMedia}
 * @param recordValue passed to {@code prepareMedia}
 * @return the pair of key (first) and {@code Put} (second)
 */
public Pair<ImmutableBytesWritable, Put> prepareDocument(final byte[] valueBytes, final String newId_url
        , final String recordKey, final String recordValue) {
    final Pair<ImmutableBytesWritable, Put> keyValuePair = new Pair<ImmutableBytesWritable, Put>();
    final DocumentProtos.MediaContainer.Builder mcb = DocumentProtos.MediaContainer.newBuilder();
    mcb.addMedia(prepareMedia(valueBytes, recordKey, recordValue));
    keyValuePair.setSecond(DocumentWrapper2HBasePut.translate(prepareDocumentWrapperBuilder(newId_url, mcb).build()));
    // Encode the key with an explicit charset: the no-arg getBytes() depends on the
    // platform default and can yield different keys on differently configured machines.
    keyValuePair.setFirst(new ImmutableBytesWritable(
            newId_url.getBytes(java.nio.charset.Charset.forName("UTF-8"))));
    return keyValuePair;
}
/**
 * Fills the {@code dw} builder from raw bytes and builds a {@code DocumentWrapper}.
 *
 * @param rowid  row id bytes, converted with {@code Bytes.toString}
 * @param mproto serialized {@code DocumentMetadata}; skipped when {@code null}
 * @param cproto serialized {@code MediaContainer}; skipped when {@code null}
 * @return the wrapper built from {@code dw}
 * @throws InvalidProtocolBufferException if {@code mproto} or {@code cproto} cannot be parsed
 * @throws ExecException declared by this method
 */
public DocumentWrapper toDocumentWrapper(byte[] rowid, byte[] mproto, byte[] cproto) throws ExecException, InvalidProtocolBufferException {
    // NOTE(review): dw is declared outside this method. If it is a builder shared between
    // calls, metadata/media set by an earlier call would remain when mproto/cproto is
    // null here - confirm against the declaration.
    final String rowKey = Bytes.toString(rowid);
    dw.setRowId(rowKey);

    if (null != mproto) {
        dw.setDocumentMetadata(DocumentMetadata.parseFrom(mproto));
    }

    if (null != cproto) {
        dw.setMediaContainer(MediaContainer.parseFrom(cproto));
    }

    return dw.build();
}
/**
 * Creates a new {@code DocumentWrapper} that carries only the
 * {@link DocumentWrapper#getDocumentMetadata()} and {@link DocumentWrapper#getRowId()}
 * of the given wrapper; no other field is copied.
 *
 * @param docWrapper the wrapper to copy from
 * @return the reduced copy
 */
public static DocumentWrapper cloneDocumentMetadata(DocumentWrapper docWrapper) {
    final DocumentWrapper.Builder copy = DocumentWrapper.newBuilder();
    copy.setDocumentMetadata(docWrapper.getDocumentMetadata());
    copy.setRowId(docWrapper.getRowId());
    return copy.build();
}
/**
 * Converts a {@code DocumentDTO} into a {@code DocumentWrapper}.
 *
 * <p>The row id comes from {@code RowComposer.composeRow(docDTO)}; the document metadata
 * and media container are taken from the DTO as they are.
 *
 * @param docDTO the DTO to convert
 * @return the built wrapper
 */
public static DocumentWrapper translate(DocumentDTO docDTO) {
    return DocumentWrapper.newBuilder()
            .setRowId(RowComposer.composeRow(docDTO))
            .setDocumentMetadata(docDTO.getDocumentMetadata())
            .setMediaContainer(docDTO.getMediaConteiner())
            .build();
}