/**
 * Creates a new, empty JCas and attaches a {@link Header} to it.
 *
 * @param filePath value stored as the header's source
 * @param docId value stored as the header's document id
 * @return the newly created JCas, with the header already added to its indexes
 * @throws UIMAException if the JCas cannot be created
 */
public static JCas newCasFromFile(String filePath, String docId) throws UIMAException {
    final JCas cas = JCasFactory.createJCas();

    final Header docHeader = new Header(cas);
    docHeader.setDocId(docId);
    docHeader.setSource(filePath);
    docHeader.addToIndexes();

    return cas;
}
/**
 * Creates a Header in the given JCas and sets its begin and end offsets.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public Header(JCas jcas, int begin, int end) {
    super(jcas);
    // Offsets are applied before readObject() runs, same as the generated original.
    setBegin(begin);
    setEnd(end);
    readObject();
}
// Build the document Header for this CAS: document id, title and the
// component id of the PubMed web-service collection reader.
Header header = new Header(jcas);
header.setDocId(pubmedId);
header.setTitle(title);
header.setComponentId(PubmedWebServiceCollectionReader.COMPONENT_ID);
// Add the header to the CAS indexes.
header.addToIndexes();
|| typeName.equals(TypeSystem.PUBMED_HEADER)) { Header h = (Header) a; doc.put(ID, h.getDocId()); // LATER set prefix doc.put(PM_ID, parseInt(h.getDocId())); doc.put(TITLE, h.getTitle()); try { Header header = JCasUtil.selectSingle(jCas, Header.class); sourceFile = header.getSource(); } catch (Throwable t2) {// nope
@Override public void process(JCas jCas) throws AnalysisEngineProcessException { Header header = selectSingle(jCas, Header.class); File pdfFile = new File(header.getSource()); checkFileExists(pdfFile); LOG.debug("extracting {}", pdfFile.getName()); try { PDFTextStream pdf; if (pdfFile.getName().endsWith("zip")) { InputStream is = unzipUniqueFileAsStream(pdfFile); pdf = new PDFTextStream(is, removeExtension(pdfFile.getName())); } else { pdf = new PDFTextStream(pdfFile); } BlockHandler blueHandler = new BlockHandler(); pdf.pipe(blueHandler); pdf.close(); PdfCollectionReader.extractText(jCas, blueHandler.getDoc(), header.getDocId(), expandAbbrevs); if (extractTables) PdfCollectionReader .extractTables(tableExtractor, pdfFile, jCas); // if (extractReferences) // extractReferences(f, jcas); } catch (Throwable t) { LOG.error("error extracting " + header.getSource(), t); // throw new AnalysisEngineProcessException(e); } }
public FeatureStructure createFS(int addr, CASImpl cas) { if (Header_Type.this.useExistingInstance) { // Return eq fs instance if already created FeatureStructure fs = Header_Type.this.jcas.getJfsFromCaddr(addr); if (null == fs) { fs = new Header(addr, Header_Type.this); Header_Type.this.jcas.putJfsFromCaddr(addr, fs); return fs; } return fs; } else return new Header(addr, Header_Type.this); } };
try { Header header = JCasUtil.selectSingle(jCas, Header.class); sourceFile = header.getSource(); } catch (Throwable t2) {// nope
/**
 * Internal - constructor used by generator.
 *
 * @generated
 * @param addr low level Feature Structure reference
 * @param type the type of this Feature Structure
 */
public Header(int addr, TOP_Type type) {
    super(addr, type);
    readObject();
}
/**
 * Creates a Header that belongs to the given JCas.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 */
public Header(JCas jcas) {
    super(jcas);
    readObject();
}
@Override public void getNext(JCas jcas) throws IOException, CollectionException { File f = fileIterator.next(); Header header = new Header(jcas); // .* removes the tmp part header.setDocId(f.getName().replaceAll("\\.pdf.*", "")); header.setSource(f.getAbsolutePath()); header.addToIndexes(); PDFTextStream pdf = new PDFTextStream(f); BlockHandler blueHandler = new BlockHandler(); pdf.pipe(blueHandler); pdf.close(); extractText(jcas, blueHandler.getDoc(), header.getDocId(), expandAbbrevs); if (extractTables) extractTables(tableExtractor, f, jcas); // printHtml(jcas, new File("target/" + header.getDocId() + ".html")); }