/**
 * Creates the feature structure in the given JCas, assigns its begin and end
 * values through {@code setBegin} / {@code setEnd}, and finally invokes
 * {@code readObject()}.
 *
 * @param jcas  the JCas handed to the superclass constructor
 * @param begin value passed to {@code setBegin}
 * @param end   value passed to {@code setEnd}
 * @generated
 */
public DocumentMetaData(JCas jcas, int begin, int end) {
    super(jcas);
    this.setBegin(begin);
    this.setEnd(end);
    this.readObject();
}
/**
 * Loads the next queued file into the given JCas.
 *
 * <p>The file's content becomes the document text, the configured
 * {@code language} becomes the document language, and a
 * {@link DocumentMetaData} annotation carrying the base URI (from {@code path})
 * and the file URI is added to the indexes.
 *
 * @param aJCas the JCas to populate
 * @throws IOException if no file is left in the queue, or if reading the file fails
 * @throws CollectionException declared by the overridden method
 */
@Override
public void getNext(JCas aJCas) throws IOException, CollectionException {
    File file = files.poll();
    // poll() yields null once the queue is drained; fail with a clear message
    // before touching the CAS instead of running into a NullPointerException.
    if (file == null) {
        throw new IOException("No more files to read");
    }

    aJCas.setDocumentText(readFileToString(file));
    aJCas.setDocumentLanguage(language);

    DocumentMetaData meta = new DocumentMetaData(aJCas);
    meta.setDocumentBaseUri(path.toURI().toString());
    meta.setDocumentUri(file.toURI().toString());
    meta.addToIndexes();
}
@Override public void process(JCas aJCas) throws AnalysisEngineProcessException { DocumentMetaData meta = iterate(aJCas, DocumentMetaData.class).iterator().next(); // make a new, empty document Document doc = new Document(); // Add the document metadata. Use fields that are indexed (i.e. searchable), but don't // tokenize the field into words. doc.add(new Field("documentUri", meta.getDocumentUri(), Store.YES, Index.NOT_ANALYZED)); doc.add(new Field("documentBaseUri", meta.getDocumentBaseUri(), Store.YES, Index.NOT_ANALYZED)); doc.add(new Field("language", aJCas.getDocumentLanguage(), Store.YES, Index.NOT_ANALYZED)); // Add all tokens to the index without any further processing. doc.add(new Field("token", AnnotationStream.create(iterate(aJCas, Token.class)), TermVector.YES)); // Optionally store the document text. It can be retrieved but not searched. if (storeText) { doc.add(new Field("text", aJCas.getDocumentText(), Store.YES, Index.NO)); } try { writer.addDocument(doc); } catch (IOException e) { throw new AnalysisEngineProcessException(e); } }
/**
 * Prints the content of the given JCas to standard output: the document URI
 * and language, the document text, and one line per annotation showing its
 * type name, begin/end values and covered text.
 *
 * @param aJCas the JCas to dump; the first {@link DocumentMetaData} returned
 *              by {@code iterate} supplies the URI
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    final DocumentMetaData metaData =
            iterate(aJCas, DocumentMetaData.class).iterator().next();

    // Metadata section.
    System.out.println("=== METADATA ========================================");
    System.out.println("URI : " + metaData.getDocumentUri());
    System.out.println("Language: " + aJCas.getDocumentLanguage());

    // Text section.
    System.out.println("=== TEXT ============================================");
    System.out.println(aJCas.getDocumentText());

    // Annotation section: one line per annotation.
    System.out.println("=== ANNOTATIONS =====================================");
    for (Annotation annotation : iterate(aJCas, Annotation.class)) {
        String typeName = annotation.getType().getName();
        int begin = annotation.getBegin();
        int end = annotation.getEnd();
        String covered = annotation.getCoveredText();
        System.out.println(typeName + "(" + begin + "," + end + ") [" + covered + "]");
    }
}
}
/**
 * Creates the feature structure in the given JCas and then invokes
 * {@code readObject()}.
 *
 * @param jcas the JCas handed to the superclass constructor
 * @generated
 */
public DocumentMetaData(JCas jcas) {
    super(jcas);
    this.readObject();
}
public FeatureStructure createFS(int addr, CASImpl cas) { if (DocumentMetaData_Type.this.useExistingInstance) { // Return eq fs instance if already created FeatureStructure fs = DocumentMetaData_Type.this.jcas.getJfsFromCaddr(addr); if (null == fs) { fs = new DocumentMetaData(addr, DocumentMetaData_Type.this); DocumentMetaData_Type.this.jcas.putJfsFromCaddr(addr, fs); return fs; } return fs; } else return new DocumentMetaData(addr, DocumentMetaData_Type.this); } };
/**
 * Runs a {@code TextFileReader} over {@code src/test/resources/textfiles}
 * and checks that, for every entry of {@code testFileContent} whose key is a
 * suffix of a document URI, the document text equals the entry's value. At
 * the end the number of matches must equal the size of
 * {@code testFileContent}.
 */
@Test
public void test() throws Exception {
    CollectionReader textReader = CollectionReaderFactory.createCollectionReader(
            TextFileReader.class,
            createTypeSystemDescription(),
            TextFileReader.PARAM_PATH, "src/test/resources/textfiles",
            TextFileReader.PARAM_LANGUAGE, "Latin");

    int matches = 0;
    CAS cas = CasCreationUtils.createCas(textReader.getProcessingResourceMetaData());

    while (textReader.hasNext()) {
        textReader.getNext(cas);
        DocumentMetaData metaData =
                iterate(cas.getJCas(), DocumentMetaData.class).iterator().next();

        for (Entry<String, String> expected : testFileContent.entrySet()) {
            if (!metaData.getDocumentUri().endsWith(expected.getKey())) {
                continue;
            }
            assertEquals(expected.getValue(), cas.getDocumentText());
            matches++;
        }

        // Clear the CAS so it can take the next document.
        cas.reset();
    }

    assertEquals(testFileContent.size(), matches);
}
}
/**
 * Internal - constructor used by generator.
 *
 * <p>Forwards the address and type to the superclass constructor and then
 * invokes {@code readObject()}.
 *
 * @param addr the address handed to the superclass constructor
 * @param type the type object handed to the superclass constructor
 * @generated
 */
public DocumentMetaData(int addr, TOP_Type type) {
    super(addr, type);
    this.readObject();
}