/**
 * Collects the {@link JCoReURI} annotations of the given CAS, turns their URI strings into
 * {@link URI} objects and creates a new {@code CasPopulator} over the resulting iterator.
 *
 * @param aJCas the CAS carrying the batch of URI annotations
 * @throws AnalysisEngineProcessException if constructing the {@code CasPopulator} fails with an
 *         {@link IOException}
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    Collection<JCoReURI> uriAnnotations = JCasUtil.select(aJCas, JCoReURI.class);
    if (log.isDebugEnabled()) {
        log.debug("Received batch of {} NXML URIs", uriAnnotations.size());
    }

    // The iterator is lazy: URIs are only created when the batch is consumed.
    currentUriBatch = uriAnnotations.stream()
            .map(uriAnnotation -> URI.create(uriAnnotation.getUri()))
            .iterator();

    try {
        casPopulator = new CasPopulator(currentUriBatch);
    } catch (IOException e) {
        log.error("Exception occurred when trying to inizialize the NXML parser", e);
        throw new AnalysisEngineProcessException(e);
    }
}
/**
 * Fills the given map with one entry per token that is covered by a sentence. The key is the
 * token's address, the value is {@code "<sentence number>-<token number>"}, where both numbers
 * start at 1 and the token number restarts in every sentence.
 *
 * @param aJCas the CAS to read sentences and tokens from
 * @param aTokenAddress the map that receives the address-to-id entries
 */
private void setTokenId(JCas aJCas, Map<Integer, String> aTokenAddress) {
    int sentenceNumber = 0;
    for (Sentence sentence : select(aJCas, Sentence.class)) {
        sentenceNumber++;
        int tokenNumber = 0;
        for (Token token : selectCovered(Token.class, sentence)) {
            tokenNumber++;
            aTokenAddress.put(token.getAddress(), sentenceNumber + "-" + tokenNumber);
        }
    }
}
/**
 * Returns the single feature structure of the requested type from the CAS. If the CAS does not
 * contain one yet, a new feature structure of that type is created, added to the indexes and
 * returned.
 *
 * @param jcas the CAS to look in (and to create the feature structure in, if necessary)
 * @param targetClass the JCas class of the wanted feature structure
 * @param <T> the feature structure type
 * @return the existing or the newly created feature structure
 */
public static <T extends TOP> T getOrCreate(JCas jcas, Class<T> targetClass) {
    if (!JCasUtil.exists(jcas, targetClass)) {
        // Nothing of this type indexed yet: create it through the CAS and index it.
        T created = (T) jcas.getCas().createFS(JCasUtil.getType(jcas, targetClass));
        jcas.getCas().addFsToIndexes(created);
        return created;
    }
    return JCasUtil.selectSingle(jcas, targetClass);
}
}
/**
 * Creates a sentence factory from the annotations found in the given jCas.
 *
 * <p>Four indexes are built from the jCas and handed to the delegate constructor, in this
 * order: the word tokens covered by each sentence, the entities covering each word token, the
 * phrase chunks covering each word token, and the dependencies covered by each sentence.
 *
 * @param jCas the jCas to create sentences from
 */
public SentenceFactory(JCas jCas) {
    this(
            JCasUtil.indexCovered(
                    jCas, uk.gov.dstl.baleen.types.language.Sentence.class, WordToken.class),
            JCasUtil.indexCovering(jCas, WordToken.class, Entity.class),
            JCasUtil.indexCovering(jCas, WordToken.class, PhraseChunk.class),
            JCasUtil.indexCovered(
                    jCas, uk.gov.dstl.baleen.types.language.Sentence.class, Dependency.class));
}
/**
 * Pairs every event that is covered by a sentence with the document creation time.
 *
 * @param jCas the CAS holding the sentences, events and the single document creation time
 * @return one pair (event, document creation time) per sentence-covered event, in the order in
 *         which sentences and their events are selected
 */
@Override
protected List<SourceTargetPair> getSourceTargetPairs(JCas jCas) {
    DocumentCreationTime creationTime = JCasUtil.selectSingle(jCas, DocumentCreationTime.class);
    List<SourceTargetPair> result = Lists.newArrayList();

    for (Sentence currentSentence : JCasUtil.select(jCas, Sentence.class)) {
        List<Event> eventsInSentence = JCasUtil.selectCovered(jCas, Event.class, currentSentence);
        for (Event currentEvent : eventsInSentence) {
            result.add(new SourceTargetPair(currentEvent, creationTime));
        }
    }

    return result;
}
}
/**
 * Runs the test annotator on a short sentence and expects exactly one Person annotation whose
 * value is "JOHN SMITH".
 */
@Test
public void testNoChunks() throws Exception {
    AnalysisEngine engine = AnalysisEngineFactory.createEngine(TestAnnotator.class);

    jCas.setDocumentText("PERSON JOHN SMITH was seen entering the warehouse");
    engine.process(jCas);

    int personCount = JCasUtil.select(jCas, Person.class).size();
    assertEquals(1, personCount);

    Person firstPerson = JCasUtil.selectByIndex(jCas, Person.class, 0);
    assertEquals("JOHN SMITH", firstPerson.getValue());
}
@Test public void testSingleTokenRelationWithoutFeatureValue() throws Exception { JCas jcas = makeJCasOneSentence(); CAS cas = jcas.getCas(); List<Token> tokens = new ArrayList<>(select(jcas, Token.class)); Token gov = tokens.get(0); Token dep = tokens.get(tokens.size() - 1); Type relationType = cas.getTypeSystem().getType("webanno.custom.Relation"); // One at the beginning // WebAnno legacy conventions // AnnotationFS fs1 = cas.createAnnotation(relationType, // min(dep.getBegin(), gov.getBegin()), // max(dep.getEnd(), gov.getEnd())); // DKPro Core conventions AnnotationFS fs1 = cas.createAnnotation(relationType, dep.getBegin(), dep.getEnd()); FSUtil.setFeature(fs1, "Governor", gov); FSUtil.setFeature(fs1, "Dependent", dep); cas.addFsToIndexes(fs1); writeAndAssertEquals(jcas, WebannoTsv3Writer.PARAM_RELATION_LAYERS, asList("webanno.custom.Relation")); }
/**
 * Writes a document with a custom chain of two links, each spanning two tokens (tokens 1-2 and
 * tokens 3-4 of a one-sentence CAS), and compares the output against the reference.
 */
@Test
public void testMultiTokenChain() throws Exception {
    JCas jcas = makeJCasOneSentence();
    CAS cas = jcas.getCas();

    List<Token> allTokens = new ArrayList<>(select(jcas, Token.class));
    Token first = allTokens.get(0);
    Token second = allTokens.get(1);
    Token third = allTokens.get(2);
    Token fourth = allTokens.get(3);

    Type headType = cas.getTypeSystem().getType("webanno.custom.SimpleChain");
    Type linkType = cas.getTypeSystem().getType("webanno.custom.SimpleLink");

    // The chain is built from the inside out: the link over tokens 3-4 is created first and
    // passed as the last argument of the link over tokens 1-2, which in turn goes to the head.
    makeChainHead(headType,
            makeChainLink(linkType, cas, first.getBegin(), second.getEnd(), null, null,
                    makeChainLink(linkType, cas, third.getBegin(), fourth.getEnd(), null, null,
                            null)));

    writeAndAssertEquals(jcas,
            WebannoTsv3Writer.PARAM_CHAIN_LAYERS, asList("webanno.custom.Simple"));
}

@Test
@Override public void process(JCas jCas) throws AnalysisEngineProcessException { JCas sourceView; try { sourceView = jCas.getView(this.sourceViewName); } catch (CASException e) { throw new AnalysisEngineProcessException(e); } CasCopier copier = new CasCopier(sourceView.getCas(), jCas.getCas()); Feature sofaFeature = jCas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_SOFA); // copy document text jCas.setDocumentText(sourceView.getDocumentText()); // copy text annotation Text sourceText = JCasUtil.selectSingle(sourceView, Text.class); Text text = (Text) copier.copyFs(sourceText); text.setFeatureValue(sofaFeature, jCas.getSofa()); text.addToIndexes(); // copy document creation time DocumentCreationTime sourceTime = JCasUtil.selectSingle( sourceView, DocumentCreationTime.class); DocumentCreationTime time = (DocumentCreationTime) copier.copyFs(sourceTime); time.setFeatureValue(sofaFeature, jCas.getSofa()); time.addToIndexes(); } }
/**
 * For every annotation of the configured class in the source view, creates an annotation of the
 * same class with the same begin and end offsets in the target view.
 *
 * @param aJCas the CAS on which the source and target views are looked up by name
 * @throws AnalysisEngineProcessException if one of the views cannot be obtained
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    try {
        JCas source = aJCas.getView(sourceViewName);
        JCas target = aJCas.getView(targetViewName);

        for (Annotation original : JCasUtil.select(source, clazz)) {
            int begin = original.getBegin();
            int end = original.getEnd();
            AnnotationFactory.createAnnotation(target, begin, end, clazz);
        }
    } catch (CASException e) {
        throw new AnalysisEngineProcessException(e);
    }
}
@Override public void process(JCas jCas) throws AnalysisEngineProcessException { JCas goldView; try { goldView = jCas.getView("GoldView"); } catch (CASException e) { throw new AnalysisEngineProcessException(e); } JCas systemView; try { systemView = jCas.getView("_InitialView"); } catch (CASException e) { throw new AnalysisEngineProcessException(e); } // iterate over sentences in case the context is needed for(Sentence sentence : JCasUtil.select(systemView, Sentence.class)) { List<EventMention> events = JCasUtil.selectCovered(goldView, EventMention.class, sentence); List<EntityMention> entities = JCasUtil.selectCovered(goldView, EntityMention.class, sentence); } } }
/**
 * Fills the given map with one entry per token that is covered by a sentence. The key is the
 * token's low-level feature structure reference, the value is
 * {@code "<sentence number>-<token number>"}, where both numbers start at 1 and the token number
 * restarts in every sentence.
 *
 * @param aJCas the CAS to read sentences and tokens from
 * @param aTokenAddress the map that receives the reference-to-id entries
 */
private void setTokenId(JCas aJCas, Map<Integer, String> aTokenAddress) {
    LowLevelCAS lowLevelCas = aJCas.getLowLevelCas();

    int sentenceNumber = 0;
    for (Sentence sentence : select(aJCas, Sentence.class)) {
        sentenceNumber++;
        int tokenNumber = 0;
        for (Token token : selectCovered(Token.class, sentence)) {
            tokenNumber++;
            String tokenId = sentenceNumber + "-" + tokenNumber;
            aTokenAddress.put(lowLevelCas.ll_getFSRef(token), tokenId);
        }
    }
}
/**
 * Disambiguates all {@code WSDItem}s of the CAS in one call, using the document text, and
 * stores the result for each item via {@code setWSDItem}.
 *
 * <p>When {@code maxItemsAttempted} is non-negative, {@code numItemsAttempted} is incremented
 * for every item considered and processing stops as soon as the limit has been reached.
 *
 * @param aJCas the CAS holding the items to disambiguate
 * @throws AnalysisEngineProcessException if a {@code SenseInventoryException} occurs
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    try {
        logger.debug("Entering annotator WSDAnnotatorBaseDocumentCollective.");

        Map<WSDItem, Map<String, Double>> disambiguationResults = getDisambiguation(
                JCasUtil.select(aJCas, WSDItem.class), aJCas.getDocumentText());

        for (Map.Entry<WSDItem, Map<String, Double>> result
                : disambiguationResults.entrySet()) {
            // The counter is only touched while a limit is configured (short-circuit &&).
            if (maxItemsAttempted >= 0 && numItemsAttempted++ >= maxItemsAttempted) {
                return;
            }
            setWSDItem(aJCas, result.getKey(), result.getValue());
        }
    } catch (SenseInventoryException e) {
        throw new AnalysisEngineProcessException(e);
    }
}
}
/**
 * Renders the document text with marks around every annotation of the configured class.
 *
 * <p>For each annotation, {@code beginMark} is inserted at its begin offset and
 * {@code endMark}, followed by the configured feature's value wrapped in {@code ~}, at its end
 * offset. Afterwards every newline is replaced, depending on {@code doubleNewline}.
 *
 * @param jcas the CAS providing the document text and the annotations
 * @return the document text with all marks inserted and newlines replaced
 */
public String getMarkdownString(JCas jcas) {
    Feature feat = jcas.getTypeSystem().getType(type).getFeatureByBaseName(featureName);

    SortedSet<Insertion> insertions = new TreeSet<Insertion>();
    for (Annotation annotation : JCasUtil.select(jcas, clazz)) {
        String closing = endMark + "~" + annotation.getFeatureValueAsString(feat) + "~";
        insertions.add(new Insertion(beginMark, annotation.getBegin()));
        insertions.add(new Insertion(closing, annotation.getEnd()));
    }

    // NOTE(review): insertions are applied in the set's iteration order using the original
    // offsets; this presumably relies on Insertion's ordering so that earlier insertions do not
    // shift later positions -- confirm against the Insertion class.
    StringBuilder exportString = new StringBuilder(jcas.getDocumentText());
    for (Insertion insertion : insertions) {
        exportString.insert(insertion.position, insertion.content);
    }

    // NOTE(review): a Markdown hard line break usually needs two trailing spaces -- confirm the
    // single space in the non-doubling case is intended.
    String newlineReplacement = doubleNewline ? "\n\n" : " \n";
    return exportString.toString().replace("\n", newlineReplacement);
}
/**
 * Appends the tokens of the given CAS to the temporary training file, one
 * {@code text<TAB>posValue} line per token and an empty line after each sentence. The temporary
 * file and its UTF-8 writer are created on the first call.
 *
 * @param jCas the CAS holding sentences and POS-tagged tokens
 * @throws AnalysisEngineProcessException if the temporary file cannot be created or opened
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    if (tempData == null) {
        try {
            tempData = File.createTempFile("dkpro-arktweet-pos-trainer", ".tsv");
            FileOutputStream fileStream = new FileOutputStream(tempData);
            OutputStreamWriter utf8Writer =
                    new OutputStreamWriter(fileStream, StandardCharsets.UTF_8);
            out = new PrintWriter(utf8Writer);
        } catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    Map<Sentence, Collection<Token>> tokensBySentence =
            indexCovered(jCas, Sentence.class, Token.class);

    for (Sentence sentence : select(jCas, Sentence.class)) {
        for (Token token : tokensBySentence.get(sentence)) {
            out.printf("%s\t%s%n", token.getText(), token.getPos().getPosValue());
        }
        // blank line terminates the sentence
        out.println();
    }
}
/**
 * Tags all tokens of the document and, for each tagged token, creates a POS annotation of the
 * type the mapping provider returns for the tag, sets its value to the (interned) tag, indexes
 * it and attaches it to the token.
 *
 * @param jCas the CAS holding the tokens to tag
 * @throws AnalysisEngineProcessException declared by the interface
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    modelProvider.configure(jCas.getCas());
    mappingProvider.configure(jCas.getCas());

    int documentLength = jCas.getDocumentText().length();
    List<Token> documentTokens = selectCovered(jCas, Token.class, 0, documentLength);

    for (TaggedToken tagged : tagTweetTokens(documentTokens, modelProvider.getResource())) {
        Type posType = mappingProvider.getTagType(tagged.tag);
        POS posAnnotation = (POS) jCas.getCas()
                .createAnnotation(posType, tagged.getBegin(), tagged.getEnd());
        posAnnotation.setPosValue(tagged.tag.intern());
        posAnnotation.addToIndexes();
        tagged.token.setPos(posAnnotation);
    }
}
/**
 * Writes one line for the document: its id, a tab, the document text and a newline.
 *
 * @param aJCas the CAS holding the single {@code DocumentMetaData} and the document text
 * @throws AnalysisEngineProcessException if writing fails with an {@link IOException}
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    DocumentMetaData metaData = JCasUtil.selectSingle(aJCas, DocumentMetaData.class);
    String line = metaData.getDocumentId() + "\t" + aJCas.getDocumentText() + "\n";

    try {
        bw.write(line);
    } catch (IOException e) {
        throw new AnalysisEngineProcessException(e);
    }
}
@Override protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException { final Collection<Structure> structures = JCasUtil.select(jCas, Structure.class); if (structures.isEmpty()) { // If the jCas has no structural annotations then the entire text should be marked as a text // block final int end = jCas.getDocumentText().length(); final Text t = new Text(jCas, 0, end); addToJCasIndex(t); } else { // Otherwise add the types we want... structures .stream() .filter(s -> structuralClasses.contains(s.getClass())) .map(s -> new Text(jCas, s.getBegin(), s.getEnd())) .forEach(this::addToJCasIndex); // Now remove any that cover others, so we keep only biggest/most detailed as per request final Map<Text, Collection<Text>> cover; if (keepSmallest) { cover = JCasUtil.indexCovering(jCas, Text.class, Text.class); } else { cover = JCasUtil.indexCovered(jCas, Text.class, Text.class); } cover.forEach( (t, c) -> c.remove(t)); // Remove where x has been pulled out as covering itself (potential bug // introduced in UIMAfit 2.3.0) cover.values().stream().flatMap(Collection::stream).forEach(this::removeFromJCasIndex); } }
/**
 * Passes the base annotations to {@code doAnnotations}. Without a segment annotation class, all
 * base annotations of the document are handed over once, together with the document length.
 * Otherwise the call is made once per segment annotation, with the base annotations covered by
 * that segment and the segment's end offset.
 *
 * @param jcas the CAS to process
 * @throws AnalysisEngineProcessException declared by the interface
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    if (segmentAnnotationClass == null) {
        // No segmentation configured: treat the whole document as one unit.
        doAnnotations(jcas, JCasUtil.select(jcas, baseAnnotation),
                jcas.getDocumentText().length());
        return;
    }

    for (final Annotation segment : JCasUtil.select(jcas, segmentAnnotationClass)) {
        doAnnotations(jcas, JCasUtil.selectCovered(jcas, baseAnnotation, segment),
                segment.getEnd());
    }
}
/**
 * Sets the output file inside the target folder and resolves the UIMA types of the configured
 * sequence-span and instance classes. The types are obtained by creating an empty (0, 0)
 * annotation of each class in a freshly created CAS and reading its type.
 *
 * @param context the UIMA context, passed on to the superclass
 * @throws ResourceInitializationException if the CAS cannot be created, a class cannot be
 *         loaded, or any other exception occurs while resolving the types
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);

    outputFile = new File(targetFolder, DeepLearningConstants.FILENAME_MAXIMUM_LENGTH);

    try {
        JCas scratchCas = JCasFactory.createJCas();

        Type sequenceLookup = JCasUtil.getType(scratchCas, Class.forName(sequenceSpanTypeName));
        AnnotationFS sequenceProbe = scratchCas.getCas().createAnnotation(sequenceLookup, 0, 0);
        sequenceSpanType = sequenceProbe.getType();

        Type instanceLookup = JCasUtil.getType(scratchCas, Class.forName(instanceTypeName));
        AnnotationFS instanceProbe = scratchCas.getCas().createAnnotation(instanceLookup, 0, 0);
        instanceType = instanceProbe.getType();
    } catch (Exception e) {
        throw new ResourceInitializationException(e);
    }
}