/**
 * Constructs the sentence factory for the given jCas.
 *
 * <p>Delegates to the main constructor with four prebuilt annotation indexes: the word tokens
 * covered by each sentence, the entities and phrase chunks covering each word token, and the
 * dependencies covered by each sentence.
 *
 * @param jCas the jCas to create sentences from
 */
public SentenceFactory(JCas jCas) {
  this(
      JCasUtil.indexCovered(
          jCas, uk.gov.dstl.baleen.types.language.Sentence.class, WordToken.class),
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class),
      JCasUtil.indexCovering(jCas, WordToken.class, PhraseChunk.class),
      JCasUtil.indexCovered(
          jCas, uk.gov.dstl.baleen.types.language.Sentence.class, Dependency.class));
}
/**
 * Constructs the sentence factory for the given jCas.
 *
 * <p>Delegates to the main constructor with four prebuilt annotation indexes: the word tokens
 * covered by each sentence, the entities and phrase chunks covering each word token, and the
 * dependencies covered by each sentence.
 *
 * @param jCas the jCas to create sentences from
 */
public SentenceFactory(JCas jCas) {
  this(
      JCasUtil.indexCovered(
          jCas, uk.gov.dstl.baleen.types.language.Sentence.class, WordToken.class),
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class),
      JCasUtil.indexCovering(jCas, WordToken.class, PhraseChunk.class),
      JCasUtil.indexCovered(
          jCas, uk.gov.dstl.baleen.types.language.Sentence.class, Dependency.class));
}
/**
 * Builds an index from each sentence to the named entities it covers, optionally filtered by
 * the accepted-tags regular expression.
 *
 * <p>If {@code acceptedTagsRegex} is null the unfiltered index is returned. Otherwise only
 * named entities whose value matches the pattern are retained, and every sentence gets an
 * entry (possibly empty) in the returned map.
 *
 * @param aJCas the jCas to index
 * @return map from sentence to the (possibly filtered) named entities it covers
 */
private Map<Sentence, Collection<NamedEntity>> getNamedEntityIndex(JCas aJCas) {
  Map<Sentence, Collection<NamedEntity>> idx =
      indexCovered(aJCas, Sentence.class, NamedEntity.class);
  if (acceptedTagsRegex == null) {
    return idx;
  }
  // Compile the filter once; it is applied to every entity in every sentence.
  Pattern pattern = Pattern.compile(acceptedTagsRegex);
  Map<Sentence, Collection<NamedEntity>> filteredIdx = new HashMap<>();
  for (Sentence sentence : select(aJCas, Sentence.class)) {
    Collection<NamedEntity> nes = new ArrayList<>();
    // Guard against sentences absent from the index (no covered entities) — the original
    // dereferenced idx.get(sentence) directly and would NPE in that case.
    Collection<NamedEntity> covered = idx.get(sentence);
    if (covered != null) {
      for (NamedEntity ne : covered) {
        if (pattern.matcher(ne.getValue()).matches()) {
          nes.add(ne);
        }
      }
    }
    filteredIdx.put(sentence, nes);
  }
  return filteredIdx;
}
/**
 * Appends the POS-tagged tokens of the CAS to the temporary training-data file.
 *
 * <p>On first call, lazily creates the temporary TSV file and a UTF-8 writer over it. For each
 * sentence, every token is written as {@code text<TAB>posValue}, followed by a blank line as
 * the sentence separator.
 *
 * @param jCas the CAS to convert into training data
 * @throws AnalysisEngineProcessException if the temporary file cannot be created
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
  if (tempData == null) {
    try {
      tempData = File.createTempFile("dkpro-arktweet-pos-trainer", ".tsv");
      out = new PrintWriter(
          new OutputStreamWriter(new FileOutputStream(tempData), StandardCharsets.UTF_8));
    } catch (IOException e) {
      throw new AnalysisEngineProcessException(e);
    }
  }

  Map<Sentence, Collection<Token>> index = indexCovered(jCas, Sentence.class, Token.class);
  for (Sentence sentence : select(jCas, Sentence.class)) {
    Collection<Token> tokens = index.get(sentence);
    // Guard against sentences missing from the index (no covered tokens) to avoid an NPE;
    // the blank-line sentence separator is still emitted below.
    if (tokens != null) {
      for (Token token : tokens) {
        out.printf("%s\t%s%n", token.getText(), token.getPos().getPosValue());
      }
    }
    out.println();
  }
}
/**
 * Appends the POS-tagged tokens of the CAS to the temporary training-data file.
 *
 * <p>On first call, lazily creates the temporary TSV file and a UTF-8 writer over it. For each
 * sentence, every token is written as {@code text<TAB>posValue}, followed by a blank line as
 * the sentence separator.
 *
 * @param aJCas the CAS to convert into training data
 * @throws AnalysisEngineProcessException if the temporary file cannot be created
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
  if (tempData == null) {
    try {
      tempData = File.createTempFile("dkpro-stanford-pos-trainer", ".tsv");
      out = new PrintWriter(
          new OutputStreamWriter(new FileOutputStream(tempData), StandardCharsets.UTF_8));
    } catch (IOException e) {
      throw new AnalysisEngineProcessException(e);
    }
  }

  Map<Sentence, Collection<Token>> index = indexCovered(aJCas, Sentence.class, Token.class);
  for (Sentence sentence : select(aJCas, Sentence.class)) {
    Collection<Token> tokens = index.get(sentence);
    // Guard against sentences missing from the index (no covered tokens) to avoid an NPE;
    // the blank-line sentence separator is still emitted below.
    if (tokens != null) {
      for (Token token : tokens) {
        out.printf("%s\t%s%n", token.getText(), token.getPos().getPosValue());
      }
    }
    out.println();
  }
}
// Group annotations of the requested type by their referent — presumably coreference
// targets; confirm against ReferentUtils in the full source.
Multimap<ReferenceTarget, T> map = ReferentUtils.createReferentMap(jCas, clazz);
// For each annotation: the sentences containing it...
Map<T, Collection<Sentence>> index = JCasUtil.indexCovering(jCas, clazz, Sentence.class);
// ...and the word tokens it spans.
Map<T, Collection<WordToken>> tokens = JCasUtil.indexCovered(jCas, clazz, WordToken.class);
// Group annotations of the requested type by their referent — presumably coreference
// targets; confirm against ReferentUtils in the full source.
Multimap<ReferenceTarget, T> map = ReferentUtils.createReferentMap(jCas, clazz);
// For each annotation: the sentences containing it...
Map<T, Collection<Sentence>> index = JCasUtil.indexCovering(jCas, clazz, Sentence.class);
// ...and the word tokens it spans.
Map<T, Collection<WordToken>> tokens = JCasUtil.indexCovered(jCas, clazz, WordToken.class);
/**
 * Writes one output row per entity in the jCas.
 *
 * <p>Builds four containment indexes once (entity→sentences, sentence→entities,
 * sentence→word tokens, word token→entities), converts each entity to a row and writes every
 * non-empty row.
 */
@Override
protected void write(JCas jCas) {
  final String source = getDocumentAnnotation(jCas).getSourceUri();

  // For each entity we need to find all the other sentences they are contained in
  // This should be all entities and sentences
  final Map<Entity, Collection<Sentence>> coveringSentence =
      JCasUtil.indexCovering(jCas, Entity.class, Sentence.class);
  final Map<Sentence, Collection<Entity>> coveredEntities =
      JCasUtil.indexCovered(jCas, Sentence.class, Entity.class);
  final Map<Sentence, Collection<WordToken>> coveredTokens =
      JCasUtil.indexCovered(jCas, Sentence.class, WordToken.class);
  final Map<WordToken, Collection<Entity>> coveringEntity =
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class);

  JCasUtil.select(jCas, Entity.class)
      .stream()
      .map(
          e ->
              convertEntityToRow(
                  source, coveringSentence, coveredEntities, coveredTokens, coveringEntity, e))
      // Skip entities that produced no row content.
      .filter(s -> s.length > 0)
      .forEach(this::write);
}
/**
 * Creates Text block annotations from the document's structural annotations.
 *
 * <p>If there are no structural annotations the entire document becomes a single Text block.
 * Otherwise a Text is created for each structure whose class is in {@code structuralClasses},
 * and nested Text annotations are then pruned: with {@code keepSmallest} the covering
 * (larger) blocks are removed, otherwise the covered (smaller) ones are.
 */
@Override
protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException {
  final Collection<Structure> structures = JCasUtil.select(jCas, Structure.class);
  if (structures.isEmpty()) {
    // If the jCas has no structural annotations then the entire text should be marked as a
    // text block
    final int end = jCas.getDocumentText().length();
    final Text t = new Text(jCas, 0, end);
    addToJCasIndex(t);
  } else {
    // Otherwise add the types we want...
    structures
        .stream()
        .filter(s -> structuralClasses.contains(s.getClass()))
        .map(s -> new Text(jCas, s.getBegin(), s.getEnd()))
        .forEach(this::addToJCasIndex);

    // Now remove any that cover others, so we keep only biggest/most detailed as per request
    final Map<Text, Collection<Text>> cover;
    if (keepSmallest) {
      cover = JCasUtil.indexCovering(jCas, Text.class, Text.class);
    } else {
      cover = JCasUtil.indexCovered(jCas, Text.class, Text.class);
    }
    // Remove where x has been pulled out as covering itself (potential bug
    // introduced in UIMAfit 2.3.0)
    cover.forEach((t, c) -> c.remove(t));
    // Every Text related to another through the chosen index is redundant — drop it.
    cover.values().stream().flatMap(Collection::stream).forEach(this::removeFromJCasIndex);
  }
}
/**
 * Creates Text block annotations from the document's structural annotations.
 *
 * <p>If there are no structural annotations the entire document becomes a single Text block.
 * Otherwise a Text is created for each structure whose class is in {@code structuralClasses},
 * and nested Text annotations are then pruned: with {@code keepSmallest} the covering
 * (larger) blocks are removed, otherwise the covered (smaller) ones are.
 */
@Override
protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException {
  final Collection<Structure> structures = JCasUtil.select(jCas, Structure.class);
  if (structures.isEmpty()) {
    // If the jCas has no structural annotations then the entire text should be marked as a
    // text block
    final int end = jCas.getDocumentText().length();
    final Text t = new Text(jCas, 0, end);
    addToJCasIndex(t);
  } else {
    // Otherwise add the types we want...
    structures
        .stream()
        .filter(s -> structuralClasses.contains(s.getClass()))
        .map(s -> new Text(jCas, s.getBegin(), s.getEnd()))
        .forEach(this::addToJCasIndex);

    // Now remove any that cover others, so we keep only biggest/most detailed as per request
    final Map<Text, Collection<Text>> cover;
    if (keepSmallest) {
      cover = JCasUtil.indexCovering(jCas, Text.class, Text.class);
    } else {
      cover = JCasUtil.indexCovered(jCas, Text.class, Text.class);
    }
    // Remove where x has been pulled out as covering itself (potential bug
    // introduced in UIMAfit 2.3.0)
    cover.forEach((t, c) -> c.remove(t));
    // Every Text related to another through the chosen index is redundant — drop it.
    cover.values().stream().flatMap(Collection::stream).forEach(this::removeFromJCasIndex);
  }
}
/**
 * {@inheritDoc}
 *
 * <p>Writes a "_cui.txt" file containing, for each identified annotation whose span exactly
 * matches a sentence span, one {@code CUI|TUI|coveredText} line per associated UMLS concept.
 */
@Override
public void writeFile( final JCas jCas,
                       final String outputDir,
                       final String documentId,
                       final String fileName ) throws IOException {
   // Write with an explicit UTF-8 encoder; the previous FileWriter silently used the
   // platform default charset, making output machine-dependent.
   try ( Writer writer = new BufferedWriter(
         new java.io.OutputStreamWriter(
               new java.io.FileOutputStream( outputDir + "/" + fileName + "_cui.txt" ),
               java.nio.charset.StandardCharsets.UTF_8 ) ) ) {
      // Sentence -> identified annotations covered by that sentence.
      final Map<Sentence, Collection<IdentifiedAnnotation>> sentenceCodes
            = JCasUtil.indexCovered( jCas, Sentence.class, IdentifiedAnnotation.class );
      for ( Map.Entry<Sentence, Collection<IdentifiedAnnotation>> entry : sentenceCodes.entrySet() ) {
         final int sentenceBegin = entry.getKey().getBegin();
         final int sentenceEnd = entry.getKey().getEnd();
         for ( IdentifiedAnnotation annotation : entry.getValue() ) {
            // Keep only annotations spanning the entire sentence.
            if ( annotation.getBegin() == sentenceBegin && annotation.getEnd() == sentenceEnd ) {
               for ( UmlsConcept umls : OntologyConceptUtil.getUmlsConcepts( annotation ) ) {
                  writer.write( umls.getCui() + '|' + umls.getTui() + '|'
                        + annotation.getCoveredText() + '\n' );
               }
            }
         }
      }
   }
}
// Index EventMention annotations by the EventMention spans covering them (nested mentions)
// in this view. NOTE(review): on this line alone the result appears discarded — presumably
// it completes an assignment begun on the preceding line; confirm in the full source.
JCasUtil.indexCovered(identifiedAnnotationView, EventMention.class, EventMention.class);
// Index EventMention annotations by the EventMention spans covering them (nested mentions)
// in this view. NOTE(review): on this line alone the result appears discarded — presumably
// it completes an assignment begun on the preceding line; confirm in the full source.
JCasUtil.indexCovered(identifiedAnnotationView, EventMention.class, EventMention.class);
// Dictionary terms covered by each brain-region chunk — continuation of an expression
// begun on the preceding line, which is not visible in this view.
.indexCovered(jCas, BrainRegionChunk.class, BrainRegionDictTerm.class);
// Brain-region chunks covered by each co-occurrence annotation.
Map<BRCooc, Collection<BrainRegionChunk>> coocsIdx = JCasUtil
      .indexCovered(jCas, BRCooc.class, BrainRegionChunk.class);
// Index EventMention annotations by the EventMention spans covering them (nested mentions)
// in the relation view. NOTE(review): on this line alone the result appears discarded —
// presumably it completes an assignment begun on the preceding line; confirm in the full source.
JCasUtil.indexCovered(relationView, EventMention.class, EventMention.class);
// Index EventMention annotations by the EventMention spans covering them (nested mentions)
// in the relation view. NOTE(review): on this line alone the result appears discarded —
// presumably it completes an assignment begun on the preceding line; confirm in the full source.
JCasUtil.indexCovered(relationView, EventMention.class, EventMention.class);
/**
 * Parses each sentence in the CAS with the constituency parser and adds an aligned
 * treebank tree to the annotation indexes.
 *
 * <p>Sentences that are empty or consist only of border text are skipped. Sentences with no
 * usable tokens yield a null parse; the aligned tree is still built from the terminal array.
 * NOTE(review): assumes {@code TreeUtils.buildAlignedTree} tolerates a null parse — confirm.
 *
 * @param jcas the CAS whose sentences are parsed
 * @throws AnalysisEngineProcessException declared by the interface; not thrown directly here
 */
@Override
public void createAnnotations( final JCas jcas ) throws AnalysisEngineProcessException {
   final String docId = DocumentIDAnnotationUtil.getDocumentID( jcas );
   logger.info( "Started processing: " + docId );
   // iterate over sentences
   Parse parse = null;
   //      final Collection<Sentence> allSentences = org.apache.uima.fit.util.JCasUtil.select( jcas, Sentence.class );
   //      for ( Sentence sentence : allSentences ) {
   // Sentence -> covered base tokens, computed once for the whole document.
   final Map<Sentence, Collection<BaseToken>> sentenceTokenMap
         = JCasUtil.indexCovered( jcas, Sentence.class, BaseToken.class );
   for ( Map.Entry<Sentence, Collection<BaseToken>> sentenceTokens : sentenceTokenMap.entrySet() ) {
      final Sentence sentence = sentenceTokens.getKey();
      final String text = sentence.getCoveredText();
      // Skip empty sentences and decorative border-only text.
      if ( text.isEmpty() || isBorderOnly( text ) ) {
         continue;
      }
      //         final FSArray terminalArray = TreeUtils.getTerminals( jcas, sentence );
      final FSArray terminalArray = TreeUtils.getTerminals( jcas, new ArrayList<>( sentenceTokens.getValue() ) );
      final String tokenString = TreeUtils.getSplitSentence( terminalArray );
      if ( tokenString.isEmpty() ) {
         // No tokens to parse; fall through with a null parse.
         parse = null;
      } else {
         final Parse inputTokens = TreeUtils.ctakesTokensToOpennlpTokens( sentence.getBegin(), text, terminalArray );
         parse = parser.parse( inputTokens );
      }
      // Align the OpenNLP parse back onto the cTAKES terminals and index the tree.
      final TopTreebankNode top = TreeUtils.buildAlignedTree( jcas, parse, terminalArray, sentence );
      top.addToIndexes();
   }
   logger.info( "Done parsing: " + docId );
}
// Sentence -> covered tokens, built once up front rather than selecting per sentence.
Map<Sentence, Collection<Token>> index = indexCovered(aJCas, Sentence.class, Token.class);
// Loop body continues beyond this view.
for (Sentence sentence : select(aJCas, Sentence.class)) {
// Sentence -> covered tokens, built once up front rather than selecting per sentence.
Map<Sentence, Collection<Token>> index = indexCovered(aJCas, Sentence.class, Token.class);
for (Sentence sentence : select(aJCas, Sentence.class)) {
  // NOTE(review): get may return null for a sentence with no indexed tokens; the loop body
  // is not visible here — confirm it handles that case.
  Collection<Token> tokens = index.get(sentence);
// Closes an argument list begun on the preceding line, which is not visible in this view.
"pos.tagset.tagSplitPattern");
// Sentence -> covered tokens, built once up front rather than selecting per sentence.
Map<Sentence, Collection<Token>> index = indexCovered(aJCas, Sentence.class, Token.class);
// Loop body continues beyond this view.
for (Sentence sentence : select(aJCas, Sentence.class)) {