/**
 * Rebuilds the dependency-node lookup for the supplied CAS.
 *
 * <p>Replaces {@code nodeEntMap} with a fresh uimaFIT covering index in which every
 * {@link ConllDependencyNode} is associated with the {@link IdentifiedAnnotation}s that cover it.
 *
 * @param jcas the CAS whose annotations are indexed
 */
@Override
public void reset(JCas jcas) {
  nodeEntMap =
      JCasUtil.indexCovering(
          jcas,
          ConllDependencyNode.class,
          IdentifiedAnnotation.class);
}
/**
 * Rebuilds the dependency-node lookup for the supplied CAS.
 *
 * <p>Replaces {@code nodeEntMap} with a fresh uimaFIT covering index in which every
 * {@link ConllDependencyNode} is associated with the {@link IdentifiedAnnotation}s that cover it.
 *
 * @param jcas the CAS whose annotations are indexed
 */
@Override
public void reset(JCas jcas) {
  nodeEntMap =
      JCasUtil.indexCovering(
          jcas,
          ConllDependencyNode.class,
          IdentifiedAnnotation.class);
}
@SuppressWarnings( { "rawtypes", "unchecked" } ) // hold my beer and watch this... static private Map<Annotation, List<IdentifiedAnnotation>> createCoveringMap( final JCas jCas, final List<Class<? extends Annotation>> coveredClasses, final List<Class<? extends IdentifiedAnnotation>> coveringClasses ) { final Map<Annotation, List<IdentifiedAnnotation>> allCovering = new HashMap<>(); for ( Class covered : coveredClasses ) { for ( Class covering : coveringClasses ) { allCovering.putAll( JCasUtil.indexCovering( jCas, covered, covering ) ); } } return allCovering; }
/**
 * Creates a document factory for the given jCas that uses the supplied {@link SentenceFactory}.
 *
 * <p>The entity-to-covering-sentence index and the {@link Entity} referent map are computed from
 * the jCas here and handed, together with the sentence factory, to the overloaded constructor.
 *
 * @param jCas the jCas the document is based on
 * @param sentenceFactory the sentence factory to use
 */
public DocumentFactory(JCas jCas, SentenceFactory sentenceFactory) {
  this(
      jCas,
      JCasUtil.indexCovering(jCas, Entity.class, Sentence.class),
      ReferentUtils.createReferentMap(jCas, Entity.class),
      sentenceFactory);
}
/**
 * Creates a document factory for the given jCas that uses the supplied {@link SentenceFactory}.
 *
 * <p>The entity-to-covering-sentence index and the {@link Entity} referent map are computed from
 * the jCas here and handed, together with the sentence factory, to the overloaded constructor.
 *
 * @param jCas the jCas the document is based on
 * @param sentenceFactory the sentence factory to use
 */
public DocumentFactory(JCas jCas, SentenceFactory sentenceFactory) {
  this(
      jCas,
      JCasUtil.indexCovering(jCas, Entity.class, Sentence.class),
      ReferentUtils.createReferentMap(jCas, Entity.class),
      sentenceFactory);
}
/**
 * Creates a sentence factory for the given jCas.
 *
 * <p>Four uimaFIT indexes are built from the jCas and passed to the overloaded constructor, in this
 * order: word tokens covered by each sentence, entities covering each word token, phrase chunks
 * covering each word token, and dependencies covered by each sentence.
 *
 * @param jCas the jCas to create sentences from
 */
public SentenceFactory(JCas jCas) {
  this(
      JCasUtil.indexCovered(
          jCas, uk.gov.dstl.baleen.types.language.Sentence.class, WordToken.class),
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class),
      JCasUtil.indexCovering(jCas, WordToken.class, PhraseChunk.class),
      JCasUtil.indexCovered(
          jCas, uk.gov.dstl.baleen.types.language.Sentence.class, Dependency.class));
}
/**
 * Creates a sentence factory for the given jCas.
 *
 * <p>Four uimaFIT indexes are built from the jCas and passed to the overloaded constructor, in this
 * order: word tokens covered by each sentence, entities covering each word token, phrase chunks
 * covering each word token, and dependencies covered by each sentence.
 *
 * @param jCas the jCas to create sentences from
 */
public SentenceFactory(JCas jCas) {
  this(
      JCasUtil.indexCovered(
          jCas, uk.gov.dstl.baleen.types.language.Sentence.class, WordToken.class),
      JCasUtil.indexCovering(jCas, WordToken.class, Entity.class),
      JCasUtil.indexCovering(jCas, WordToken.class, PhraseChunk.class),
      JCasUtil.indexCovered(
          jCas, uk.gov.dstl.baleen.types.language.Sentence.class, Dependency.class));
}
/**
 * Creates the index from each Baleen annotation to the structure annotations covering it,
 * restricted to the requested structural classes.
 *
 * <p>The restriction is applied through {@link Maps#transformValues}, i.e. as a view over the
 * underlying uimaFIT covering index rather than as an eagerly filtered copy.
 *
 * @param jCas the jCas to index
 * @param structuralClasses the structural classes to keep
 * @return the filtered covering index
 */
private static Map<Annotation, Collection<Structure>> buildCovering(
    JCas jCas, Set<Class<? extends Structure>> structuralClasses) {
  return Maps.transformValues(
      JCasUtil.indexCovering(jCas, BaleenAnnotation.class, Structure.class),
      coveringStructures -> TypeUtils.filterAnnotations(coveringStructures, structuralClasses));
}
}
/**
 * Creates the index from each Baleen annotation to the structure annotations covering it,
 * restricted to the requested structural classes.
 *
 * <p>The restriction is applied through {@link Maps#transformValues}, i.e. as a view over the
 * underlying uimaFIT covering index rather than as an eagerly filtered copy.
 *
 * @param jCas the jCas to index
 * @param structuralClasses the structural classes to keep
 * @return the filtered covering index
 */
private static Map<Annotation, Collection<Structure>> buildCovering(
    JCas jCas, Set<Class<? extends Structure>> structuralClasses) {
  return Maps.transformValues(
      JCasUtil.indexCovering(jCas, BaleenAnnotation.class, Structure.class),
      coveringStructures -> TypeUtils.filterAnnotations(coveringStructures, structuralClasses));
}
}
/**
 * Collects, for every reference target, its mentions of the requested entity type together with
 * the sentences that cover those mentions.
 *
 * @param jCas the jCas to read from
 * @param clazz the entity type to gather information for
 * @return one {@link EntityInformation} per reference target found in the referent map
 */
@Override
public <T extends Entity> Set<EntityInformation<T>> getEntityInformation(
    JCas jCas, Class<T> clazz) {
  Multimap<ReferenceTarget, T> map = ReferentUtils.createReferentMap(jCas, clazz);
  Map<T, Collection<Sentence>> index = JCasUtil.indexCovering(jCas, clazz, Sentence.class);

  Set<EntityInformation<T>> infos = new HashSet<>();
  map.asMap()
      .forEach(
          (target, mentions) -> {
            // Union of the sentences covering any mention of this target.
            Collection<Sentence> sentences =
                mentions.stream()
                    .map(index::get)
                    .flatMap(Collection::stream)
                    .collect(Collectors.toSet());
            infos.add(new EntityInformation<T>(target, mentions, sentences));
          });
  return infos;
}
}
/**
 * Collects, for every reference target, its mentions of the requested entity type together with
 * the sentences that cover those mentions.
 *
 * @param jCas the jCas to read from
 * @param clazz the entity type to gather information for
 * @return one {@link EntityInformation} per reference target found in the referent map
 */
@Override
public <T extends Entity> Set<EntityInformation<T>> getEntityInformation(
    JCas jCas, Class<T> clazz) {
  Multimap<ReferenceTarget, T> map = ReferentUtils.createReferentMap(jCas, clazz);
  Map<T, Collection<Sentence>> index = JCasUtil.indexCovering(jCas, clazz, Sentence.class);

  Set<EntityInformation<T>> infos = new HashSet<>();
  map.asMap()
      .forEach(
          (target, mentions) -> {
            // Union of the sentences covering any mention of this target.
            Collection<Sentence> sentences =
                mentions.stream()
                    .map(index::get)
                    .flatMap(Collection::stream)
                    .collect(Collectors.toSet());
            infos.add(new EntityInformation<T>(target, mentions, sentences));
          });
  return infos;
}
}
/**
 * Writes one row per {@link Event} in the jCas.
 *
 * <p>Each event is converted by {@code extracted} using the document's source URI and the index of
 * sentences covering each event; conversions that yield a zero-length result are skipped.
 *
 * @param jCas the jCas whose events are written
 */
@Override
protected void write(JCas jCas) {
  final String source = getDocumentAnnotation(jCas).getSourceUri();

  final Map<Event, Collection<Sentence>> coveringSentence =
      JCasUtil.indexCovering(jCas, Event.class, Sentence.class);

  JCasUtil.select(jCas, Event.class)
      .stream()
      .map(event -> extracted(source, coveringSentence, event))
      .filter(row -> row.length > 0)
      .forEach(this::write);
}
JCas jCas, Class<T> clazz) { Multimap<ReferenceTarget, T> map = ReferentUtils.createReferentMap(jCas, clazz); Map<T, Collection<Sentence>> index = JCasUtil.indexCovering(jCas, clazz, Sentence.class); Map<T, Collection<WordToken>> tokens = JCasUtil.indexCovered(jCas, clazz, WordToken.class);
JCas jCas, Class<T> clazz) { Multimap<ReferenceTarget, T> map = ReferentUtils.createReferentMap(jCas, clazz); Map<T, Collection<Sentence>> index = JCasUtil.indexCovering(jCas, clazz, Sentence.class); Map<T, Collection<WordToken>> tokens = JCasUtil.indexCovered(jCas, clazz, WordToken.class);
/**
 * Maps every Markable that is a member of a coreference cluster to the UMLS
 * IdentifiedAnnotations sharing its dependency head.
 *
 * <p>Coreference runs over Markables, which may be IdentifiedAnnotations but also pronouns. For
 * each Markable the nominal head node is looked up, the IdentifiedAnnotations covering that head
 * are retrieved, and only those that are UMLS annotations and whose own nominal head is that same
 * node are kept.
 *
 * @param jCas the CAS to read clusters and annotations from
 * @return mapping from each cluster member to its (possibly empty) list of matching annotations
 */
public static Map<Markable,List<IdentifiedAnnotation>> indexCoveringUmlsAnnotations(JCas jCas){
  final Map<ConllDependencyNode, Collection<IdentifiedAnnotation>> coveringByNode =
      JCasUtil.indexCovering(jCas, ConllDependencyNode.class, IdentifiedAnnotation.class);
  final Map<Markable,List<IdentifiedAnnotation>> umlsByMarkable = new HashMap<>();

  for(CollectionTextRelation cluster : JCasUtil.select(jCas, CollectionTextRelation.class)){
    final List<Markable> members =
        new ArrayList<>(JCasUtil.select(cluster.getMembers(), Markable.class));
    for(Markable member : members){
      // Register the member first so it is present even when nothing matches.
      final List<IdentifiedAnnotation> matches = new ArrayList<>();
      umlsByMarkable.put(member, matches);

      final ConllDependencyNode head = DependencyUtility.getNominalHeadNode(jCas, member);
      for(IdentifiedAnnotation candidate : coveringByNode.get(head)){
        if(!isUmlsAnnotation(candidate)){
          continue;
        }
        // Identity comparison: the candidate must resolve to the very same head node.
        if(head == DependencyUtility.getNominalHeadNode(jCas, candidate)){
          matches.add(candidate);
        }
      }
    }
  }
  return umlsByMarkable;
}
@Override protected void write(JCas jCas) { final String source = getDocumentAnnotation(jCas).getSourceUri(); // For each entity we need to find all the other sentences they are contained in // This should be all entities and sentences final Map<Entity, Collection<Sentence>> coveringSentence = JCasUtil.indexCovering(jCas, Entity.class, Sentence.class); final Map<Sentence, Collection<Entity>> coveredEntities = JCasUtil.indexCovered(jCas, Sentence.class, Entity.class); final Map<Sentence, Collection<WordToken>> coveredTokens = JCasUtil.indexCovered(jCas, Sentence.class, WordToken.class); final Map<WordToken, Collection<Entity>> coveringEntity = JCasUtil.indexCovering(jCas, WordToken.class, Entity.class); JCasUtil.select(jCas, Entity.class) .stream() .map( e -> convertEntityToRow( source, coveringSentence, coveredEntities, coveredTokens, coveringEntity, e)) .filter(s -> s.length > 0) .forEach(this::write); }
// Build the uimaFIT covering index: each ConllDependencyNode is mapped to the IdentifiedAnnotations that cover it.
coveringMap = JCasUtil.indexCovering(jCas, ConllDependencyNode.class, IdentifiedAnnotation.class);
/**
 * Maps every Markable that is a member of a coreference cluster to the UMLS
 * IdentifiedAnnotations sharing its dependency head.
 *
 * <p>Coreference runs over Markables, which may be IdentifiedAnnotations but also pronouns. For
 * each Markable the head node is fetched from the map registered under the
 * {@code MarkableHeadTreeCreator} key for this CAS, the IdentifiedAnnotations covering that head
 * are retrieved, and only those that are UMLS annotations and whose own nominal head is that same
 * node are kept.
 *
 * @param jCas the CAS to read clusters and annotations from
 * @return mapping from each cluster member to its (possibly empty) list of matching annotations
 */
public static Map<Markable,List<IdentifiedAnnotation>> indexCoveringUmlsAnnotations(JCas jCas){
  final Map<ConllDependencyNode, Collection<IdentifiedAnnotation>> coveringByNode =
      JCasUtil.indexCovering(jCas, ConllDependencyNode.class, IdentifiedAnnotation.class);
  final Map<Markable,List<IdentifiedAnnotation>> umlsByMarkable = new HashMap<>();

  for(CollectionTextRelation cluster : JCasUtil.select(jCas, CollectionTextRelation.class)){
    final List<Markable> members =
        new ArrayList<>(JCasUtil.select(cluster.getMembers(), Markable.class));
    for(Markable member : members){
      // Register the member first so it is present even when nothing matches.
      final List<IdentifiedAnnotation> matches = new ArrayList<>();
      umlsByMarkable.put(member, matches);

      final ConllDependencyNode head =
          MapFactory.get(MarkableHeadTreeCreator.getKey(jCas), member);
      for(IdentifiedAnnotation candidate : coveringByNode.get(head)){
        if(!isUmlsAnnotation(candidate)){
          continue;
        }
        // Identity comparison: the candidate must resolve to the very same head node.
        if(head == DependencyUtility.getNominalHeadNode(jCas, candidate)){
          matches.add(candidate);
        }
      }
    }
  }
  return umlsByMarkable;
}
@Override protected void doProcess(final JCas jCas) throws AnalysisEngineProcessException { final Collection<Structure> structures = JCasUtil.select(jCas, Structure.class); if (structures.isEmpty()) { // If the jCas has no structural annotations then the entire text should be marked as a text // block final int end = jCas.getDocumentText().length(); final Text t = new Text(jCas, 0, end); addToJCasIndex(t); } else { // Otherwise add the types we want... structures .stream() .filter(s -> structuralClasses.contains(s.getClass())) .map(s -> new Text(jCas, s.getBegin(), s.getEnd())) .forEach(this::addToJCasIndex); // Now remove any that cover others, so we keep only biggest/most detailed as per request final Map<Text, Collection<Text>> cover; if (keepSmallest) { cover = JCasUtil.indexCovering(jCas, Text.class, Text.class); } else { cover = JCasUtil.indexCovered(jCas, Text.class, Text.class); } cover.forEach( (t, c) -> c.remove(t)); // Remove where x has been pulled out as covering itself (potential bug // introduced in UIMAfit 2.3.0) cover.values().stream().flatMap(Collection::stream).forEach(this::removeFromJCasIndex); } }
// uimaFIT covering index: each Relation mapped to the Sentence annotation(s) covering it.
// NOTE(review): the receiver of this result is not visible in this fragment - confirm it is assigned/used.
JCasUtil.indexCovering(jCas, Relation.class, Sentence.class);