/**
 * Pairs the given mention with coreference chains selected from {@code prevCas}.
 *
 * <p>A chain with more than one member is always paired with the mention. A chain with
 * exactly one member is paired only when the covered text of that member contains the
 * covered text of the mention, or vice versa. A chain with no members is skipped.
 *
 * @param jcas    the current CAS (not read by this method)
 * @param m       the mention to pair with chains
 * @param prevCas the CAS whose chains are considered; may be {@code null}
 * @return the (possibly empty) list of chain/mention pairs; never {@code null}
 */
@Override
public List<ClusterMentionFetcher.CollectionTextRelationIdentifiedAnnotationPair> getPairs(JCas jcas, Markable m, JCas prevCas) {
  List<ClusterMentionFetcher.CollectionTextRelationIdentifiedAnnotationPair> clusters = new ArrayList<>();
  if (prevCas == null) {
    return clusters;
  }

  for (CollectionTextRelation chain : JCasUtil.select(prevCas, CollectionTextRelation.class)) {
    Collection<Markable> members = JCasUtil.select(chain.getMembers(), Markable.class);

    // An empty chain has no member to compare against; calling iterator().next() on it
    // would throw NoSuchElementException, so skip it.
    if (members.isEmpty()) {
      continue;
    }

    boolean linkable;
    if (members.size() > 1) {
      // Multi-member chains are always candidates.
      linkable = true;
    } else {
      // Singleton chains are candidates only when one text contains the other.
      String singletonText = members.iterator().next().getCoveredText();
      String mentionText = m.getCoveredText();
      linkable = singletonText.contains(mentionText) || mentionText.contains(singletonText);
    }

    if (linkable) {
      clusters.add(new ClusterMentionFetcher.CollectionTextRelationIdentifiedAnnotationPair(chain, m));
    }
  }
  return clusters;
}
} // closes the enclosing scope opened before this block
@Override public List<CollectionTextRelationIdentifiedAnnotationPair> getPairs(JCas jcas, Markable mention) { List<CollectionTextRelationIdentifiedAnnotationPair> pairs = new ArrayList<>(); if(markableStrings.contains(mention.getCoveredText().toLowerCase())){ for(CollectionTextRelation cluster : JCasUtil.select(jcas, CollectionTextRelation.class)){ Annotation mostRecent = ClusterUtils.getMostRecent((NonEmptyFSList)cluster.getMembers(), mention); if(mostRecent == null) continue; for(Markable m : JCasUtil.select(cluster.getMembers(), Markable.class)){ if(m == mostRecent) break; // see if any of the members of the cluster have the exact same string as this if(m.getCoveredText().toLowerCase().equals(mention.getCoveredText().toLowerCase())){ pairs.add(new CollectionTextRelationIdentifiedAnnotationPair(cluster, mention)); break; } } } } markableStrings.add(mention.getCoveredText().toLowerCase()); return pairs; } }
@Override public List<CollectionTextRelationIdentifiedAnnotationPair> getPairs(JCas jcas, Markable mention) { List<CollectionTextRelationIdentifiedAnnotationPair> pairs = new ArrayList<>(); if(markableStrings.contains(mention.getCoveredText().toLowerCase())){ for(CollectionTextRelation cluster : JCasUtil.select(jcas, CollectionTextRelation.class)){ Annotation mostRecent = ClusterUtils.getMostRecent((NonEmptyFSList)cluster.getMembers(), mention); if(mostRecent == null) continue; for(Markable m : JCasUtil.select(cluster.getMembers(), Markable.class)){ if(m == mostRecent) break; // see if any of the members of the cluster have the exact same string as this if(m.getCoveredText().toLowerCase().equals(mention.getCoveredText().toLowerCase())){ pairs.add(new CollectionTextRelationIdentifiedAnnotationPair(cluster, mention)); break; } } } } markableStrings.add(mention.getCoveredText().toLowerCase()); return pairs; } }
private static NonEmptyFSList getCorrectDoctor(JCas jcas, Markable mention, List<NonEmptyFSList> drLists){ NonEmptyFSList correctDr = null; if(mention.getCoveredText().length() < 5){ if(drLists.size() > 0){ correctDr = drLists.get(0); String nameText = mention.getCoveredText().substring(4); for(NonEmptyFSList drList : drLists){ FSList curNode = drList; do{ String otherName = ((Markable)((NonEmptyFSList)curNode).getHead()).getCoveredText(); if(otherName.length() >= 5){ otherName = otherName.substring(4);
if(markable.getCoveredText().equals("I")){ System.err.println("Unauthorized markable 'I'"); if(coveredTokens.size() == 1 && coveredTokens.get(0).getPartOfSpeech() != null && coveredTokens.get(0).getPartOfSpeech().startsWith("PRP") && !markable.getCoveredText().toLowerCase().equals("it")){ toRemove.add(markable); }else if(coveredTokens.size() > 0 && (coveredTokens.get(0).getCoveredText().startsWith("Mr.") || coveredTokens.get(0).getCoveredText().startsWith("Dr.") || coveredTokens.get(0).getCoveredText().startsWith("Mrs.") || coveredTokens.get(0).getCoveredText().startsWith("Ms."))){ toRemove.add(markable); }else if(markable.getCoveredText().toLowerCase().endsWith("patient") || markable.getCoveredText().toLowerCase().equals("pt")){ toRemove.add(markable);
private static NonEmptyFSList getCorrectDoctor(JCas jcas, Markable mention, List<NonEmptyFSList> drLists){ NonEmptyFSList correctDr = null; if(mention.getCoveredText().length() < 5){ if(drLists.size() > 0){ correctDr = drLists.get(0); String nameText = mention.getCoveredText().substring(4); for(NonEmptyFSList drList : drLists){ FSList curNode = drList; do{ String otherName = ((Markable)((NonEmptyFSList)curNode).getHead()).getCoveredText(); if(otherName.length() >= 5){ otherName = otherName.substring(4);
if(markable.getCoveredText().equals("I")){ System.err.println("Unauthorized markable 'I'"); if(coveredTokens.size() == 1 && coveredTokens.get(0).getPartOfSpeech() != null && coveredTokens.get(0).getPartOfSpeech().startsWith("PRP") && !markable.getCoveredText().toLowerCase().equals("it")){ toRemove.add(markable); }else if(coveredTokens.size() > 0 && (coveredTokens.get(0).getCoveredText().startsWith("Mr.") || coveredTokens.get(0).getCoveredText().startsWith("Dr.") || coveredTokens.get(0).getCoveredText().startsWith("Mrs.") || coveredTokens.get(0).getCoveredText().startsWith("Ms.") || coveredTokens.get(0).getCoveredText().startsWith("Miss"))){ toRemove.add(markable); }else if(markable.getCoveredText().toLowerCase().endsWith("patient") || markable.getCoveredText().toLowerCase().equals("pt")){ toRemove.add(markable);
private List<CollectionTextRelationIdentifiedAnnotationPair> getExactStringMatchPairs( JCas jcas, IdentifiedAnnotation mention, int sentDist) { List<CollectionTextRelationIdentifiedAnnotationPair> pairs = new ArrayList<>(); if(markableStrings.contains(mention.getCoveredText().toLowerCase())){ for(CollectionTextRelation cluster : JCasUtil.select(jcas, CollectionTextRelation.class)){ Annotation mostRecent = ClusterUtils.getMostRecent((NonEmptyFSList)cluster.getMembers(), mention); if(mostRecent == null) continue; for(Markable m : JCasUtil.select(cluster.getMembers(), Markable.class)){ if(m == mostRecent) break; // see if any of the members of the cluster have the exact same string as this if(m.getCoveredText().toLowerCase().equals(mention.getCoveredText().toLowerCase())){ pairs.add(new CollectionTextRelationIdentifiedAnnotationPair(cluster, mention)); break; } } } } return pairs; }
private List<CollectionTextRelationIdentifiedAnnotationPair> getExactStringMatchPairs( JCas jcas, IdentifiedAnnotation mention, int sentDist) { List<CollectionTextRelationIdentifiedAnnotationPair> pairs = new ArrayList<>(); if(markableStrings.contains(mention.getCoveredText().toLowerCase())){ for(CollectionTextRelation cluster : JCasUtil.select(jcas, CollectionTextRelation.class)){ Annotation mostRecent = ClusterUtils.getMostRecent((NonEmptyFSList)cluster.getMembers(), mention); if(mostRecent == null) continue; for(Markable m : JCasUtil.select(cluster.getMembers(), Markable.class)){ if(m == mostRecent) break; // see if any of the members of the cluster have the exact same string as this if(m.getCoveredText().toLowerCase().equals(mention.getCoveredText().toLowerCase())){ pairs.add(new CollectionTextRelationIdentifiedAnnotationPair(cluster, mention)); break; } } } } return pairs; }
/**
 * Produces four features describing the mention: demonstrative flag, definite flag,
 * gender, and singular-number flag.
 *
 * <p>All four are derived from the mention's lower-cased covered text; the number feature
 * additionally receives the CAS and the mention itself.
 *
 * @param jCas    the CAS containing the mention
 * @param mention the mention to describe
 * @return a new list holding the features in the order DEM, DEF, GENDER, NUMBER
 * @throws CleartkExtractorException declared by the extractor contract
 */
@Override
public List<Feature> extract(JCas jCas, Markable mention) throws CleartkExtractorException {
  final String loweredText = mention.getCoveredText().toLowerCase();

  // Evaluate the helpers in the same order as before, then emit the features together.
  final boolean demonstrative = isDemonstrative(loweredText);
  final boolean definite = isDefinite(loweredText);
  final String genderValue = getGender(loweredText);
  final boolean isSingular = numberSingular(jCas, mention, loweredText);

  final List<Feature> result = new ArrayList<>();
  result.add(new Feature("MC_MENTION_DEM", demonstrative));
  result.add(new Feature("MC_MENTION_DEF", definite));
  result.add(new Feature("MC_MENTION_GENDER", genderValue));
  result.add(new Feature("MC_MENTION_NUMBER", isSingular));
  return result;
}
} // closes the enclosing scope opened before this block
/**
 * Produces four features describing the mention: demonstrative flag, definite flag,
 * gender, and singular-number flag.
 *
 * <p>The features are derived from the mention's lower-cased covered text. The number
 * feature also receives the CAS, the mention, and the value looked up for the mention in
 * {@code cache}.
 *
 * @param jCas    the CAS containing the mention
 * @param mention the mention to describe
 * @return a new list holding the features in the order DEM, DEF, GENDER, NUMBER
 * @throws IllegalStateException     if {@code cache} has not been set
 * @throws CleartkExtractorException declared by the extractor contract
 */
@Override
public List<Feature> extract(JCas jCas, Markable mention) throws CleartkExtractorException {
  if (cache == null) {
    // Missing initialization is an object-state error; IllegalStateException is still a
    // RuntimeException, so existing callers that catch RuntimeException are unaffected.
    throw new IllegalStateException("This extractor requires a call to setCache()");
  }

  List<Feature> features = new ArrayList<>();
  String s = mention.getCoveredText().toLowerCase();

  boolean isDem = isDemonstrative(s);
  boolean isDef = isDefinite(s);
  features.add(new Feature("MC_MENTION_DEM", isDem));
  features.add(new Feature("MC_MENTION_DEF", isDef));

  String gender = getGender(s);
  features.add(new Feature("MC_MENTION_GENDER", gender));

  boolean singular = numberSingular(jCas, mention, s, cache.get(mention));
  features.add(new Feature("MC_MENTION_NUMBER", singular));

  return features;
}
logger.warn(String.format("The markable %s has no head node, probably because of poorly-segmented text.", goldMarkable.getCoveredText())); continue; if (!match) { logger.warn(String.format("There is a gold markable %s [%d, %d] which could not map to a system markable.", goldMarkable.getCoveredText(), goldMarkable.getBegin(), goldMarkable.getEnd())); System.out.print(" "); for(Markable m : mappedElements){ System.out.print(" -> " + m.getCoveredText());
for (CollectionTextRelation rel : JCasUtil.select(goldView, CollectionTextRelation.class)) { for (Markable member : JCasUtil.select(rel.getMembers(), Markable.class)) { System.out.print("Mention: " + member.getCoveredText().replace("\n", "<CR>")); goldEntChainMap.put(member, goldChainNum); System.out.print(" -----> "); for (CollectionTextRelation rel : JCasUtil.select(docView, CollectionTextRelation.class)) { for (Markable member : JCasUtil.select(rel.getMembers(), Markable.class)) { System.out.print("Mention: " + member.getCoveredText().replace("\n", "<CR>")); sysEntChainMap.put(member, sysChainNum); if(!member.getView().getViewName().equals(docView.getViewName())){
String text = "<Out of bounds>"; if(!(goldMarkable.getBegin() < 0 || goldMarkable.getEnd() >= jcas.getDocumentText().length())){ text = goldMarkable.getCoveredText();
String text = "<Out of bounds>"; if(!(goldMarkable.getBegin() < 0 || goldMarkable.getEnd() >= jcas.getDocumentText().length())){ text = goldMarkable.getCoveredText();
System.out.print("W/in doc chain "); for (Markable m : markables) { System.out.print(" -> " + m.getCoveredText()); markable2chain.put(m, chain);
for ( Markable mention : markables ) { ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(jCas, mention); String mentionText = mention.getCoveredText().toLowerCase(); boolean singleton = true; double maxScore = Double.NEGATIVE_INFINITY; markableStrings.add(mention.getCoveredText().toLowerCase());
LOGGER.info("Writing positive instance linking mention [" + mention.getCoveredText() + "] to cluster with elements from previous document");
for(Markable mention : JCasUtil.selectCovered(jCas, Markable.class, segment)){ ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(jCas, mention); String mentionText = mention.getCoveredText().toLowerCase(); boolean singleton = true; double maxScore = Double.NEGATIVE_INFINITY; markableStrings.add(mention.getCoveredText().toLowerCase());