@Override public int compare(Markable m1, Markable m2) { // look at the start first if(m1.getBegin() < m2.getBegin()){ return -1; }else if(m2.getBegin() < m1.getBegin()){ return 1; }else if(m1.getEnd() < m2.getEnd()){ return -1; }else if(m2.getEnd() < m1.getEnd()){ return 1; }else{ // m1 and m2 have the exact same span return 0; } } }
public static boolean mapGoldMarkable(JCas jcas, Markable goldMarkable, Map<Markable,Markable> gold2sys, Map<ConllDependencyNode, Collection<Markable>> depIndex){ if(!(goldMarkable.getBegin() < 0 || goldMarkable.getEnd() >= jcas.getDocumentText().length())){ ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(jcas, goldMarkable); for(Markable sysMarkable : depIndex.get(headNode)){ ConllDependencyNode markNode = DependencyUtility.getNominalHeadNode(jcas, sysMarkable); if(markNode == headNode){ gold2sys.put(goldMarkable, sysMarkable); return true; } } }else{ // Have seen some instances where anafora writes a span that is not possible, log them // so they can be found and fixed: logger.warn(String.format("There is a markable with span [%d, %d] in a document with length %d\n", goldMarkable.getBegin(), goldMarkable.getEnd(), jcas.getDocumentText().length())); return false; } return false; } }
private static boolean mapGoldMarkable(JCas jcas, Markable goldMarkable, Map<Markable,Markable> gold2sys, Map<ConllDependencyNode, Collection<Markable>> depIndex){ if(!(goldMarkable.getBegin() < 0 || goldMarkable.getEnd() >= jcas.getDocumentText().length())){ ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(jcas, goldMarkable); for(Markable sysMarkable : depIndex.get(headNode)){ ConllDependencyNode markNode = DependencyUtility.getNominalHeadNode(jcas, sysMarkable); if(markNode == headNode){ gold2sys.put(goldMarkable, sysMarkable); return true; } } }else{ // Have seen some instances where anafora writes a span that is not possible, log them // so they can be found and fixed: logger.warn(String.format("There is a markable with span [%d, %d] in a document with length %d\n", goldMarkable.getBegin(), goldMarkable.getEnd(), jcas.getDocumentText().length())); return false; } return false; } }
@Override public List<Feature> extract(JCas jcas, Markable mention) throws CleartkExtractorException { List<Feature> feats = new ArrayList<>(); boolean anaInHeader = false; int anaPar = -1; // Find section headers -- paragraphs List<Paragraph> pars = new ArrayList<>(JCasUtil.select(jcas, Paragraph.class)); for(int i = 0; i < pars.size(); i++){ Paragraph par = pars.get(i); if(par.getBegin() > mention.getEnd()){ break; } // find the paragraph with the anaphor if(mention.getBegin() >= par.getBegin() && mention.getEnd() <= par.getEnd()){ anaPar = i; } List<Sentence> coveredSents = JCasUtil.selectCovered(jcas, Sentence.class, par); if(coveredSents != null && coveredSents.size() == 1){ if(anaPar == i){ anaInHeader = true; break; } } } feats.add(new Feature("AnaInHeader", anaInHeader)); return feats; }
@Override public List<Feature> extract(JCas jcas, Markable mention) throws CleartkExtractorException { List<Feature> feats = new ArrayList<>(); boolean anaInHeader = false; int anaPar = -1; // Find section headers -- paragraphs List<Paragraph> pars = new ArrayList<>(JCasUtil.select(jcas, Paragraph.class)); for(int i = 0; i < pars.size(); i++){ Paragraph par = pars.get(i); if(par.getBegin() > mention.getEnd()){ break; } // find the paragraph with the anaphor if(mention.getBegin() >= par.getBegin() && mention.getEnd() <= par.getEnd()){ anaPar = i; } List<Sentence> coveredSents = JCasUtil.selectCovered(jcas, Sentence.class, par); if(coveredSents != null && coveredSents.size() == 1){ if(anaPar == i){ anaInHeader = true; break; } } } feats.add(new Feature("AnaInHeader", anaInHeader)); return feats; }
// Taken when the mention begins before m ends.
if(mention.getBegin() < m.getEnd()){
// Taken when the mention begins before m ends.
if(mention.getBegin() < m.getEnd()){
@Override public List<Feature> extract(JCas jCas, CollectionTextRelation cluster, IdentifiedAnnotation mention) throws AnalysisEngineProcessException { List<Feature> feats = new ArrayList<>(); double maxSalience = 0.0; for(Markable member : new ListIterable<Markable>(cluster.getMembers())){ if(mention.getBegin() < member.getEnd()){ // during training this might happen -- see a member of a cluster that // is actually subsequent to the candidate mention break; } if(member.getConfidence() > maxSalience){ maxSalience = member.getConfidence(); } } feats.add(new Feature("MC_MAX_SALIENCE", maxSalience)); return feats; }
@Override public List<Feature> extract(JCas jCas, CollectionTextRelation cluster, IdentifiedAnnotation mention) throws AnalysisEngineProcessException { List<Feature> feats = new ArrayList<>(); double maxSalience = 0.0; for(Markable member : new ListIterable<Markable>(cluster.getMembers())){ if(mention.getBegin() < member.getEnd()){ // during training this might happen -- see a member of a cluster that // is actually subsequent to the candidate mention break; } if(member.getConfidence() > maxSalience){ maxSalience = member.getConfidence(); } } feats.add(new Feature("MC_MAX_SALIENCE", maxSalience)); return feats; }
// When member's span lies entirely inside par's span, record index i in parsWithAnteHeader
// and set the anteInHeader flag.
if(member.getBegin() >= par.getBegin() && member.getEnd() <= par.getEnd()){ parsWithAnteHeader.add(i); anteInHeader = true;
// When member's span lies entirely inside par's span, record index i in parsWithAnteHeader
// and set the anteInHeader flag.
if(member.getBegin() >= par.getBegin() && member.getEnd() <= par.getEnd()){ parsWithAnteHeader.add(i); anteInHeader = true;
// Build a "<begin>-<length>" key for the markable's span and skip this iteration when
// existingSpans already contains that key.
String key = markable.getBegin() + "-" + (markable.getEnd() - markable.getBegin()); if(existingSpans.contains(key)) continue;
// Build a "<begin>-<length>" key for the markable's span and skip this iteration when
// existingSpans already contains that key.
String key = markable.getBegin() + "-" + (markable.getEnd() - markable.getBegin()); if(existingSpans.contains(key)) continue;
// Treat the current list node as a non-empty cell and take its head as the gold markable;
// continue only when its begin is non-negative and its end is strictly below the document
// text length.
// NOTE(review): a markable ending exactly at the document length fails this check -- confirm
// that this is intended.
NonEmptyFSList element = (NonEmptyFSList) head; Markable goldMarkable = (Markable) element.getHead(); if(!(goldMarkable.getBegin() < 0 || goldMarkable.getEnd() >= jcas.getDocumentText().length())){
// Treat the current list node as a non-empty cell and take its head as the gold markable;
// continue only when its begin is non-negative and its end is strictly below the document
// text length.
// NOTE(review): a markable ending exactly at the document length fails this check -- confirm
// that this is intended.
NonEmptyFSList element = (NonEmptyFSList) head; Markable goldMarkable = (Markable) element.getHead(); if(!(goldMarkable.getBegin() < 0 || goldMarkable.getEnd() >= jcas.getDocumentText().length())){
@Override public void process(JCas jCas) throws AnalysisEngineProcessException { // createMarkablesUsingConstituencyTrees(jCas); createMarkablesUsingDependencyTrees(jCas); for(TimeMention timex : JCasUtil.select(jCas, TimeMention.class)){ boolean collision = false; for(Markable other : JCasUtil.selectCovered(jCas, Markable.class, timex.getBegin(), timex.getEnd())){ if(other.getBegin() == timex.getBegin() && other.getEnd() == timex.getEnd()){ collision = true; break; } } if(!collision){ Markable m = new Markable(jCas, timex.getBegin(), timex.getEnd()); m.addToIndexes(jCas); } } }
@Override public void process(JCas jCas) throws AnalysisEngineProcessException { // createMarkablesUsingConstituencyTrees(jCas); createMarkablesUsingDependencyTrees(jCas); for(TimeMention timex : JCasUtil.select(jCas, TimeMention.class)){ boolean collision = false; for(Markable other : JCasUtil.selectCovered(jCas, Markable.class, timex.getBegin(), timex.getEnd())){ if(other.getBegin() == timex.getBegin() && other.getEnd() == timex.getEnd()){ collision = true; break; } } if(!collision){ Markable m = new Markable(jCas, timex.getBegin(), timex.getEnd()); m.addToIndexes(jCas); } } }