/**
 * Looks up the sentences covering the given token and hands back the first of them,
 * provided that it begins at exactly the same offset as the token.
 *
 * @param t token
 * @return the first covering sentence when its begin offset equals the token's begin
 *         offset; {@code null} when no sentence covers the token or the first one
 *         begins elsewhere
 */
public static Sentence sentenceStartsOnToken(Token t) {
    List<Sentence> covering = JCasUtil.selectCovering(Sentence.class, t);
    if (covering.isEmpty()) {
        return null;
    }

    // Only the first covering sentence is ever considered.
    Sentence first = covering.get(0);
    if (first.getBegin() != t.getBegin()) {
        return null;
    }
    return first;
}
/**
 * Produces one {@link Feature} per string returned by {@code getEntityFeats} for the
 * given annotation and the first sentence covering its span.
 *
 * @param jCas the CAS in which the covering sentence is looked up
 * @param entity the annotation to extract features for; it is cast to
 *        {@code IdentifiedAnnotation} once a covering sentence has been found
 * @return the extracted features; empty when no sentence covers the annotation
 * @throws CleartkExtractorException declared by the overridden method
 */
@Override
public List<Feature> extract(JCas jCas, Annotation entity) throws CleartkExtractorException {
    List<Feature> result = new ArrayList<Feature>();

    List<Sentence> covering =
            JCasUtil.selectCovering(jCas, Sentence.class, entity.getBegin(), entity.getEnd());
    if (covering == null || covering.isEmpty()) {
        return result;
    }

    IdentifiedAnnotation mention = (IdentifiedAnnotation) entity;
    for (String name : getEntityFeats(jCas, mention, covering.get(0))) {
        result.add(new Feature(name));
    }
    return result;
}
/**
 * Produces one {@link Feature} per string returned by {@code getEntityFeats} for the
 * given annotation and the first sentence covering its span.
 *
 * @param jCas the CAS in which the covering sentence is looked up
 * @param entity the annotation to extract features for; it is cast to
 *        {@code IdentifiedAnnotation} once a covering sentence has been found
 * @return the extracted features; empty when no sentence covers the annotation
 * @throws CleartkExtractorException declared by the overridden method
 */
@Override
public List<Feature> extract(JCas jCas, Annotation entity) throws CleartkExtractorException {
    List<Feature> result = new ArrayList<Feature>();

    List<Sentence> covering =
            JCasUtil.selectCovering(jCas, Sentence.class, entity.getBegin(), entity.getEnd());
    if (covering == null || covering.isEmpty()) {
        return result;
    }

    IdentifiedAnnotation mention = (IdentifiedAnnotation) entity;
    for (String name : getEntityFeats(jCas, mention, covering.get(0))) {
        result.add(new Feature(name));
    }
    return result;
}
private static String formatError(JCas jcas, IdentifiedAnnotation mention){ List<Sentence> context = JCasUtil.selectCovering(jcas, Sentence.class, mention.getBegin(), mention.getEnd()); StringBuffer buff = new StringBuffer(); if(context.size() > 0){ Sentence sent = context.get(0); buff.append(sent.getCoveredText()); long offset = mention.getBegin() - sent.getBegin(); if (offset>=Integer.MAX_VALUE || offset<=Integer.MIN_VALUE) { offset=0; } // for spanless annots buff.insert((int)offset, "***"); offset += (mention.getEnd()-mention.getBegin() + 3); buff.insert((int)offset, "***"); } return buff.toString(); }
/**
 * Tells whether two begin offsets fall inside one and the same sentence. Sentences are
 * treated as half-open intervals <code>[begin,end)</code>, so an offset equal to a
 * sentence's end is not counted as belonging to that sentence. When no sentence covers
 * the first offset the result is <code>false</code>.
 *
 * @param aJcas
 *            the JCas.
 * @param aBegin1
 *            the reference offset.
 * @param aBegin2
 *            the comparison offset.
 * @return whether both offsets lie within the same sentence.
 */
public static boolean isBeginInSameSentence(JCas aJcas, int aBegin1, int aBegin2)
{
    for (Sentence sentence : selectCovering(aJcas, Sentence.class, aBegin1, aBegin1)) {
        int sentBegin = sentence.getBegin();
        int sentEnd = sentence.getEnd();

        boolean holdsFirst = sentBegin <= aBegin1 && aBegin1 < sentEnd;
        boolean holdsSecond = sentBegin <= aBegin2 && aBegin2 < sentEnd;
        if (holdsFirst && holdsSecond) {
            return true;
        }
    }
    return false;
}
/** * Returns true if the token has a preceding whitespace in the original document * * @param token token * @param jCas jcas * @return boolen */ public static boolean hasSpaceBefore(Token token, JCas jCas) { // select previous token(s) List<Token> prevTokens = JCasUtil.selectPreceding(jCas, Token.class, token, 1); Paragraph paragraph = JCasUtil.selectCovering(jCas, Paragraph.class, token).iterator() .next(); return !prevTokens.isEmpty() && (prevTokens.iterator().next().getEnd() != token.getBegin()) && (token.getBegin() != paragraph.getBegin()); }
/**
 * Reports whether an argument component annotation ends at this token. Covering
 * annotations are gathered in the fixed order Claim, Backing, Premise, Rebuttal,
 * Refutation, and only the first one gathered is compared against the token's end.
 *
 * @param t token
 * @param jCas jcas
 * @return {@code true} if at least one argument component covers the token and the
 *         first one gathered ends at the token's end offset
 */
public static boolean argAnnotationEnds(Token t, JCas jCas) {
    final int tokenBegin = t.getBegin();
    final int tokenEnd = t.getEnd();

    List<ArgumentComponent> covering = new ArrayList<>();
    covering.addAll(JCasUtil.selectCovering(jCas, Claim.class, tokenBegin, tokenEnd));
    covering.addAll(JCasUtil.selectCovering(jCas, Backing.class, tokenBegin, tokenEnd));
    covering.addAll(JCasUtil.selectCovering(jCas, Premise.class, tokenBegin, tokenEnd));
    covering.addAll(JCasUtil.selectCovering(jCas, Rebuttal.class, tokenBegin, tokenEnd));
    covering.addAll(JCasUtil.selectCovering(jCas, Refutation.class, tokenBegin, tokenEnd));

    if (covering.isEmpty()) {
        return false;
    }
    return covering.get(0).getEnd() == tokenEnd;
}
/**
 * Tells whether a begin offset and an end offset fall inside one and the same sentence.
 * The begin offset is tested against the half-open interval <code>[begin,end)</code> of
 * the sentence, whereas the end offset may coincide with the sentence's end and still
 * counts as part of it. When no sentence covers the begin offset the result is
 * <code>false</code>.
 *
 * @param aJcas
 *            the JCas.
 * @param aBegin
 *            the reference offset.
 * @param aEnd
 *            the comparison offset.
 * @return whether both offsets lie within the same sentence.
 */
public static boolean isBeginEndInSameSentence(JCas aJcas, int aBegin, int aEnd)
{
    for (Sentence sentence : selectCovering(aJcas, Sentence.class, aBegin, aBegin)) {
        int sentBegin = sentence.getBegin();
        int sentEnd = sentence.getEnd();

        boolean holdsBegin = sentBegin <= aBegin && aBegin < sentEnd;
        // inclusive upper bound: an end offset may sit exactly on the sentence end
        boolean holdsEnd = sentBegin <= aEnd && aEnd <= sentEnd;
        if (holdsBegin && holdsEnd) {
            return true;
        }
    }
    return false;
}
/**
 * Tells whether a begin offset and an end offset fall inside one and the same sentence.
 * The begin offset is tested against the half-open interval <code>[begin,end)</code> of
 * the sentence, whereas the end offset may coincide with the sentence's end and still
 * counts as part of it. When no sentence covers the begin offset the result is
 * <code>false</code>.
 *
 * @param aJcas
 *            the JCas.
 * @param aBegin
 *            the reference offset.
 * @param aEnd
 *            the comparison offset.
 * @return whether both offsets lie within the same sentence.
 */
public static boolean isBeginEndInSameSentence(JCas aJcas, int aBegin, int aEnd)
{
    for (Sentence sentence : selectCovering(aJcas, Sentence.class, aBegin, aBegin)) {
        int sentBegin = sentence.getBegin();
        int sentEnd = sentence.getEnd();

        boolean holdsBegin = sentBegin <= aBegin && aBegin < sentEnd;
        // inclusive upper bound: an end offset may sit exactly on the sentence end
        boolean holdsEnd = sentBegin <= aEnd && aEnd <= sentEnd;
        if (holdsBegin && holdsEnd) {
            return true;
        }
    }
    return false;
}
private static String formatError(JCas jcas, IdentifiedAnnotation mention){ List<Sentence> context = JCasUtil.selectCovering(jcas, Sentence.class, mention.getBegin(), mention.getEnd()); StringBuffer buff = new StringBuffer(); if(context.size() > 0){ Sentence sent = context.get(0); buff.append(sent.getCoveredText()); long offset = mention.getBegin() - sent.getBegin(); if (offset>=Integer.MAX_VALUE || offset<=Integer.MIN_VALUE) { offset=0; } // for spanless annots buff.insert((int)offset, "***"); offset += (mention.getEnd()-mention.getBegin() + 3); buff.insert((int)offset, "***"); } return buff.toString(); }
/**
 * Tells whether two begin offsets fall inside one and the same sentence. Sentences are
 * treated as half-open intervals <code>[begin,end)</code>, so an offset equal to a
 * sentence's end is not counted as belonging to that sentence. When no sentence covers
 * the first offset the result is <code>false</code>.
 *
 * @param aJcas
 *            the JCas.
 * @param aBegin1
 *            the reference offset.
 * @param aBegin2
 *            the comparison offset.
 * @return whether both offsets lie within the same sentence.
 */
public static boolean isBeginInSameSentence(JCas aJcas, int aBegin1, int aBegin2)
{
    for (Sentence sentence : selectCovering(aJcas, Sentence.class, aBegin1, aBegin1)) {
        int sentBegin = sentence.getBegin();
        int sentEnd = sentence.getEnd();

        boolean holdsFirst = sentBegin <= aBegin1 && aBegin1 < sentEnd;
        boolean holdsSecond = sentBegin <= aBegin2 && aBegin2 < sentEnd;
        if (holdsFirst && holdsSecond) {
            return true;
        }
    }
    return false;
}
@Override protected FreqDist<String> getTermFrequencies(JCas jcas) throws AnalysisEngineProcessException { // count all terms with the given annotation FreqDist<String> termFrequencies = new FreqDist<String>(); try { for(Entry<AnnotationFS, String> entry : FeaturePathFactory.select(jcas.getCas(), featurePath)){ int occurrences = 1; for(CoreferenceLink link : JCasUtil.selectCovering(jcas, CoreferenceLink.class, entry.getKey())){ occurrences += getRemainingChainLength(link); } termFrequencies.count(entry.getValue(), occurrences); } } catch (FeaturePathException e) { throw new AnalysisEngineProcessException(e); } return termFrequencies; }
/**
 * Emits a {@code "TreeFrag_" + prefix} feature for every entry of {@code frags} that is
 * contained (ignoring case) in the dependency tree built for the mention from the first
 * sentence covering it.
 *
 * @param jCas the CAS in which the sentence and its dependency nodes are looked up
 * @param mention the annotation to extract features for
 * @return the matching fragment features; empty when no sentence covers the mention or
 *         no tree could be built (the latter is reported on {@code System.err})
 */
@Override
public List<Feature> extract(JCas jCas, IdentifiedAnnotation mention) {
    List<Feature> result = new ArrayList<Feature>();

    List<Sentence> covering =
            JCasUtil.selectCovering(jCas, Sentence.class, mention.getBegin(), mention.getEnd());
    if (covering == null || covering.isEmpty()) {
        return result;
    }

    Sentence sentence = covering.get(0);
    List<ConllDependencyNode> depNodes =
            JCasUtil.selectCovered(ConllDependencyNode.class, sentence);
    SimpleTree depTree = AssertionDepUtils.getTokenTreeString(
            jCas, depNodes, mention, GenerateDependencyRepresentation.UP_NODES);
    if (depTree == null) {
        System.err.println("Tree is null!");
        return result;
    }

    // The tree is rewritten in place before the fragments are matched against it.
    AssertionTreeUtils.replaceDependencyWordsWithSemanticClasses(depTree, sems);
    for (SimpleTree fragment : frags) {
        if (!TreeUtils.containsDepFragIgnoreCase(depTree, fragment)) {
            continue;
        }
        result.add(new Feature("TreeFrag_" + prefix, fragment.toString()));
    }
    return result;
}
} // closes the enclosing scope opened outside this excerpt (presumably the class)
/**
 * Emits a {@code "TreeFrag_" + prefix} feature for every entry of {@code frags} that is
 * contained (ignoring case) in the dependency tree built for the mention from the first
 * sentence covering it.
 *
 * @param jCas the CAS in which the sentence and its dependency nodes are looked up
 * @param mention the annotation to extract features for
 * @return the matching fragment features; empty when no sentence covers the mention or
 *         no tree could be built (the latter is reported on {@code System.err})
 */
@Override
public List<Feature> extract(JCas jCas, IdentifiedAnnotation mention) {
    List<Feature> result = new ArrayList<Feature>();

    List<Sentence> covering =
            JCasUtil.selectCovering(jCas, Sentence.class, mention.getBegin(), mention.getEnd());
    if (covering == null || covering.isEmpty()) {
        return result;
    }

    Sentence sentence = covering.get(0);
    List<ConllDependencyNode> depNodes =
            JCasUtil.selectCovered(ConllDependencyNode.class, sentence);
    SimpleTree depTree = AssertionDepUtils.getTokenTreeString(
            jCas, depNodes, mention, GenerateDependencyRepresentation.UP_NODES);
    if (depTree == null) {
        System.err.println("Tree is null!");
        return result;
    }

    // The tree is rewritten in place before the fragments are matched against it.
    AssertionTreeUtils.replaceDependencyWordsWithSemanticClasses(depTree, sems);
    for (SimpleTree fragment : frags) {
        if (!TreeUtils.containsDepFragIgnoreCase(depTree, fragment)) {
            continue;
        }
        result.add(new Feature("TreeFrag_" + prefix, fragment.toString()));
    }
    return result;
}
} // closes the enclosing scope opened outside this excerpt (presumably the class)
@Override public List<Feature> extract(JCas jcas, IdentifiedAnnotation focusAnnotation) throws CleartkExtractorException { List<Feature> feats = new ArrayList<>(); Sentence sent = null; List<Sentence> sents = JCasUtil.selectCovering(jcas, Sentence.class, focusAnnotation.getBegin(), focusAnnotation.getEnd()); if(sents != null && sents.size() > 0){ sent = sents.get(0); }else{ return feats; } List<ConllDependencyNode> nodes = DependencyUtility.getDependencyNodes(jcas, sent); ConllDependencyNode headNode = DependencyUtility.getNominalHeadNode(jcas, focusAnnotation); try { boolean[] regexFeats = conAnal.findNegationContext(nodes, headNode); for(int j = 0; j < regexFeats.length; j++){ if(regexFeats[j]){ feats.add(new Feature("DepPath_" + conAnal.getRegexName(j))); //"NEG_DEP_REGEX_"+j)); } } } catch (Exception e) { e.printStackTrace(); throw new CleartkExtractorException(e); } return feats; }
// Look up the sentences covering arg1's span; when none is found, fall back to the
// placeholder parse string "(S (no parse))". The rest of this if-statement lies outside
// this excerpt.
String treeString = null; List<Sentence> sents = JCasUtil.selectCovering(jCas, Sentence.class, arg1.getBegin(), arg1.getEnd()); if(sents == null || sents.size() == 0){ treeString = "(S (no parse))";
/**
 * Prints one line per {@code BinaryTextRelation} found in the default sofa view, in the
 * form {@code category|arg1|arg2|sentence}, where the argument texts are lower-cased and
 * the sentence is the first one covering both arguments.
 *
 * <p>The covered span runs from the smaller begin offset to the larger end offset of the
 * two arguments, so the lookup also works when the second argument precedes the first.
 * When no single sentence covers both arguments, the sentence field is left empty rather
 * than aborting the run with an {@link IndexOutOfBoundsException}.
 *
 * @param jCas the CAS whose default view is inspected
 * @throws AnalysisEngineProcessException if the default view cannot be obtained
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    JCas systemView;
    try {
        systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
    } catch (CASException e) {
        throw new AnalysisEngineProcessException(e);
    }

    for(BinaryTextRelation binaryTextRelation : JCasUtil.select(systemView, BinaryTextRelation.class)) {
        IdentifiedAnnotation entity1 = (IdentifiedAnnotation) binaryTextRelation.getArg1().getArgument();
        IdentifiedAnnotation entity2 = (IdentifiedAnnotation) binaryTextRelation.getArg2().getArgument();

        String category = binaryTextRelation.getCategory();
        String arg1 = entity1.getCoveredText().toLowerCase();
        String arg2 = entity2.getCoveredText().toLowerCase();

        // Order-independent span: the arguments may appear in either order in the text.
        int spanBegin = Math.min(entity1.getBegin(), entity2.getBegin());
        int spanEnd = Math.max(entity1.getEnd(), entity2.getEnd());
        List<Sentence> enclosingSentences = JCasUtil.selectCovering(
                systemView, Sentence.class, spanBegin, spanEnd);

        // A relation whose arguments are not covered by one sentence has no enclosing sentence.
        String sentenceText = enclosingSentences.isEmpty()
                ? ""
                : enclosingSentences.get(0).getCoveredText();

        System.out.format("%s|%s|%s|%s\n", category, arg1, arg2, sentenceText);
    }
}
} // closes the enclosing scope opened outside this excerpt (presumably the class)
/**
 * Prints one line per {@code BinaryTextRelation} found in the default sofa view, in the
 * form {@code category|arg1|arg2|sentence}, where the argument texts are lower-cased and
 * the sentence is the first one covering both arguments.
 *
 * <p>The covered span runs from the smaller begin offset to the larger end offset of the
 * two arguments, so the lookup also works when the second argument precedes the first.
 * When no single sentence covers both arguments, the sentence field is left empty rather
 * than aborting the run with an {@link IndexOutOfBoundsException}.
 *
 * @param jCas the CAS whose default view is inspected
 * @throws AnalysisEngineProcessException if the default view cannot be obtained
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    JCas systemView;
    try {
        systemView = jCas.getView(CAS.NAME_DEFAULT_SOFA);
    } catch (CASException e) {
        throw new AnalysisEngineProcessException(e);
    }

    for(BinaryTextRelation binaryTextRelation : JCasUtil.select(systemView, BinaryTextRelation.class)) {
        IdentifiedAnnotation entity1 = (IdentifiedAnnotation) binaryTextRelation.getArg1().getArgument();
        IdentifiedAnnotation entity2 = (IdentifiedAnnotation) binaryTextRelation.getArg2().getArgument();

        String category = binaryTextRelation.getCategory();
        String arg1 = entity1.getCoveredText().toLowerCase();
        String arg2 = entity2.getCoveredText().toLowerCase();

        // Order-independent span: the arguments may appear in either order in the text.
        int spanBegin = Math.min(entity1.getBegin(), entity2.getBegin());
        int spanEnd = Math.max(entity1.getEnd(), entity2.getEnd());
        List<Sentence> enclosingSentences = JCasUtil.selectCovering(
                systemView, Sentence.class, spanBegin, spanEnd);

        // A relation whose arguments are not covered by one sentence has no enclosing sentence.
        String sentenceText = enclosingSentences.isEmpty()
                ? ""
                : enclosingSentences.get(0).getCoveredText();

        System.out.format("%s|%s|%s|%s\n", category, arg1, arg2, sentenceText);
    }
}
} // closes the enclosing scope opened outside this excerpt (presumably the class)
@Override public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) throws AnalysisEngineProcessException { List<Feature> features = new ArrayList<Feature>(); // entity1 ... entity2 scenario if(arg1.getEnd() < arg2.getBegin()) { for(NP np : JCasUtil.selectCovering(jCas, NP.class, arg1.getBegin(), arg2.getEnd())) { if(arg1.getBegin() == np.getBegin() && arg2.getEnd() == np.getEnd()) { features.add(new Feature("arg1arg2insideNP", true)); } } } // entity2 ... entity1 scenario if(arg2.getEnd() < arg1.getBegin()) { for(NP np : JCasUtil.selectCovering(jCas, NP.class, arg2.getBegin(), arg1.getEnd())) { if(arg2.getBegin() == np.getBegin() && arg1.getEnd() == np.getEnd()) { features.add(new Feature("arg2arg1insideNP", true)); } } } return features; } }
@Override public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) throws AnalysisEngineProcessException { List<Feature> features = new ArrayList<Feature>(); // entity1 ... entity2 scenario if(arg1.getEnd() < arg2.getBegin()) { for(NP np : JCasUtil.selectCovering(jCas, NP.class, arg1.getBegin(), arg2.getEnd())) { if(arg1.getBegin() == np.getBegin() && arg2.getEnd() == np.getEnd()) { features.add(new Feature("arg1arg2insideNP", true)); } } } // entity2 ... entity1 scenario if(arg2.getEnd() < arg1.getBegin()) { for(NP np : JCasUtil.selectCovering(jCas, NP.class, arg2.getBegin(), arg1.getEnd())) { if(arg2.getBegin() == np.getBegin() && arg1.getEnd() == np.getEnd()) { features.add(new Feature("arg2arg1insideNP", true)); } } } return features; } }