@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // define chunking type this.chunking = new BioChunking<Token, Time>(Token.class, Time.class); // add features: word, character pattern, stem, pos this.tokenFeatureExtractors = Lists.newArrayList(); this.tokenFeatureExtractors.add(new CoveredTextExtractor<Token>()); NamedFeatureExtractor1<Token> ex = CharacterCategoryPatternFunction.createExtractor(); this.tokenFeatureExtractors.add(ex); this.tokenFeatureExtractors.add(new TimeWordsExtractor<Token>()); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "stem")); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "pos")); // add window of features before and after this.contextFeatureExtractors = Lists.newArrayList(); for (FeatureExtractor1<Token> extractor : this.tokenFeatureExtractors) { this.contextFeatureExtractors.add(new CleartkExtractor<Token, Token>(Token.class, extractor, new Preceding( 3), new Following(3))); } }
/**
 * Gathers all features for a single token.
 *
 * <p>Features are appended in a fixed order: first the output of every simple extractor
 * applied to the token, then the output of every window extractor, then the output of
 * every window n-gram extractor. The window extractors are invoked through
 * {@code extractWithin}, with the sentence passed as the bounding annotation.
 *
 * @param jCas the CAS view handed to each extractor
 * @param token the token to extract features for
 * @param sentence the annotation passed as bounds to the window extractors
 * @return a new list holding the features from all extractors, in the order above
 * @throws CleartkExtractorException if any extractor fails
 */
public List<Feature> extractFeatures(JCas jCas, Token token, Sentence sentence)
    throws CleartkExtractorException {
  List<Feature> collected = new ArrayList<Feature>();

  // features that depend on the token alone
  for (FeatureExtractor1<Token> simple : simpleExtractors) {
    List<Feature> tokenOnly = simple.extract(jCas, token);
    collected.addAll(tokenOnly);
  }

  // context-window features, bounded by the sentence
  for (CleartkExtractor<Token, Token> window : windowExtractors) {
    List<Feature> inWindow = window.extractWithin(jCas, token, sentence);
    collected.addAll(inWindow);
  }

  // context-window n-gram features, bounded by the sentence
  for (CleartkExtractor<Token, Token> ngramWindow : windowNGramExtractors) {
    List<Feature> inNGramWindow = ngramWindow.extractWithin(jCas, token, sentence);
    collected.addAll(inNGramWindow);
  }

  return collected;
}
/**
 * Extracts features for the focus annotation without a bounding span.
 *
 * <p>Delegates to the three-argument {@code extract}, supplying a fresh {@code NoBounds}
 * instance as the bounds.
 *
 * @param view the JCas containing the focus annotation
 * @param focusAnnotation the annotation whose features are extracted
 * @return the features produced by the bounded {@code extract} call
 * @throws CleartkExtractorException if extraction fails
 */
@Override
public List<Feature> extract(JCas view, FOCUS_T focusAnnotation)
    throws CleartkExtractorException {
  return extract(view, focusAnnotation, new NoBounds());
}
new CleartkExtractor( AssertionCuePhraseAnnotation.class, baseExtractorCuePhraseCategory, cuePhraseInWindowExtractor.extractWithin(jcas, current, firstCoveringSentence);
instance.addAll(extractor.extractWithin(identifiedAnnotationView, entityOrEventMention, coveringSent)); }else{ instance.addAll(extractor.extract(identifiedAnnotationView, entityOrEventMention));
/**
 * Two-annotation form of {@code extract}; simply forwards to
 * {@code extractBetween(view, annotation1, annotation2)}.
 *
 * @param view the JCas containing both annotations
 * @param annotation1 the first annotation, passed through unchanged
 * @param annotation2 the second annotation, passed through unchanged
 * @return whatever {@code extractBetween} returns for the two annotations
 * @throws CleartkExtractorException if extraction fails
 */
@Override
public java.util.List<Feature> extract(
    JCas view,
    FOCUS_T annotation1,
    FOCUS_T annotation2) throws CleartkExtractorException {
  return extractBetween(view, annotation1, annotation2);
}
new CleartkExtractor( AssertionCuePhraseAnnotation.class, baseExtractorCuePhraseCategory, cuePhraseInWindowExtractor.extractWithin(jcas, current, firstCoveringSentence);
/**
 * Extracts features for a pair of mentions.
 *
 * <p>Each argument that is an {@code EventMention} is first replaced by the result of
 * {@code getExpandedEvent}; when that call returns {@code null} the original mention is
 * used instead. Features are then appended in this order: mention-1 features, mention-2
 * features, features of the tokens between the two arguments, and the
 * {@code nTokensBetween} features.
 *
 * @param jCas the CAS view handed to every extractor
 * @param mention1 the first argument of the pair
 * @param mention2 the second argument of the pair
 * @return a new list with the combined features
 * @throws AnalysisEngineProcessException if any extractor fails
 */
@Override
public List<Feature> extract(JCas jCas, IdentifiedAnnotation mention1,
    IdentifiedAnnotation mention2) throws AnalysisEngineProcessException {
  // first argument: use the expanded event when one is available
  Annotation arg1 = mention1;
  if (arg1 instanceof EventMention) {
    Annotation expandedFirst = getExpandedEvent(jCas, mention1);
    if (expandedFirst != null) {
      arg1 = expandedFirst;
    }
  }

  // second argument: same expansion-with-fallback rule
  Annotation arg2 = mention2;
  if (arg2 instanceof EventMention) {
    Annotation expandedSecond = getExpandedEvent(jCas, mention2);
    if (expandedSecond != null) {
      arg2 = expandedSecond;
    }
  }

  List<Feature> combined = new ArrayList<Feature>();
  combined.addAll(this.mention1FeaturesExtractor.extract(jCas, arg1));
  combined.addAll(this.mention2FeaturesExtractor.extract(jCas, arg2));
  combined.addAll(this.tokensBetween.extractBetween(jCas, arg1, arg2));
  combined.addAll(this.nTokensBetween.extract(jCas, arg1, arg2));
  return combined;
}
/**
 * Builds the list of feature extractors applied to {@code Time} annotations.
 *
 * <p>The extractors are registered in this order: {@code LastWordExtractor}, the
 * character-category pattern extractor, {@code TimeWordsExtractor}, and a
 * {@code CleartkExtractor} that applies {@code CoveredTextExtractor} to the
 * {@code Token}s in a {@code Bag(Covered)} context.
 *
 * @param context the UIMA context, handed on to the superclass initializer
 * @throws ResourceInitializationException if initialization fails
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  this.featuresExtractors = Lists.newArrayList();
  this.featuresExtractors.add(new LastWordExtractor<Time>());

  FeatureExtractor1<Time> characterPattern = CharacterCategoryPatternFunction.createExtractor();
  this.featuresExtractors.add(characterPattern);

  this.featuresExtractors.add(new TimeWordsExtractor<Time>());

  // covered text of the Tokens in the Bag(Covered) context of the Time annotation
  CleartkExtractor<Time, Token> coveredTokenBag = new CleartkExtractor<Time, Token>(
      Token.class,
      new CoveredTextExtractor<Token>(),
      new Bag(new Covered()));
  this.featuresExtractors.add(coveredTokenBag);
}
new CleartkExtractor( AssertionCuePhraseAnnotation.class, baseExtractorCuePhraseCategory, new CleartkExtractor( BaseToken.class, new CoveredTextExtractor(), new CleartkExtractor( BaseToken.class, new TypePathExtractor(BaseToken.class, "partOfSpeech"), cuePhraseInWindowExtractor.extractWithin(jcas, current, firstCoveringSentence); tokenExtraction1.extractWithin(jcas, current, firstCoveringSentence); if (tokenFeatures != null && !tokenFeatures.isEmpty())
tokenFeatures.addAll(this.contextFeatureExtractor.extractWithin(jCas, token, sentence)); tokenFeatureLists.add(tokenFeatures);
/**
 * Extracts context features for the focus annotation, limited to the span of another
 * annotation.
 *
 * <p>The begin and end offsets of {@code boundsAnnotation} are wrapped in a
 * {@code SpanBounds}, which is then passed to the three-argument {@code extract}.
 *
 * @param view
 *          The JCas containing the focus annotation.
 * @param focusAnnotation
 *          The annotation whose context is searched.
 * @param boundsAnnotation
 *          The annotation whose begin/end offsets delimit the search.
 * @return The features extracted in the context of the focus annotation.
 * @throws CleartkExtractorException
 *           If extraction fails.
 */
public List<Feature> extractWithin(
    JCas view,
    FOCUS_T focusAnnotation,
    Annotation boundsAnnotation) throws CleartkExtractorException {
  int boundsBegin = boundsAnnotation.getBegin();
  int boundsEnd = boundsAnnotation.getEnd();
  return this.extract(view, focusAnnotation, new SpanBounds(boundsBegin, boundsEnd));
}
/**
 * Extracts features for a pair of mentions.
 *
 * <p>Each argument that is an {@code EventMention} is first replaced by the result of
 * {@code getExpandedEvent}; when that call returns {@code null} the original mention is
 * used instead. Features are then appended in this order: mention-1 features, mention-2
 * features, features of the tokens between the two arguments, and the
 * {@code nTokensBetween} features.
 *
 * @param jCas the CAS view handed to every extractor
 * @param mention1 the first argument of the pair
 * @param mention2 the second argument of the pair
 * @return a new list with the combined features
 * @throws AnalysisEngineProcessException if any extractor fails
 */
@Override
public List<Feature> extract(JCas jCas, IdentifiedAnnotation mention1,
    IdentifiedAnnotation mention2) throws AnalysisEngineProcessException {
  // first argument: use the expanded event when one is available
  Annotation arg1 = mention1;
  if (arg1 instanceof EventMention) {
    Annotation expandedFirst = getExpandedEvent(jCas, mention1);
    if (expandedFirst != null) {
      arg1 = expandedFirst;
    }
  }

  // second argument: same expansion-with-fallback rule
  Annotation arg2 = mention2;
  if (arg2 instanceof EventMention) {
    Annotation expandedSecond = getExpandedEvent(jCas, mention2);
    if (expandedSecond != null) {
      arg2 = expandedSecond;
    }
  }

  List<Feature> combined = new ArrayList<Feature>();
  combined.addAll(this.mention1FeaturesExtractor.extract(jCas, arg1));
  combined.addAll(this.mention2FeaturesExtractor.extract(jCas, arg2));
  combined.addAll(this.tokensBetween.extractBetween(jCas, arg1, arg2));
  combined.addAll(this.nTokensBetween.extract(jCas, arg1, arg2));
  return combined;
}
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // add features: word, stem, pos this.tokenFeatureExtractors = Lists.newArrayList(); this.tokenFeatureExtractors.add(new CoveredTextExtractor<Token>()); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "stem")); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "pos")); this.tokenFeatureExtractors.add(new ParentNodeFeaturesExtractor()); // add window of features before and after this.contextExtractors = Lists.newArrayList(); this.contextExtractors.add(new CleartkExtractor<Token, Token>( Token.class, new CoveredTextExtractor<Token>(), new Preceding(3), new Following(3))); }
new CleartkExtractor( AssertionCuePhraseAnnotation.class, baseExtractorCuePhraseCategory, new CleartkExtractor( BaseToken.class, new CoveredTextExtractor(), new CleartkExtractor( BaseToken.class, new TypePathExtractor(BaseToken.class, "partOfSpeech"), cuePhraseInWindowExtractor.extractWithin(jcas, current, firstCoveringSentence); tokenExtraction1.extractWithin(jcas, current, firstCoveringSentence); if (tokenFeatures != null && !tokenFeatures.isEmpty())
features.addAll(extractor.extractWithin(jCas, token, sentence));
public List<Feature> extractBetween(JCas view, FOCUS_T annotation1, FOCUS_T annotation2) throws CleartkExtractorException { int begin = annotation1.getEnd(); int end = annotation2.getBegin(); // FIXME: creating a new annotation may leak memory - is there a better approach? Annotation focusAnnotation = new Annotation(view, begin, end); return this.extract(view, focusAnnotation, new NoBounds()); }
/**
 * Registers additional event-level and context-level feature extractors.
 *
 * <p>NOTE(review): this method appends to {@code eventFeatureExtractors} and
 * {@code contextExtractors} without creating them; presumably
 * {@code super.initialize} sets them up. Confirm against the superclass.
 *
 * @param context the UIMA context, handed on to the superclass initializer
 * @throws ResourceInitializationException if initialization fails
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  // event feature: TextSliceExtractor constructed with -2
  this.eventFeatureExtractors.add(new TextSliceExtractor<Event>(-2));

  // event feature: "pos" type path of the Tokens in the Bag(Covered) context
  TypePathExtractor<Token> posOfToken = new TypePathExtractor<Token>(Token.class, "pos");
  CleartkExtractor<Event, Token> coveredPosBag =
      new CleartkExtractor<Event, Token>(Token.class, posOfToken, new Bag(new Covered()));
  this.eventFeatureExtractors.add(coveredPosBag);

  // context feature: TokenTextForSelectedPosExtractor (configured with MD, TO, IN, VB)
  // applied to the Preceding(3) Token context
  CleartkExtractor<Event, Token> precedingSelectedPos = new CleartkExtractor<Event, Token>(
      Token.class,
      new TokenTextForSelectedPosExtractor("MD", "TO", "IN", "VB"),
      new Preceding(3));
  this.contextExtractors.add(precedingSelectedPos);
}
features.addAll(extractor.extractWithin(jCas, token, sentence));
List<Feature> features = new ArrayList<Feature>(); features.addAll(this.extractor.extract(jCas, token)); features.addAll(this.contextExtractor.extract(jCas, token)); featureLists.add(features);