/**
 * Registers the event-attribute feature extractors used for both ends of a relation.
 *
 * <p>The attributes read from each {@code Event} are {@code tense}, {@code aspect} and
 * {@code eventClass}. One list instance is handed to both the source and the target setter.
 *
 * @param context the UIMA context, forwarded unchanged to the superclass initializer
 * @throws ResourceInitializationException declared by the overridden method; propagated from
 *     {@code super.initialize}
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  List<FeatureExtractor1<Event>> eventAttributeExtractors = Lists.newArrayList();
  for (String attribute : new String[] { "tense", "aspect", "eventClass" }) {
    eventAttributeExtractors.add(new TypePathExtractor<Event>(Event.class, attribute));
  }

  // the very same list is shared by the source and the target side
  this.setSourceExtractors(eventAttributeExtractors);
  this.setTargetExtractors(eventAttributeExtractors);
}
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // I explored a ton of features here, and the following were the only ones that worked // The only feature that I didn't try that seems like it might still have some promise // would be to find any times within, say, 5 tokens, and do the time value comparison // to see whether the nearby time is before, overlapping with or after the DCT List<FeatureExtractor1<Event>> srcExtractors = Lists.newArrayList(); srcExtractors.add(new TypePathExtractor<Event>(Event.class, "tense")); srcExtractors.add(new TypePathExtractor<Event>(Event.class, "aspect")); srcExtractors.add(new TypePathExtractor<Event>(Event.class, "eventClass")); srcExtractors.add(new TypePathExtractor<Event>(Event.class, "polarity")); srcExtractors.add(new TypePathExtractor<Event>(Event.class, "modality")); // the word, but only if it's an aspectual event srcExtractors.add( new FilteringExtractor<Event>(Event.class, new CoveredTextExtractor<Event>()) { @Override protected boolean accept(Event event) { return event.getEventClass().equals("ASPECTUAL"); } }); this.setSourceExtractors(srcExtractors); }
/**
 * Adds two event feature extractors on top of whatever the superclass configured: a bag of
 * the {@code stem} values and a bag of the {@code pos} values of the tokens covered by the
 * event.
 *
 * @param context the UIMA context, forwarded unchanged to the superclass initializer
 * @throws ResourceInitializationException declared by the overridden method; propagated from
 *     {@code super.initialize}
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  // bag of stems of the covered tokens
  TypePathExtractor<Token> stemOfToken = new TypePathExtractor<Token>(Token.class, "stem");
  CleartkExtractor<Event, Token> coveredStems =
      new CleartkExtractor<Event, Token>(Token.class, stemOfToken, new Bag(new Covered()));
  this.eventFeatureExtractors.add(coveredStems);

  // bag of part-of-speech tags of the covered tokens
  TypePathExtractor<Token> posOfToken = new TypePathExtractor<Token>(Token.class, "pos");
  CleartkExtractor<Event, Token> coveredPosTags =
      new CleartkExtractor<Event, Token>(Token.class, posOfToken, new Bag(new Covered()));
  this.eventFeatureExtractors.add(coveredPosTags);
}
/**
 * Registers the feature extractors for event-to-event relations.
 *
 * <p>Source and target share one list: the {@code tense}, {@code aspect} and
 * {@code eventClass} attributes followed by a
 * {@code SyntacticFirstChildOfGrandparentOfLeafExtractor}. The between-anchor extractors are
 * a {@code SyntacticLeafToLeafPathPartsExtractor} and a bag of the covered text of the tokens
 * selected by {@code Covered}.
 *
 * @param context the UIMA context, forwarded unchanged to the superclass initializer
 * @throws ResourceInitializationException declared by the overridden method; propagated from
 *     {@code super.initialize}
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  // per-anchor features; the same list instance is used for both ends of the relation
  List<FeatureExtractor1<Event>> anchorExtractors = Lists.newArrayList();
  for (String attribute : new String[] { "tense", "aspect", "eventClass" }) {
    anchorExtractors.add(new TypePathExtractor<Event>(Event.class, attribute));
  }
  anchorExtractors.add(new SyntacticFirstChildOfGrandparentOfLeafExtractor<Event>());
  this.setSourceExtractors(anchorExtractors);
  this.setTargetExtractors(anchorExtractors);

  // features computed over the pair of anchors
  List<FeatureExtractor2<Anchor, Anchor>> betweenExtractors = Lists.newArrayList();
  betweenExtractors.add(new SyntacticLeafToLeafPathPartsExtractor<Anchor, Anchor>());
  CleartkExtractor<Anchor, Token> bagOfWords =
      new CleartkExtractor<Anchor, Token>(
          Token.class,
          new CoveredTextExtractor<Token>(),
          new Bag(new Covered()));
  betweenExtractors.add(bagOfWords);
  this.setBetweenExtractors(betweenExtractors);
}
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // add features: word, stem, pos this.tokenFeatureExtractors = Lists.newArrayList(); this.tokenFeatureExtractors.add(new CoveredTextExtractor<Token>()); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "stem")); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "pos")); this.tokenFeatureExtractors.add(new ParentNodeFeaturesExtractor()); // add window of features before and after this.contextExtractors = Lists.newArrayList(); this.contextExtractors.add(new CleartkExtractor<Token, Token>( Token.class, new CoveredTextExtractor<Token>(), new Preceding(3), new Following(3))); }
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // define chunking type this.chunking = new BioChunking<Token, Time>(Token.class, Time.class); // add features: word, character pattern, stem, pos this.tokenFeatureExtractors = Lists.newArrayList(); this.tokenFeatureExtractors.add(new CoveredTextExtractor<Token>()); NamedFeatureExtractor1<Token> ex = CharacterCategoryPatternFunction.createExtractor(); this.tokenFeatureExtractors.add(ex); this.tokenFeatureExtractors.add(new TimeWordsExtractor<Token>()); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "stem")); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "pos")); // add window of features before and after this.contextFeatureExtractors = Lists.newArrayList(); for (FeatureExtractor1<Token> extractor : this.tokenFeatureExtractors) { this.contextFeatureExtractors.add(new CleartkExtractor<Token, Token>(Token.class, extractor, new Preceding( 3), new Following(3))); } }
/**
 * Adds event and context feature extractors on top of the superclass configuration.
 *
 * <p>Event features: a {@code TextSliceExtractor} constructed with {@code -2}, and a bag of
 * the {@code pos} values of the covered tokens. Context feature: the
 * {@code TokenTextForSelectedPosExtractor("MD", "TO", "IN", "VB")} output over the 3
 * preceding tokens.
 *
 * @param context the UIMA context, forwarded unchanged to the superclass initializer
 * @throws ResourceInitializationException declared by the overridden method; propagated from
 *     {@code super.initialize}
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  // slice of the event text (argument -2; presumably the trailing two characters -- confirm)
  this.eventFeatureExtractors.add(new TextSliceExtractor<Event>(-2));

  // bag of part-of-speech tags of the tokens inside the event
  TypePathExtractor<Token> posOfToken = new TypePathExtractor<Token>(Token.class, "pos");
  CleartkExtractor<Event, Token> coveredPosTags =
      new CleartkExtractor<Event, Token>(Token.class, posOfToken, new Bag(new Covered()));
  this.eventFeatureExtractors.add(coveredPosTags);

  // text of the 3 preceding tokens, filtered by the listed part-of-speech tags
  TokenTextForSelectedPosExtractor selectedPosText =
      new TokenTextForSelectedPosExtractor("MD", "TO", "IN", "VB");
  CleartkExtractor<Event, Token> precedingSelected =
      new CleartkExtractor<Event, Token>(Token.class, selectedPosText, new Preceding(3));
  this.contextExtractors.add(precedingSelected);
}
new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseAssertionFamily") );
new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseAssertionFamily") );
/**
 * Adds event and context feature extractors on top of the superclass configuration.
 *
 * <p>Event feature: a bag of the {@code pos} values of the covered tokens. Context feature:
 * a bag of the {@code TokenTextForSelectedPosExtractor("VB")} output over the 3 preceding
 * tokens.
 *
 * @param context the UIMA context, forwarded unchanged to the superclass initializer
 * @throws ResourceInitializationException declared by the overridden method; propagated from
 *     {@code super.initialize}
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  // bag of part-of-speech tags of the tokens inside the event
  TypePathExtractor<Token> posOfToken = new TypePathExtractor<Token>(Token.class, "pos");
  CleartkExtractor<Event, Token> coveredPosTags =
      new CleartkExtractor<Event, Token>(Token.class, posOfToken, new Bag(new Covered()));
  this.eventFeatureExtractors.add(coveredPosTags);

  // bag over the 3 preceding tokens, restricted via the "VB" selector
  TokenTextForSelectedPosExtractor verbText = new TokenTextForSelectedPosExtractor("VB");
  CleartkExtractor<Event, Token> precedingVerbs =
      new CleartkExtractor<Event, Token>(Token.class, verbText, new Bag(new Preceding(3)));
  this.contextExtractors.add(precedingVerbs);
}
new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseAssertionFamily") ); new CleartkExtractor( BaseToken.class, new TypePathExtractor(BaseToken.class, "partOfSpeech"), new CleartkExtractor.LastCovered(2), new CleartkExtractor.Preceding(3),
// Source side (Event): the "tense" and "eventClass" attributes, plus an n-gram of
// prepOrVerbExtractor output over the 5 tokens following the event.
// Target side (Time): the "timeType" and "value" attributes, plus an n-gram of
// prepOrVerbExtractor output over the 5 tokens preceding the time.
// NOTE(review): srcExtractors, tgtExtractors and prepOrVerbExtractor are declared outside
// this excerpt -- confirm their types and contents against the enclosing method.
srcExtractors.add(new TypePathExtractor<Event>(Event.class, "tense")); srcExtractors.add(new TypePathExtractor<Event>(Event.class, "eventClass")); srcExtractors.add(new CleartkExtractor<Event, Token>(Token.class, prepOrVerbExtractor, new Ngram(new Following(5)))); this.setSourceExtractors(srcExtractors); tgtExtractors.add(new TypePathExtractor<Time>(Time.class, "timeType")); tgtExtractors.add(new TypePathExtractor<Time>(Time.class, "value")); tgtExtractors.add(new CleartkExtractor<Time, Token>(Token.class, prepOrVerbExtractor, new Ngram(new Preceding(5)))); this.setTargetExtractors(tgtExtractors);
new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseAssertionFamily") ); new CleartkExtractor( BaseToken.class, new TypePathExtractor(BaseToken.class, "partOfSpeech"), new CleartkExtractor.LastCovered(2), new CleartkExtractor.Preceding(3),
/**
 * Creates the annotator and wires up all of its feature extractors.
 *
 * <p>The event id counter starts at 1. Source and target tokens get the same four features
 * (covered text, {@code pos}, {@code stem}, and selected-POS token text over the 3 preceding
 * tokens), named {@code "Source"} and {@code "Target"} respectively. The span between the
 * anchors contributes a bag of covered-token text named {@code "WordsBetween"}. A
 * {@code TargetPathExtractor} is created as the path extractor.
 */
public VerbClauseTemporalAnnotator() {
  this.eventID = 1;

  // building blocks shared by the source and target lists
  FeatureExtractor1<Token> stemOfToken = new TypePathExtractor<Token>(Token.class, "stem");
  FeatureExtractor1<Token> posOfToken = new TypePathExtractor<Token>(Token.class, "pos");
  FeatureExtractor1<Token> auxiliariesBefore =
      new CleartkExtractor<Token, Token>(
          Token.class,
          new TokenTextForSelectedPosExtractor("MD", "TO", "IN", "VB", "RB"),
          new Preceding(3));

  // features of the source token
  this.sourceFeatureExtractors = Lists.newArrayList();
  this.sourceFeatureExtractors.add(
      new NamingExtractor1<Token>("Source", new CoveredTextExtractor<Token>()));
  this.sourceFeatureExtractors.add(new NamingExtractor1<Token>("Source", posOfToken));
  this.sourceFeatureExtractors.add(new NamingExtractor1<Token>("Source", stemOfToken));
  this.sourceFeatureExtractors.add(new NamingExtractor1<Token>("Source", auxiliariesBefore));

  // features of the target token (same extractors, different name prefix)
  this.targetFeatureExtractors = Lists.newArrayList();
  this.targetFeatureExtractors.add(
      new NamingExtractor1<Token>("Target", new CoveredTextExtractor<Token>()));
  this.targetFeatureExtractors.add(new NamingExtractor1<Token>("Target", posOfToken));
  this.targetFeatureExtractors.add(new NamingExtractor1<Token>("Target", stemOfToken));
  this.targetFeatureExtractors.add(new NamingExtractor1<Token>("Target", auxiliariesBefore));

  // bag of the words covered by the annotation spanning the two anchors
  CleartkExtractor<Annotation, Token> coveredWords =
      new CleartkExtractor<Annotation, Token>(
          Token.class,
          new CoveredTextExtractor<Token>(),
          new Bag(new Covered()));
  this.betweenAnchorsFeatureExtractors = new ArrayList<FeatureExtractor1<Annotation>>();
  this.betweenAnchorsFeatureExtractors.add(
      new NamingExtractor1<Annotation>("WordsBetween", coveredWords));

  this.pathExtractor = new TargetPathExtractor();
}
/**
 * Builds the extractor that counts token values within a sentence.
 *
 * <p>The value taken from each token depends on {@code this.tokenField}: the {@code stem}
 * attribute for {@code STEM}, the {@code lemma} attribute for {@code LEMMA}, and the covered
 * text otherwise. The per-token extractor is wrapped in a {@code StopwordRemovingExtractor}
 * configured with {@code this.stopwords}, and counts are taken over the tokens covered by the
 * sentence.
 *
 * @return a sentence-level extractor producing counts of the selected token field
 */
private FeatureExtractor1<Sentence> createTokenCountsExtractor() {
  final FeatureExtractor1<Token> perTokenValue;
  switch (this.tokenField) {
    case STEM:
      perTokenValue = new TypePathExtractor<Token>(Token.class, "stem");
      break;
    case LEMMA:
      perTokenValue = new TypePathExtractor<Token>(Token.class, "lemma");
      break;
    default:
      // COVERED_TEXT and any other value: use the token's surface text
      perTokenValue = new CoveredTextExtractor<Token>();
      break;
  }

  return new CleartkExtractor<Sentence, Token>(
      Token.class,
      new StopwordRemovingExtractor<Token>(this.stopwords, perTokenValue),
      new CleartkExtractor.Count(new CleartkExtractor.Covered()));
}
new CleartkExtractor<>( BaseToken.class, new TypePathExtractor<>( BaseToken.class, "partOfSpeech" ), new CleartkExtractor.LastCovered( 2 ), new CleartkExtractor.Preceding( 3 ),
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // the token feature extractor: text, char pattern (uppercase, digits, etc.), and part-of-speech this.extractor = new CombinedExtractor1<Token>( new FeatureFunctionExtractor<Token>( new CoveredTextExtractor<Token>(), new CharacterCategoryPatternFunction<Token>(PatternType.REPEATS_MERGED)), new TypePathExtractor<Token>(Token.class, "pos")); // the context feature extractor: the features above for the 3 preceding and 3 following tokens this.contextExtractor = new CleartkExtractor<Token, Token>( Token.class, this.extractor, new Preceding(3), new Following(3)); // the chunking definition: Tokens will be combined to form NamedEntityMentions, with labels // from the "mentionType" attribute so that we get B-location, I-person, etc. this.chunking = new BioChunking<Token, NamedEntityMention>( Token.class, NamedEntityMention.class, "mentionType"); }