@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // define chunking type this.chunking = new BioChunking<Token, Time>(Token.class, Time.class); // add features: word, character pattern, stem, pos this.tokenFeatureExtractors = Lists.newArrayList(); this.tokenFeatureExtractors.add(new CoveredTextExtractor<Token>()); NamedFeatureExtractor1<Token> ex = CharacterCategoryPatternFunction.createExtractor(); this.tokenFeatureExtractors.add(ex); this.tokenFeatureExtractors.add(new TimeWordsExtractor<Token>()); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "stem")); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "pos")); // add window of features before and after this.contextFeatureExtractors = Lists.newArrayList(); for (FeatureExtractor1<Token> extractor : this.tokenFeatureExtractors) { this.contextFeatureExtractors.add(new CleartkExtractor<Token, Token>(Token.class, extractor, new Preceding( 3), new Following(3))); } }
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context);
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // the token feature extractor: text, char pattern (uppercase, digits, etc.), and part-of-speech this.extractor = new CombinedExtractor1<Token>( new FeatureFunctionExtractor<Token>( new CoveredTextExtractor<Token>(), new CharacterCategoryPatternFunction<Token>(PatternType.REPEATS_MERGED)), new TypePathExtractor<Token>(Token.class, "pos")); // the context feature extractor: the features above for the 3 preceding and 3 following tokens this.contextExtractor = new CleartkExtractor<Token, Token>( Token.class, this.extractor, new Preceding(3), new Following(3)); // the chunking definition: Tokens will be combined to form NamedEntityMentions, with labels // from the "mentionType" attribute so that we get B-location, I-person, etc. this.chunking = new BioChunking<Token, NamedEntityMention>( Token.class, NamedEntityMention.class, "mentionType"); }
public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // a feature extractor that creates features corresponding to the word, the word lower cased // the capitalization of the word, the numeric characterization of the word, and character ngram // suffixes of length 2 and 3. this.tokenFeatureExtractor = new FeatureFunctionExtractor<Token>( new CoveredTextExtractor<Token>(), new LowerCaseFeatureFunction(), new CapitalTypeFeatureFunction(), new NumericTypeFeatureFunction(), new CharacterNgramFeatureFunction(Orientation.RIGHT_TO_LEFT, 0, 2), new CharacterNgramFeatureFunction(Orientation.RIGHT_TO_LEFT, 0, 3)); // a feature extractor that extracts the surrounding token texts (within the same sentence) this.contextFeatureExtractor = new CleartkExtractor<Token, Token>( Token.class, new CoveredTextExtractor<Token>(), new Preceding(2), new Following(2)); }