org.cleartk.ml.feature.extractor.CoveredTextExtractor java code examples

/**
 * Creates an extractor that wraps a {@code CoveredTextExtractor} and remembers which
 * part-of-speech tags are acceptable.
 *
 * @param acceptablePOSTags the tags to accept; they are copied into a private set, so
 *        later changes to the caller's collection are not seen by this instance
 */
public TokenTextForSelectedPosExtractor(Collection<String> acceptablePOSTags) {
 // private copy of the caller's tags
 HashSet<String> tagCopy = new HashSet<String>();
 tagCopy.addAll(acceptablePOSTags);
 this.acceptablePOSTags = tagCopy;
 // delegate that produces the actual text features
 this.extractor = new CoveredTextExtractor<Token>();
}

 /**
  * Extracts the wrapped extractor's features for a token, but only when the token's
  * part-of-speech tag, cut down to its first two characters, is one of the accepted tags.
  *
  * @param view the CAS view, passed through to the wrapped extractor
  * @param token the token to examine
  * @return a new list holding the wrapped extractor's features, or an empty list when the
  *         token has no POS tag or its (shortened) tag is not accepted
  * @throws CleartkExtractorException propagated from the wrapped extractor
  */
 @Override
 public List<Feature> extract(JCas view, Token token)
   throws CleartkExtractorException {
  String tag = token.getPos();
  if (tag == null) {
   return new ArrayList<Feature>();
  }
  // only the first two characters take part in the lookup
  String lookupKey = tag.length() > 2 ? tag.substring(0, 2) : tag;
  if (!this.acceptablePOSTags.contains(lookupKey)) {
   return new ArrayList<Feature>();
  }
  return new ArrayList<Feature>(this.extractor.extract(view, token));
 }
}

/**
 * Reports the feature name of the wrapped extractor.
 *
 * @return the name returned by the delegate's {@code getFeatureName()}
 */
@Override
public String getFeatureName() {
 final String delegateName = this.extractor.getFeatureName();
 return delegateName;
}

/**
 * Initializes the superclass and then builds the per-token feature extractors and the
 * context (window) feature extractors.
 *
 * @param context the UIMA context, forwarded to {@code super.initialize}
 * @throws ResourceInitializationException declared by the overridden method; may be raised
 *         by {@code super.initialize}
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
 super.initialize(context);

 // per-token features: word, stem, pos, plus the parent-node features
 this.tokenFeatureExtractors = Lists.newArrayList();
 CoveredTextExtractor<Token> wordText = new CoveredTextExtractor<Token>();
 this.tokenFeatureExtractors.add(wordText);
 TypePathExtractor<Token> stemPath = new TypePathExtractor<Token>(Token.class, "stem");
 this.tokenFeatureExtractors.add(stemPath);
 TypePathExtractor<Token> posPath = new TypePathExtractor<Token>(Token.class, "pos");
 this.tokenFeatureExtractors.add(posPath);
 this.tokenFeatureExtractors.add(new ParentNodeFeaturesExtractor());

 // context features: token text in a window before and after
 this.contextExtractors = Lists.newArrayList();
 CleartkExtractor<Token, Token> textWindow = new CleartkExtractor<Token, Token>(
   Token.class,
   new CoveredTextExtractor<Token>(),
   new Preceding(3),
   new Following(3));
 this.contextExtractors.add(textWindow);
}

/**
 * Initializes the superclass and then fills {@code featuresExtractors} with the extractors
 * applied to each {@code Time} annotation.
 *
 * @param context the UIMA context, forwarded to {@code super.initialize}
 * @throws ResourceInitializationException declared by the overridden method; may be raised
 *         by {@code super.initialize}
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
 super.initialize(context);
 this.featuresExtractors = Lists.newArrayList();

 this.featuresExtractors.add(new LastWordExtractor<Time>());

 FeatureExtractor1<Time> charPattern = CharacterCategoryPatternFunction.createExtractor();
 this.featuresExtractors.add(charPattern);

 this.featuresExtractors.add(new TimeWordsExtractor<Time>());

 // text of the tokens covered by the Time annotation, collected as a bag
 CleartkExtractor<Time, Token> coveredTokenTexts = new CleartkExtractor<Time, Token>(
   Token.class,
   new CoveredTextExtractor<Token>(),
   new Bag(new Covered()));
 this.featuresExtractors.add(coveredTokenTexts);
}

/**
 * Initializes the superclass and then registers the feature extractors applied to the
 * source {@code Event}: tense, aspect, event class, polarity, modality, and the covered
 * text of the event when its event class is {@code "ASPECTUAL"}.
 *
 * @param context the UIMA context, forwarded to {@code super.initialize}
 * @throws ResourceInitializationException declared by the overridden method; may be raised
 *         by {@code super.initialize}
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
 super.initialize(context);
 // I explored a ton of features here, and the following were the only ones that worked
 // The only feature that I didn't try that seems like it might still have some promise
 // would be to find any times within, say, 5 tokens, and do the time value comparison
 // to see whether the nearby time is before, overlapping with or after the DCT
 List<FeatureExtractor1<Event>> srcExtractors = Lists.newArrayList();
 srcExtractors.add(new TypePathExtractor<Event>(Event.class, "tense"));
 srcExtractors.add(new TypePathExtractor<Event>(Event.class, "aspect"));
 srcExtractors.add(new TypePathExtractor<Event>(Event.class, "eventClass"));
 srcExtractors.add(new TypePathExtractor<Event>(Event.class, "polarity"));
 srcExtractors.add(new TypePathExtractor<Event>(Event.class, "modality"));
 // the word, but only if it's an aspectual event
 srcExtractors.add(
   new FilteringExtractor<Event>(Event.class, new CoveredTextExtractor<Event>()) {
    @Override
    protected boolean accept(Event event) {
     // constant-first comparison: an event whose class is null is rejected
     // instead of raising a NullPointerException
     return "ASPECTUAL".equals(event.getEventClass());
    }
   });
 this.setSourceExtractors(srcExtractors);
}

/**
 * Initializes the superclass and then creates the extractor that gives word counts
 * for a document.
 *
 * @param context the UIMA context, forwarded to {@code super.initialize}
 * @throws ResourceInitializationException may be raised by {@code super.initialize}
 */
public void initialize(UimaContext context) throws ResourceInitializationException {
 super.initialize(context);
 // text of each token ...
 CoveredTextExtractor<Token> tokenText = new CoveredTextExtractor<Token>();
 // ... counted over the tokens covered by the document annotation
 Count coveredCounts = new Count(new Covered());
 this.extractor = new CleartkExtractor<DocumentAnnotation, Token>(
   Token.class,
   tokenText,
   coveredCounts);
}

/**
 * Sets {@code eventID} to 1 and builds the feature extractors for the source token, the
 * target token, and the span between the two anchors, plus the path extractor.
 *
 * <p>The stem, POS and preceding-auxiliaries extractor instances are shared between the
 * source and target lists; each list gets its own {@code CoveredTextExtractor}.
 */
public VerbClauseTemporalAnnotator() {
 this.eventID = 1;

 // extractors reused for both source and target
 FeatureExtractor1<Token> auxiliariesBefore = new CleartkExtractor<Token, Token>(
   Token.class,
   new TokenTextForSelectedPosExtractor("MD", "TO", "IN", "VB", "RB"),
   new Preceding(3));
 FeatureExtractor1<Token> stemOfToken = new TypePathExtractor<Token>(Token.class, "stem");
 FeatureExtractor1<Token> posOfToken = new TypePathExtractor<Token>(Token.class, "pos");

 // source-token features, all named with the "Source" prefix
 final String sourceName = "Source";
 this.sourceFeatureExtractors = Lists.newArrayList();
 this.sourceFeatureExtractors.add(
   new NamingExtractor1<Token>(sourceName, new CoveredTextExtractor<Token>()));
 this.sourceFeatureExtractors.add(new NamingExtractor1<Token>(sourceName, posOfToken));
 this.sourceFeatureExtractors.add(new NamingExtractor1<Token>(sourceName, stemOfToken));
 this.sourceFeatureExtractors.add(new NamingExtractor1<Token>(sourceName, auxiliariesBefore));

 // target-token features, all named with the "Target" prefix
 final String targetName = "Target";
 this.targetFeatureExtractors = Lists.newArrayList();
 this.targetFeatureExtractors.add(
   new NamingExtractor1<Token>(targetName, new CoveredTextExtractor<Token>()));
 this.targetFeatureExtractors.add(new NamingExtractor1<Token>(targetName, posOfToken));
 this.targetFeatureExtractors.add(new NamingExtractor1<Token>(targetName, stemOfToken));
 this.targetFeatureExtractors.add(new NamingExtractor1<Token>(targetName, auxiliariesBefore));

 // text of the tokens covered by the between-anchors annotation, collected as a bag
 CleartkExtractor<Annotation, Token> coveredTokenTexts = new CleartkExtractor<Annotation, Token>(
   Token.class,
   new CoveredTextExtractor<Token>(),
   new Bag(new Covered()));
 this.betweenAnchorsFeatureExtractors = new ArrayList<FeatureExtractor1<Annotation>>();
 this.betweenAnchorsFeatureExtractors.add(
   new NamingExtractor1<Annotation>("WordsBetween", coveredTokenTexts));

 this.pathExtractor = new TargetPathExtractor();
}

/**
 * Initializes the superclass and then registers the event feature extractors (used for
 * both source and target) and the extractors applied between the two anchors.
 *
 * @param context the UIMA context, forwarded to {@code super.initialize}
 * @throws ResourceInitializationException declared by the overridden method; may be raised
 *         by {@code super.initialize}
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
 super.initialize(context);

 // the very same list instance is handed to both the source and the target setter
 List<FeatureExtractor1<Event>> eventExtractors = Lists.newArrayList();
 eventExtractors.add(new TypePathExtractor<Event>(Event.class, "tense"));
 eventExtractors.add(new TypePathExtractor<Event>(Event.class, "aspect"));
 eventExtractors.add(new TypePathExtractor<Event>(Event.class, "eventClass"));
 eventExtractors.add(new SyntacticFirstChildOfGrandparentOfLeafExtractor<Event>());
 this.setSourceExtractors(eventExtractors);
 this.setTargetExtractors(eventExtractors);

 // features computed over the pair of anchors: syntactic path parts and covered token texts
 List<FeatureExtractor2<Anchor, Anchor>> betweenExtractors = Lists.newArrayList();
 betweenExtractors.add(new SyntacticLeafToLeafPathPartsExtractor<Anchor, Anchor>());
 betweenExtractors.add(new CleartkExtractor<Anchor, Token>(
   Token.class,
   new CoveredTextExtractor<Token>(),
   new Bag(new Covered())));
 this.setBetweenExtractors(betweenExtractors);
}

/**
 * Initializes the superclass, defines the Token-to-Time BIO chunking, and builds the
 * per-token feature extractors together with one context-window extractor per token
 * feature extractor.
 *
 * @param context the UIMA context, forwarded to {@code super.initialize}
 * @throws ResourceInitializationException declared by the overridden method; may be raised
 *         by {@code super.initialize}
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
 super.initialize(context);

 // chunking type: Tokens are combined into Time annotations
 this.chunking = new BioChunking<Token, Time>(Token.class, Time.class);

 // per-token features: word, character pattern, time words, stem, pos
 this.tokenFeatureExtractors = Lists.newArrayList();
 this.tokenFeatureExtractors.add(new CoveredTextExtractor<Token>());
 NamedFeatureExtractor1<Token> charPattern = CharacterCategoryPatternFunction.createExtractor();
 this.tokenFeatureExtractors.add(charPattern);
 this.tokenFeatureExtractors.add(new TimeWordsExtractor<Token>());
 this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "stem"));
 this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "pos"));

 // every token feature is also extracted in a window before and after the token
 this.contextFeatureExtractors = Lists.newArrayList();
 for (FeatureExtractor1<Token> tokenExtractor : this.tokenFeatureExtractors) {
  CleartkExtractor<Token, Token> window = new CleartkExtractor<Token, Token>(
    Token.class,
    tokenExtractor,
    new Preceding(3),
    new Following(3));
  this.contextFeatureExtractors.add(window);
 }
}

/**
 * Builds the tf-idf extractor on top of a token-count extractor and, when a URI is
 * configured, loads it from that URI.
 *
 * @return the tf-idf extractor, loaded from {@code tfIdfUri} if that field is non-null
 * @throws IOException declared for the {@code load} call
 */
private TfidfExtractor<String, DocumentAnnotation> initTfIdfExtractor() throws IOException {
 // counts of the token texts covered by the document annotation
 CleartkExtractor<DocumentAnnotation, Token> tokenCounts =
   new CleartkExtractor<DocumentAnnotation, Token>(
     Token.class,
     new CoveredTextExtractor<Token>(),
     new CleartkExtractor.Count(new CleartkExtractor.Covered()));

 TfidfExtractor<String, DocumentAnnotation> result =
   new TfidfExtractor<String, DocumentAnnotation>(
     DocumentClassificationAnnotator.TFIDF_EXTRACTOR_KEY,
     tokenCounts);

 // nothing to load when no URI is configured
 if (this.tfIdfUri == null) {
  return result;
 }
 result.load(this.tfIdfUri);
 return result;
}

/**
 * Builds the centroid tf-idf similarity extractor on top of a token-count extractor and,
 * when a URI is configured, loads it from that URI.
 *
 * @return the similarity extractor, loaded from {@code tfIdfCentroidSimilarityUri} if that
 *         field is non-null
 * @throws IOException declared for the {@code load} call
 */
private CentroidTfidfSimilarityExtractor<String, DocumentAnnotation> initCentroidTfIdfSimilarityExtractor()
  throws IOException {
 // counts of the token texts covered by the document annotation
 CleartkExtractor<DocumentAnnotation, Token> tokenCounts =
   new CleartkExtractor<DocumentAnnotation, Token>(
     Token.class,
     new CoveredTextExtractor<Token>(),
     new CleartkExtractor.Count(new CleartkExtractor.Covered()));

 CentroidTfidfSimilarityExtractor<String, DocumentAnnotation> result =
   new CentroidTfidfSimilarityExtractor<String, DocumentAnnotation>(
     DocumentClassificationAnnotator.CENTROID_TFIDF_SIM_EXTRACTOR_KEY,
     tokenCounts);

 // nothing to load when no URI is configured
 if (this.tfIdfCentroidSimilarityUri == null) {
  return result;
 }
 result.load(this.tfIdfCentroidSimilarityUri);
 return result;
}

/**
 * Initializes the superclass and then creates the token feature extractor and the
 * context feature extractor.
 *
 * @param context the UIMA context, forwarded to {@code super.initialize}
 * @throws ResourceInitializationException may be raised by {@code super.initialize}
 */
public void initialize(UimaContext context) throws ResourceInitializationException {
 super.initialize(context);

 // token features: the word itself, its lower-cased form, its capitalization type, its
 // numeric type, and character ngram suffixes of length 2 and 3
 CoveredTextExtractor<Token> wordText = new CoveredTextExtractor<Token>();
 LowerCaseFeatureFunction lowerCase = new LowerCaseFeatureFunction();
 CapitalTypeFeatureFunction capitalType = new CapitalTypeFeatureFunction();
 NumericTypeFeatureFunction numericType = new NumericTypeFeatureFunction();
 CharacterNgramFeatureFunction suffix2 =
   new CharacterNgramFeatureFunction(Orientation.RIGHT_TO_LEFT, 0, 2);
 CharacterNgramFeatureFunction suffix3 =
   new CharacterNgramFeatureFunction(Orientation.RIGHT_TO_LEFT, 0, 3);
 this.tokenFeatureExtractor = new FeatureFunctionExtractor<Token>(
   wordText,
   lowerCase,
   capitalType,
   numericType,
   suffix2,
   suffix3);

 // context features: the surrounding token texts (within the same sentence)
 CoveredTextExtractor<Token> neighbourText = new CoveredTextExtractor<Token>();
 this.contextFeatureExtractor = new CleartkExtractor<Token, Token>(
   Token.class,
   neighbourText,
   new Preceding(2),
   new Following(2));
}

new CombinedExtractor1
  new CoveredTextExtractor(),
  new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"),
  new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"),

new CombinedExtractor1
  new CoveredTextExtractor(),
  new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"),
  new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"),

  new CoveredTextExtractor<Token>()) {
 @Override
 protected boolean accept(Token token) {
tgtExtractors.add(new CleartkExtractor<Time, Token>(Token.class, new CoveredTextExtractor<Token>(), new Bag(new Covered())));
tgtExtractors.add(new TypePathExtractor<Time>(Time.class, "timeType"));
tgtExtractors.add(new TypePathExtractor<Time>(Time.class, "value"));

/**
 * Creates an extractor that counts, per {@code Sentence}, the values of the configured
 * token field over the covered tokens, after passing them through a
 * {@code StopwordRemovingExtractor} built from {@code this.stopwords}.
 *
 * <p>The token field is selected by {@code this.tokenField}: {@code STEM} and
 * {@code LEMMA} read the corresponding type path; {@code COVERED_TEXT} and any other
 * value use the token's covered text.
 *
 * @return the counting extractor
 */
private FeatureExtractor1<Sentence> createTokenCountsExtractor() {
 // assigned exactly once on every path; the covered-text extractor is the fallback
 final FeatureExtractor1<Token> tokenFieldExtractor;
 switch (this.tokenField) {
  case STEM:
   tokenFieldExtractor = new TypePathExtractor<Token>(Token.class, "stem");
   break;
  case LEMMA:
   tokenFieldExtractor = new TypePathExtractor<Token>(Token.class, "lemma");
   break;
  case COVERED_TEXT:
  default:
   tokenFieldExtractor = new CoveredTextExtractor<Token>();
   break;
 }
 CleartkExtractor<Sentence, Token> countsExtractor = new CleartkExtractor<Sentence, Token>(
   Token.class,
   new StopwordRemovingExtractor<Token>(this.stopwords, tokenFieldExtractor),
   new CleartkExtractor.Count(new CleartkExtractor.Covered()));
 return countsExtractor;
}

/**
 * Initializes the superclass and then sets up the token feature extractor, the context
 * feature extractor built on top of it, and the BIO chunking definition.
 *
 * @param context the UIMA context, forwarded to {@code super.initialize}
 * @throws ResourceInitializationException declared by the overridden method; may be raised
 *         by {@code super.initialize}
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
 super.initialize(context);

 // token features: text and char pattern (uppercase, digits, etc.) ...
 FeatureFunctionExtractor<Token> textAndPattern = new FeatureFunctionExtractor<Token>(
   new CoveredTextExtractor<Token>(),
   new CharacterCategoryPatternFunction<Token>(PatternType.REPEATS_MERGED));
 // ... and part-of-speech
 TypePathExtractor<Token> partOfSpeech = new TypePathExtractor<Token>(Token.class, "pos");
 this.extractor = new CombinedExtractor1<Token>(textAndPattern, partOfSpeech);

 // context features: the token features above for the 3 preceding and 3 following tokens
 this.contextExtractor = new CleartkExtractor<Token, Token>(
   Token.class,
   this.extractor,
   new Preceding(3),
   new Following(3));

 // chunking: Tokens are combined to form NamedEntityMentions, labelled from the
 // "mentionType" attribute so that we get B-location, I-person, etc.
 this.chunking = new BioChunking<Token, NamedEntityMention>(
   Token.class,
   NamedEntityMention.class,
   "mentionType");
}

new CombinedExtractor1
  new CoveredTextExtractor(),
  new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"),
  new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"),
new CleartkExtractor(
  BaseToken.class, 
  new CoveredTextExtractor(),

new CombinedExtractor1
  new CoveredTextExtractor(),
  new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"),
  new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"),
new CleartkExtractor(
  BaseToken.class, 
  new CoveredTextExtractor(),

Javadoc

Most used methods

Popular in Java

Parsing JSON documents to java classes using gson
runOnUiThread (Activity)
requestLocationUpdates (LocationManager)
putExtra (Intent)
EOFException (java.io)
Thrown when a program encounters the end of a file or stream during an input operation.
LinkedHashMap (java.util)
LinkedHashMap is an implementation of Map that guarantees iteration order. All optional operations are supported.
BorderLayout (java.awt)
A border layout lays out a container, arranging and resizing its components to fit in five regions: north, south, east, west, and center.
FlowLayout (java.awt)
A flow layout arranges components in a left-to-right flow, much like lines of text in a paragraph. Flow layouts are typically used to arrange buttons in a panel.
JFrame (javax.swing)
JLabel (javax.swing)
CodeWhisperer alternatives

How to use CoveredTextExtractor in org.cleartk.ml.feature.extractor

Best Java code snippets using org.cleartk.ml.feature.extractor.CoveredTextExtractor (Showing top 20 results out of 315)

How to use
CoveredTextExtractor
in
org.cleartk.ml.feature.extractor