/**
 * Creates an extractor that produces covered-text features only for tokens
 * whose part-of-speech tag is one of the given tags.
 *
 * @param acceptablePOSTags the POS tags for which token text should be extracted
 */
public TokenTextForSelectedPosExtractor(Collection<String> acceptablePOSTags) {
  // defensive copy: later mutations of the caller's collection must not leak in
  this.acceptablePOSTags = new HashSet<String>(acceptablePOSTags);
  this.extractor = new CoveredTextExtractor<Token>();
}
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // add features: word, stem, pos this.tokenFeatureExtractors = Lists.newArrayList(); this.tokenFeatureExtractors.add(new CoveredTextExtractor<Token>()); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "stem")); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "pos")); this.tokenFeatureExtractors.add(new ParentNodeFeaturesExtractor()); // add window of features before and after this.contextExtractors = Lists.newArrayList(); this.contextExtractors.add(new CleartkExtractor<Token, Token>( Token.class, new CoveredTextExtractor<Token>(), new Preceding(3), new Following(3))); }
/**
 * Sets up feature extractors for Time annotations: last word, character
 * category pattern, time-word lexicon match, and a bag of covered token texts.
 *
 * @param context the UIMA context supplied by the framework
 * @throws ResourceInitializationException if superclass initialization fails
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  this.featuresExtractors = Lists.newArrayList();
  this.featuresExtractors.add(new LastWordExtractor<Time>());
  FeatureExtractor1<Time> patternExtractor = CharacterCategoryPatternFunction.createExtractor();
  this.featuresExtractors.add(patternExtractor);
  this.featuresExtractors.add(new TimeWordsExtractor<Time>());
  this.featuresExtractors.add(new CleartkExtractor<Time, Token>(
      Token.class,
      new CoveredTextExtractor<Token>(),
      new Bag(new Covered())));
}
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // I explored a ton of features here, and the following were the only ones that worked // The only feature that I didn't try that seems like it might still have some promise // would be to find any times within, say, 5 tokens, and do the time value comparison // to see whether the nearby time is before, overlapping with or after the DCT List<FeatureExtractor1<Event>> srcExtractors = Lists.newArrayList(); srcExtractors.add(new TypePathExtractor<Event>(Event.class, "tense")); srcExtractors.add(new TypePathExtractor<Event>(Event.class, "aspect")); srcExtractors.add(new TypePathExtractor<Event>(Event.class, "eventClass")); srcExtractors.add(new TypePathExtractor<Event>(Event.class, "polarity")); srcExtractors.add(new TypePathExtractor<Event>(Event.class, "modality")); // the word, but only if it's an aspectual event srcExtractors.add( new FilteringExtractor<Event>(Event.class, new CoveredTextExtractor<Event>()) { @Override protected boolean accept(Event event) { return event.getEventClass().equals("ASPECTUAL"); } }); this.setSourceExtractors(srcExtractors); }
public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // Create an extractor that gives word counts for a document this.extractor = new CleartkExtractor<DocumentAnnotation, Token>( Token.class, new CoveredTextExtractor<Token>(), new Count(new Covered())); }
public VerbClauseTemporalAnnotator() { this.eventID = 1; FeatureExtractor1<Token> precedingAuxiliaries = new CleartkExtractor<Token, Token>( Token.class, new TokenTextForSelectedPosExtractor("MD", "TO", "IN", "VB", "RB"), new Preceding(3)); FeatureExtractor1<Token> tokenStemExtractor = new TypePathExtractor<Token>(Token.class, "stem"); FeatureExtractor1<Token> tokenPOSExtractor = new TypePathExtractor<Token>(Token.class, "pos"); this.sourceFeatureExtractors = Lists.newArrayList(); this.sourceFeatureExtractors.add(new NamingExtractor1<Token>("Source", new CoveredTextExtractor<Token>())); this.sourceFeatureExtractors.add(new NamingExtractor1<Token>("Source", tokenPOSExtractor)); this.sourceFeatureExtractors.add(new NamingExtractor1<Token>("Source", tokenStemExtractor)); this.sourceFeatureExtractors.add(new NamingExtractor1<Token>("Source", precedingAuxiliaries)); this.targetFeatureExtractors = Lists.newArrayList(); this.targetFeatureExtractors.add(new NamingExtractor1<Token>("Target", new CoveredTextExtractor<Token>())); this.targetFeatureExtractors.add(new NamingExtractor1<Token>("Target", tokenPOSExtractor)); this.targetFeatureExtractors.add(new NamingExtractor1<Token>("Target", tokenStemExtractor)); this.targetFeatureExtractors.add(new NamingExtractor1<Token>("Target", precedingAuxiliaries)); this.betweenAnchorsFeatureExtractors = new ArrayList<FeatureExtractor1<Annotation>>(); this.betweenAnchorsFeatureExtractors.add(new NamingExtractor1<Annotation>( "WordsBetween", new CleartkExtractor<Annotation, Token>(Token.class, new CoveredTextExtractor<Token>(), new Bag(new Covered())))); this.pathExtractor = new TargetPathExtractor(); }
/**
 * Sets up anchor feature extractors (tense, aspect, event class, and a
 * syntactic grandparent-first-child feature) shared by source and target,
 * plus between-anchor extractors (syntactic leaf-to-leaf path parts and a
 * bag of covered token texts).
 *
 * @param context the UIMA context supplied by the framework
 * @throws ResourceInitializationException if superclass initialization fails
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  List<FeatureExtractor1<Event>> anchorExtractors = Lists.newArrayList();
  anchorExtractors.add(new TypePathExtractor<Event>(Event.class, "tense"));
  anchorExtractors.add(new TypePathExtractor<Event>(Event.class, "aspect"));
  anchorExtractors.add(new TypePathExtractor<Event>(Event.class, "eventClass"));
  anchorExtractors.add(new SyntacticFirstChildOfGrandparentOfLeafExtractor<Event>());
  // the same extractor list serves both the source and the target anchor
  this.setSourceExtractors(anchorExtractors);
  this.setTargetExtractors(anchorExtractors);

  List<FeatureExtractor2<Anchor, Anchor>> betweenExtractors = Lists.newArrayList();
  betweenExtractors.add(new SyntacticLeafToLeafPathPartsExtractor<Anchor, Anchor>());
  betweenExtractors.add(new CleartkExtractor<Anchor, Token>(
      Token.class,
      new CoveredTextExtractor<Token>(),
      new Bag(new Covered())));
  this.setBetweenExtractors(betweenExtractors);
}
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // define chunking type this.chunking = new BioChunking<Token, Time>(Token.class, Time.class); // add features: word, character pattern, stem, pos this.tokenFeatureExtractors = Lists.newArrayList(); this.tokenFeatureExtractors.add(new CoveredTextExtractor<Token>()); NamedFeatureExtractor1<Token> ex = CharacterCategoryPatternFunction.createExtractor(); this.tokenFeatureExtractors.add(ex); this.tokenFeatureExtractors.add(new TimeWordsExtractor<Token>()); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "stem")); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "pos")); // add window of features before and after this.contextFeatureExtractors = Lists.newArrayList(); for (FeatureExtractor1<Token> extractor : this.tokenFeatureExtractors) { this.contextFeatureExtractors.add(new CleartkExtractor<Token, Token>(Token.class, extractor, new Preceding( 3), new Following(3))); } }
/**
 * Builds a TF-IDF extractor over per-document token counts, loading
 * previously-trained IDF statistics when a URI has been configured.
 *
 * @return the (possibly pre-loaded) TF-IDF extractor
 * @throws IOException if loading the IDF data fails
 */
private TfidfExtractor<String, DocumentAnnotation> initTfIdfExtractor() throws IOException {
  // raw token counts over the whole document
  CleartkExtractor<DocumentAnnotation, Token> counts =
      new CleartkExtractor<DocumentAnnotation, Token>(
          Token.class,
          new CoveredTextExtractor<Token>(),
          new CleartkExtractor.Count(new CleartkExtractor.Covered()));

  TfidfExtractor<String, DocumentAnnotation> tfIdfExtractor =
      new TfidfExtractor<String, DocumentAnnotation>(
          DocumentClassificationAnnotator.TFIDF_EXTRACTOR_KEY,
          counts);
  // only load IDF statistics if a model URI was configured
  if (this.tfIdfUri != null) {
    tfIdfExtractor.load(this.tfIdfUri);
  }
  return tfIdfExtractor;
}
/**
 * Builds a centroid TF-IDF similarity extractor over per-document token
 * counts, loading previously-computed centroid data when a URI has been
 * configured.
 *
 * @return the (possibly pre-loaded) centroid similarity extractor
 * @throws IOException if loading the centroid data fails
 */
private CentroidTfidfSimilarityExtractor<String, DocumentAnnotation> initCentroidTfIdfSimilarityExtractor() throws IOException {
  // raw token counts over the whole document
  CleartkExtractor<DocumentAnnotation, Token> counts =
      new CleartkExtractor<DocumentAnnotation, Token>(
          Token.class,
          new CoveredTextExtractor<Token>(),
          new CleartkExtractor.Count(new CleartkExtractor.Covered()));

  CentroidTfidfSimilarityExtractor<String, DocumentAnnotation> simExtractor =
      new CentroidTfidfSimilarityExtractor<String, DocumentAnnotation>(
          DocumentClassificationAnnotator.CENTROID_TFIDF_SIM_EXTRACTOR_KEY,
          counts);
  // only load centroid statistics if a model URI was configured
  if (this.tfIdfCentroidSimilarityUri != null) {
    simExtractor.load(this.tfIdfCentroidSimilarityUri);
  }
  return simExtractor;
}
public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // a feature extractor that creates features corresponding to the word, the word lower cased // the capitalization of the word, the numeric characterization of the word, and character ngram // suffixes of length 2 and 3. this.tokenFeatureExtractor = new FeatureFunctionExtractor<Token>( new CoveredTextExtractor<Token>(), new LowerCaseFeatureFunction(), new CapitalTypeFeatureFunction(), new NumericTypeFeatureFunction(), new CharacterNgramFeatureFunction(Orientation.RIGHT_TO_LEFT, 0, 2), new CharacterNgramFeatureFunction(Orientation.RIGHT_TO_LEFT, 0, 3)); // a feature extractor that extracts the surrounding token texts (within the same sentence) this.contextFeatureExtractor = new CleartkExtractor<Token, Token>( Token.class, new CoveredTextExtractor<Token>(), new Preceding(2), new Following(2)); }
new CombinedExtractor1 new CoveredTextExtractor(), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"),
new CombinedExtractor1 new CoveredTextExtractor(), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"),
new CoveredTextExtractor<Token>()) { @Override protected boolean accept(Token token) { tgtExtractors.add(new CleartkExtractor<Time, Token>(Token.class, new CoveredTextExtractor<Token>(), new Bag(new Covered()))); tgtExtractors.add(new TypePathExtractor<Time>(Time.class, "timeType")); tgtExtractors.add(new TypePathExtractor<Time>(Time.class, "value"));
/**
 * Builds an extractor producing per-token count features over a sentence,
 * with stopwords removed, where the counted "token" value is chosen by the
 * configured token field (covered text, stem, or lemma).
 *
 * @return a sentence-level token-count feature extractor
 */
private FeatureExtractor1<Sentence> createTokenCountsExtractor() {
  // default to covered text; overridden below for known token fields
  FeatureExtractor1<Token> fieldExtractor = new CoveredTextExtractor<Token>();
  switch (this.tokenField) {
    case COVERED_TEXT:
      fieldExtractor = new CoveredTextExtractor<Token>();
      break;
    case STEM:
      fieldExtractor = new TypePathExtractor<Token>(Token.class, "stem");
      break;
    case LEMMA:
      fieldExtractor = new TypePathExtractor<Token>(Token.class, "lemma");
      break;
  }
  return new CleartkExtractor<Sentence, Token>(
      Token.class,
      new StopwordRemovingExtractor<Token>(this.stopwords, fieldExtractor),
      new CleartkExtractor.Count(new CleartkExtractor.Covered()));
}
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // the token feature extractor: text, char pattern (uppercase, digits, etc.), and part-of-speech this.extractor = new CombinedExtractor1<Token>( new FeatureFunctionExtractor<Token>( new CoveredTextExtractor<Token>(), new CharacterCategoryPatternFunction<Token>(PatternType.REPEATS_MERGED)), new TypePathExtractor<Token>(Token.class, "pos")); // the context feature extractor: the features above for the 3 preceding and 3 following tokens this.contextExtractor = new CleartkExtractor<Token, Token>( Token.class, this.extractor, new Preceding(3), new Following(3)); // the chunking definition: Tokens will be combined to form NamedEntityMentions, with labels // from the "mentionType" attribute so that we get B-location, I-person, etc. this.chunking = new BioChunking<Token, NamedEntityMention>( Token.class, NamedEntityMention.class, "mentionType"); }
new CombinedExtractor1 new CoveredTextExtractor(), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"), new CleartkExtractor( BaseToken.class, new CoveredTextExtractor(),
new CleartkExtractor<>( BaseToken.class, new CoveredTextExtractor<>(), new CleartkExtractor.LastCovered( 2 ), // Worked fine
new CoveredTextExtractor<BaseToken>(),
/**
 * Sets up three groups of token feature extractors: per-token feature
 * functions (case, shape, and character-ngram prefixes/suffixes), a +/-2
 * token text window, and n-grams over that window.
 *
 * @param context the UIMA context supplied by the framework
 * @throws ResourceInitializationException if superclass initialization fails
 */
public void initialize(UimaContext context) throws ResourceInitializationException {
  // every other initialize(...) in this file delegates to the superclass
  // first; without this call the framework-level setup performed by the
  // superclass (e.g. classifier/data-writer creation) is silently skipped
  super.initialize(context);

  simpleExtractors = Lists.newArrayList();
  FeatureExtractor1<Token> wordExtractor = new CoveredTextExtractor<Token>();
  CharacterNgramFeatureFunction.Orientation fromLeft =
      CharacterNgramFeatureFunction.Orientation.LEFT_TO_RIGHT;
  CharacterNgramFeatureFunction.Orientation fromRight =
      CharacterNgramFeatureFunction.Orientation.RIGHT_TO_LEFT;
  // word text plus case/shape features and character-ngram prefixes
  // (length 1-3) and suffixes (length 1-6)
  simpleExtractors.add(new FeatureFunctionExtractor<Token>(
      wordExtractor,
      new LowerCaseFeatureFunction(),
      new CapitalTypeFeatureFunction(),
      new NumericTypeFeatureFunction(),
      new CharacterNgramFeatureFunction(fromLeft, 0, 1),
      new CharacterNgramFeatureFunction(fromLeft, 0, 2),
      new CharacterNgramFeatureFunction(fromLeft, 0, 3),
      new CharacterNgramFeatureFunction(fromRight, 0, 1),
      new CharacterNgramFeatureFunction(fromRight, 0, 2),
      new CharacterNgramFeatureFunction(fromRight, 0, 3),
      new CharacterNgramFeatureFunction(fromRight, 0, 4),
      new CharacterNgramFeatureFunction(fromRight, 0, 5),
      new CharacterNgramFeatureFunction(fromRight, 0, 6)));

  // token texts of the 2 preceding and 2 following tokens
  windowExtractors = Lists.newArrayList();
  windowExtractors.add(new CleartkExtractor<Token, Token>(
      Token.class,
      wordExtractor,
      new Preceding(2),
      new Following(2)));

  // n-grams over the same preceding/following windows
  windowNGramExtractors = Lists.newArrayList();
  windowNGramExtractors.add(new CleartkExtractor<Token, Token>(
      Token.class,
      wordExtractor,
      new Ngram(new Preceding(2)),
      new Ngram(new Following(2))));
}