org.cleartk.ml.feature.extractor Java code examples

/**
 * Extracts features for a single focus annotation by delegating to the three-argument
 * {@code extract}, passing a freshly created {@code NoBounds} as the bounds argument.
 *
 * @param view the JCas handed through to the three-argument overload
 * @param focusAnnotation the annotation handed through to the three-argument overload
 * @return the list produced by the three-argument overload
 * @throws CleartkExtractorException if the delegated call throws it
 */
@Override
public List<Feature> extract(JCas view, FOCUS_T focusAnnotation) throws CleartkExtractorException {
 NoBounds unbounded = new NoBounds();
 return this.extract(view, focusAnnotation, unbounded);
}

/**
 * Reports whether the configured type path is valid for the given view, remembering a
 * positive answer in {@code pathChecked} so later calls return {@code true} immediately.
 * A negative answer is not remembered, so the check is repeated on the next call.
 *
 * @param view the JCas passed to the three-argument {@code isValidPath}
 * @return {@code true} if the path was already confirmed or is confirmed now
 */
private boolean isValidPath(JCas view) {
 if (pathChecked) {
  return true;
 }
 boolean confirmed = isValidPath(type, path, view);
 if (confirmed) {
  pathChecked = true;
 }
 return confirmed;
}

/**
 * Two-annotation entry point; simply forwards to {@code extractBetween}.
 *
 * @param view the JCas forwarded to {@code extractBetween}
 * @param annotation1 the first annotation, forwarded unchanged
 * @param annotation2 the second annotation, forwarded unchanged
 * @return the list returned by {@code extractBetween}
 * @throws CleartkExtractorException if {@code extractBetween} throws it
 */
@Override
public java.util.List<Feature> extract(JCas view, FOCUS_T annotation1, FOCUS_T annotation2)
  throws CleartkExtractorException {
 java.util.List<Feature> betweenFeatures = this.extractBetween(view, annotation1, annotation2);
 return betweenFeatures;
}

/**
 * Runs the superclass initialization, then appends two extractors to
 * {@code eventFeatureExtractors}: one reading the {@code "stem"} type path of {@code Token}
 * and one reading the {@code "pos"} type path, each wrapped in {@code Bag(Covered)}.
 *
 * @param context the UIMA context forwarded to {@code super.initialize}
 * @throws ResourceInitializationException if the superclass initialization throws it
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
 super.initialize(context);
 // One extractor per Token type path; insertion order is "stem" first, then "pos".
 for (String tokenTypePath : new String[] { "stem", "pos" }) {
  this.eventFeatureExtractors.add(
    new CleartkExtractor<Event, Token>(
      Token.class,
      new TypePathExtractor<Token>(Token.class, tokenTypePath),
      new Bag(new Covered())));
 }
}

/**
 * Runs the superclass initialization and then sets {@code extractor} to a
 * {@code CleartkExtractor} intended to give word counts for a document.
 *
 * @param context the UIMA context forwarded to {@code super.initialize}
 * @throws ResourceInitializationException if the superclass initialization throws it
 */
public void initialize(UimaContext context) throws ResourceInitializationException {
 super.initialize(context);
 // Word counts: covered text of each Token, combined through Count(Covered).
 CoveredTextExtractor<Token> tokenText = new CoveredTextExtractor<Token>();
 Count coveredCounts = new Count(new Covered());
 this.extractor = new CleartkExtractor<DocumentAnnotation, Token>(Token.class, tokenText, coveredCounts);
}

 /**
  * Builds the extractor. When {@code vectorFile} is non-null, a
  * {@code ContinuousTextExtractor} is created from it and wrapped in a
  * {@code CleartkExtractor} over {@code BaseToken} with four contexts:
  * {@code FirstCovered(1)}, {@code LastCovered(1)}, {@code Preceding(1)} and
  * {@code Following(1)}. When it is null, this constructor assigns neither field.
  *
  * @param vectorFile argument for {@code ContinuousTextExtractor}; may be null
  * @throws CleartkExtractorException declared by the constructor signature
  */
 public MentionClusterMentionFeaturesExtractor(String vectorFile) throws CleartkExtractorException {
  if (vectorFile == null) {
   return;
  }
  this.continuousText = new ContinuousTextExtractor(vectorFile);
  // Note: a Bag(Covered) context between LastCovered and Preceding is intentionally disabled.
  this.tokenVectorContext = new CleartkExtractor<>(
    BaseToken.class,
    this.continuousText,
    new FirstCovered(1),
    new LastCovered(1),
    new Preceding(1),
    new Following(1));
 }

/**
 * Runs the superclass initialization, then appends to {@code contextExtractors} a
 * {@code CleartkExtractor} over {@code Token} that uses a
 * {@code TokenTextForSelectedPosExtractor} configured with {@code "RB"} and a
 * {@code Bag(Preceding(3))} context.
 *
 * @param context the UIMA context forwarded to {@code super.initialize}
 * @throws ResourceInitializationException if the superclass initialization throws it
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
 super.initialize(context);
 TokenTextForSelectedPosExtractor rbTokenText = new TokenTextForSelectedPosExtractor("RB");
 Bag precedingThree = new Bag(new Preceding(3));
 this.contextExtractors.add(new CleartkExtractor<Event, Token>(Token.class, rbTokenText, precedingThree));
}

/**
 * Applies {@code extractor} to the focus annotation itself (cast to {@code annotationClass})
 * and wraps every resulting feature in a {@code ContextFeature} named after this context.
 * The {@code bounds} argument is not consulted by this implementation.
 *
 * @param jCas the JCas passed to {@code extractor}
 * @param focusAnnotation the annotation to cast and extract from
 * @param bounds unused here
 * @param annotationClass the class used to cast {@code focusAnnotation}
 * @param extractor the extractor applied to the cast annotation
 * @return a new list holding one {@code ContextFeature} per extracted feature, in order
 * @throws CleartkExtractorException if {@code extractor} throws it
 */
@Override
public <SEARCH_T extends Annotation> List<Feature> extract(
  JCas jCas,
  Annotation focusAnnotation,
  Bounds bounds,
  Class<SEARCH_T> annotationClass,
  FeatureExtractor1<SEARCH_T> extractor) throws CleartkExtractorException {
 List<Feature> wrapped = new ArrayList<Feature>();
 SEARCH_T typedFocus = annotationClass.cast(focusAnnotation);
 for (Feature inner : extractor.extract(jCas, typedFocus)) {
  ContextFeature named = new ContextFeature(this.getName(), inner);
  wrapped.add(named);
 }
 return wrapped;
}

/**
 * Creates (but does not throw) the exception for an invalid type path. The message key is
 * {@code KEY_PREFIX + "invalidTypePath"}, looked up in {@code DEFAULT_RESOURCE_BUNDLE}.
 *
 * @param path the type path, passed as the first message argument
 * @param type the type, passed as the second message argument
 * @return the newly constructed exception
 */
public static CleartkExtractorException invalidTypePath(String path, Type type) {
 return new CleartkExtractorException(
   DEFAULT_RESOURCE_BUNDLE,
   KEY_PREFIX + "invalidTypePath",
   path,
   type);
}

/**
 * Stores the annotation class and wraps the given extractor in a {@code NamingExtractor1}
 * built with {@code name}.
 *
 * @param annotationClass the class stored in {@code this.annotationClass}
 * @param name the name handed to {@code NamingExtractor1}
 * @param extractor the extractor handed to {@code NamingExtractor1}
 */
public FilteringExtractor(Class<T> annotationClass, String name, FeatureExtractor1<T> extractor) {
 NamingExtractor1<T> namedExtractor = new NamingExtractor1<T>(name, extractor);
 this.extractor = namedExtractor;
 this.annotationClass = annotationClass;
}

public TypePathFeature(String name, Object value, String typePath) {
 super(value);
 this.typePath = typePath;
 this.name = TypePathExtractor.createName(name, typePath);
}

/**
 * Returns the feature name reported by the wrapped {@code extractor}.
 *
 * @return the value of {@code extractor.getFeatureName()}
 */
@Override
public String getFeatureName() {
 String delegateName = this.extractor.getFeatureName();
 return delegateName;
}

/**
 * Runs the superclass initialization, then registers two extractors:
 * a {@code "pos"} type-path extractor over {@code Token} with {@code Bag(Covered)} in
 * {@code eventFeatureExtractors}, and a {@code TokenTextForSelectedPosExtractor("VB")}
 * with {@code Bag(Preceding(3))} in {@code contextExtractors}.
 *
 * @param context the UIMA context forwarded to {@code super.initialize}
 * @throws ResourceInitializationException if the superclass initialization throws it
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
 super.initialize(context);

 TypePathExtractor<Token> posPath = new TypePathExtractor<Token>(Token.class, "pos");
 Bag coveredBag = new Bag(new Covered());
 this.eventFeatureExtractors.add(new CleartkExtractor<Event, Token>(Token.class, posPath, coveredBag));

 TokenTextForSelectedPosExtractor vbTokenText = new TokenTextForSelectedPosExtractor("VB");
 Bag precedingBag = new Bag(new Preceding(3));
 this.contextExtractors.add(new CleartkExtractor<Event, Token>(Token.class, vbTokenText, precedingBag));
}

/**
 * Builds a {@code TfidfExtractor} keyed by
 * {@code DocumentClassificationAnnotator.TFIDF_EXTRACTOR_KEY} on top of a token-count
 * extractor (covered text of each {@code Token}, {@code Count(Covered)} context).
 * If {@code tfIdfUri} is set, the extractor's {@code load} is called with it before returning.
 *
 * @return the configured extractor, loaded only when {@code tfIdfUri} is non-null
 * @throws IOException declared by this method; presumably raised by {@code load}
 */
private TfidfExtractor<String, DocumentAnnotation> initTfIdfExtractor() throws IOException {
 CoveredTextExtractor<Token> tokenText = new CoveredTextExtractor<Token>();
 CleartkExtractor.Count coveredCount = new CleartkExtractor.Count(new CleartkExtractor.Covered());
 CleartkExtractor<DocumentAnnotation, Token> tokenCounter =
   new CleartkExtractor<DocumentAnnotation, Token>(Token.class, tokenText, coveredCount);

 TfidfExtractor<String, DocumentAnnotation> tfidf = new TfidfExtractor<String, DocumentAnnotation>(
   DocumentClassificationAnnotator.TFIDF_EXTRACTOR_KEY,
   tokenCounter);
 if (this.tfIdfUri == null) {
  return tfidf;
 }
 tfidf.load(this.tfIdfUri);
 return tfidf;
}

 /**
  * Constructs the extractor. With a non-null {@code vectorFile}, this creates a
  * {@code ContinuousTextExtractor} from it and a {@code CleartkExtractor} over
  * {@code BaseToken} using the contexts {@code FirstCovered(1)}, {@code LastCovered(1)},
  * {@code Preceding(1)} and {@code Following(1)}. With a null {@code vectorFile}, nothing
  * is assigned here.
  *
  * @param vectorFile argument for {@code ContinuousTextExtractor}; may be null
  * @throws CleartkExtractorException declared by the constructor signature
  */
 public MentionClusterMentionFeaturesExtractor(String vectorFile) throws CleartkExtractorException {
  if (vectorFile == null) {
   return;
  }
  this.continuousText = new ContinuousTextExtractor(vectorFile);
  // Note: a Bag(Covered) context between LastCovered and Preceding is intentionally disabled.
  this.tokenVectorContext = new CleartkExtractor<>(
    BaseToken.class,
    this.continuousText,
    new FirstCovered(1),
    new LastCovered(1),
    new Preceding(1),
    new Following(1));
 }

/**
 * Runs the superclass initialization, then appends to {@code contextExtractors} a
 * {@code CleartkExtractor} over {@code Token} whose inner extractor is a
 * {@code TokenTextForSelectedPosExtractor} configured with {@code "RB"}, {@code "MD"},
 * {@code "TO"} and {@code "IN"}, using a {@code Bag(Preceding(3))} context.
 *
 * @param context the UIMA context forwarded to {@code super.initialize}
 * @throws ResourceInitializationException if the superclass initialization throws it
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
 super.initialize(context);
 TokenTextForSelectedPosExtractor selectedPosText =
   new TokenTextForSelectedPosExtractor("RB", "MD", "TO", "IN");
 Bag precedingBag = new Bag(new Preceding(3));
 CleartkExtractor<Event, Token> precedingContext =
   new CleartkExtractor<Event, Token>(Token.class, selectedPosText, precedingBag);
 this.contextExtractors.add(precedingContext);
}

/**
 * Extracts features for the span that starts at the end offset of {@code annotation1} and
 * stops at the begin offset of {@code annotation2}. A new {@code Annotation} covering that
 * span is created and passed to the three-argument {@code extract} with a {@code NoBounds}.
 *
 * @param view the JCas used to create the span annotation and passed to {@code extract}
 * @param annotation1 supplies the span's begin offset (its end)
 * @param annotation2 supplies the span's end offset (its begin)
 * @return the list returned by the three-argument {@code extract}
 * @throws CleartkExtractorException if the delegated call throws it
 */
public List<Feature> extractBetween(JCas view, FOCUS_T annotation1, FOCUS_T annotation2)
  throws CleartkExtractorException {
 // FIXME: creating a new annotation may leak memory - is there a better approach?
 Annotation gap = new Annotation(view, annotation1.getEnd(), annotation2.getBegin());
 NoBounds unbounded = new NoBounds();
 return this.extract(view, gap, unbounded);
}

/**
 * Creates (but does not throw) the exception for the "more than one name" case. The message
 * key is {@code KEY_PREFIX + "moreThanOneName"}, looked up in {@code DEFAULT_RESOURCE_BUNDLE}.
 *
 * @param name1 passed as the first message argument
 * @param name2 passed as the second message argument
 * @return the newly constructed exception
 */
public static CleartkExtractorException moreThanOneName(String name1, String name2) {
 return new CleartkExtractorException(
   DEFAULT_RESOURCE_BUNDLE,
   KEY_PREFIX + "moreThanOneName",
   name1,
   name2);
}

/**
 * Runs the superclass initialization, then registers three extractors:
 * a {@code TextSliceExtractor<Event>(-2)} and a {@code "pos"} type-path extractor over
 * {@code Token} with {@code Bag(Covered)} in {@code eventFeatureExtractors}, followed by a
 * {@code TokenTextForSelectedPosExtractor("MD", "TO", "IN", "VB")} with a plain
 * {@code Preceding(3)} context (no {@code Bag}) in {@code contextExtractors}.
 *
 * @param context the UIMA context forwarded to {@code super.initialize}
 * @throws ResourceInitializationException if the superclass initialization throws it
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
 super.initialize(context);

 TextSliceExtractor<Event> textSlice = new TextSliceExtractor<Event>(-2);
 this.eventFeatureExtractors.add(textSlice);

 TypePathExtractor<Token> posPath = new TypePathExtractor<Token>(Token.class, "pos");
 Bag coveredBag = new Bag(new Covered());
 this.eventFeatureExtractors.add(new CleartkExtractor<Event, Token>(Token.class, posPath, coveredBag));

 TokenTextForSelectedPosExtractor selectedPosText =
   new TokenTextForSelectedPosExtractor("MD", "TO", "IN", "VB");
 Preceding precedingThree = new Preceding(3);
 this.contextExtractors.add(new CleartkExtractor<Event, Token>(Token.class, selectedPosText, precedingThree));
}

/**
 * Builds a {@code CentroidTfidfSimilarityExtractor} keyed by
 * {@code DocumentClassificationAnnotator.CENTROID_TFIDF_SIM_EXTRACTOR_KEY} on top of a
 * token-count extractor (covered text of each {@code Token}, {@code Count(Covered)} context).
 * If {@code tfIdfCentroidSimilarityUri} is set, the extractor's {@code load} is called with
 * it before returning.
 *
 * @return the configured extractor, loaded only when the URI field is non-null
 * @throws IOException declared by this method; presumably raised by {@code load}
 */
private CentroidTfidfSimilarityExtractor<String, DocumentAnnotation> initCentroidTfIdfSimilarityExtractor()
  throws IOException {
 CoveredTextExtractor<Token> tokenText = new CoveredTextExtractor<Token>();
 CleartkExtractor.Count coveredCount = new CleartkExtractor.Count(new CleartkExtractor.Covered());
 CleartkExtractor<DocumentAnnotation, Token> tokenCounter =
   new CleartkExtractor<DocumentAnnotation, Token>(Token.class, tokenText, coveredCount);

 CentroidTfidfSimilarityExtractor<String, DocumentAnnotation> similarity =
   new CentroidTfidfSimilarityExtractor<String, DocumentAnnotation>(
     DocumentClassificationAnnotator.CENTROID_TFIDF_SIM_EXTRACTOR_KEY,
     tokenCounter);
 if (this.tfIdfCentroidSimilarityUri == null) {
  return similarity;
 }
 similarity.load(this.tfIdfCentroidSimilarityUri);
 return similarity;
}

How to use org.cleartk.ml.feature.extractor

Best Java code snippets using org.cleartk.ml.feature.extractor (Showing top 20 results out of 315)