/**
 * Reports whether the configured type path is valid for the given view.
 *
 * <p>A successful check is remembered in {@code pathChecked}, after which this method returns
 * {@code true} without re-checking. A failed check is not remembered, so it is repeated on the
 * next call.
 *
 * @param view the JCas handed to the three-argument {@code isValidPath} overload
 * @return {@code true} if the path was already verified or verifies now, {@code false} otherwise
 */
private boolean isValidPath(JCas view) {
  if (pathChecked) {
    return true;
  }
  boolean pathIsValid = isValidPath(type, path, view);
  if (pathIsValid) {
    pathChecked = true;
  }
  return pathIsValid;
}
/**
 * Extracts features for a pair of annotations by delegating to {@code extractBetween}.
 *
 * @param view the JCas passed through to {@code extractBetween}
 * @param annotation1 the first annotation of the pair
 * @param annotation2 the second annotation of the pair
 * @return the features returned by {@code extractBetween}
 * @throws CleartkExtractorException if the delegated extraction fails
 */
@Override
public java.util.List<Feature> extract(JCas view, FOCUS_T annotation1, FOCUS_T annotation2)
    throws CleartkExtractorException {
  java.util.List<Feature> betweenFeatures = this.extractBetween(view, annotation1, annotation2);
  return betweenFeatures;
}
/**
 * Initializes the superclass and registers two event feature extractors.
 *
 * <p>Both are {@code CleartkExtractor}s over {@code Token} with a {@code Bag(Covered)} context;
 * the first reads the type path {@code "stem"}, the second reads {@code "pos"}.
 *
 * @param context the UIMA context forwarded to {@code super.initialize}
 * @throws ResourceInitializationException if superclass initialization fails
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  // Extractor over the "stem" path of covered tokens.
  TypePathExtractor<Token> stemPath = new TypePathExtractor<Token>(Token.class, "stem");
  CleartkExtractor<Event, Token> stemExtractor =
      new CleartkExtractor<Event, Token>(Token.class, stemPath, new Bag(new Covered()));
  this.eventFeatureExtractors.add(stemExtractor);

  // Extractor over the "pos" path of covered tokens.
  TypePathExtractor<Token> posPath = new TypePathExtractor<Token>(Token.class, "pos");
  CleartkExtractor<Event, Token> posExtractor =
      new CleartkExtractor<Event, Token>(Token.class, posPath, new Bag(new Covered()));
  this.eventFeatureExtractors.add(posExtractor);
}
public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // Create an extractor that gives word counts for a document this.extractor = new CleartkExtractor<DocumentAnnotation, Token>( Token.class, new CoveredTextExtractor<Token>(), new Count(new Covered())); }
/**
 * Creates the extractor, optionally backed by a vector file.
 *
 * <p>When {@code vectorFile} is {@code null}, neither {@code continuousText} nor
 * {@code tokenVectorContext} is assigned by this constructor.
 *
 * @param vectorFile argument for the {@code ContinuousTextExtractor}; may be {@code null}
 * @throws CleartkExtractorException declared by this constructor; presumably raised when the
 *     vector file cannot be loaded (confirm against {@code ContinuousTextExtractor})
 */
public MentionClusterMentionFeaturesExtractor(String vectorFile) throws CleartkExtractorException {
  if (vectorFile == null) {
    // Nothing to configure without a vector file.
    return;
  }

  this.continuousText = new ContinuousTextExtractor(vectorFile);
  // Contexts: first and last covered token, plus one token on either side.
  // A Bag(Covered) context is intentionally left disabled here.
  this.tokenVectorContext =
      new CleartkExtractor<>(
          BaseToken.class,
          continuousText,
          new FirstCovered(1),
          new LastCovered(1),
          new Preceding(1),
          new Following(1));
}
/**
 * Initializes the superclass and registers one context extractor.
 *
 * <p>The extractor runs a {@code TokenTextForSelectedPosExtractor("RB")} over {@code Token}
 * with a {@code Bag(Preceding(3))} context.
 *
 * @param context the UIMA context forwarded to {@code super.initialize}
 * @throws ResourceInitializationException if superclass initialization fails
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  TokenTextForSelectedPosExtractor selectedPosText = new TokenTextForSelectedPosExtractor("RB");
  CleartkExtractor<Event, Token> precedingContext =
      new CleartkExtractor<Event, Token>(
          Token.class,
          selectedPosText,
          new Bag(new Preceding(3)));
  this.contextExtractors.add(precedingContext);
}
/**
 * Applies the given extractor to the focus annotation itself and wraps each resulting feature in
 * a {@code ContextFeature} named after this context.
 *
 * <p>The {@code bounds} argument is not consulted by this implementation.
 *
 * @param jCas the JCas handed to the wrapped extractor
 * @param focusAnnotation the annotation to extract from; cast to {@code annotationClass}
 * @param bounds unused here
 * @param annotationClass the type the focus annotation is cast to
 * @param extractor the extractor applied to the cast focus annotation
 * @return a new list with one {@code ContextFeature} per extracted feature, in the same order
 * @throws CleartkExtractorException if the wrapped extractor fails
 */
@Override
public <SEARCH_T extends Annotation> List<Feature> extract(
    JCas jCas,
    Annotation focusAnnotation,
    Bounds bounds,
    Class<SEARCH_T> annotationClass,
    FeatureExtractor1<SEARCH_T> extractor) throws CleartkExtractorException {
  List<Feature> wrapped = new ArrayList<Feature>();
  SEARCH_T typedFocus = annotationClass.cast(focusAnnotation);
  List<Feature> extracted = extractor.extract(jCas, typedFocus);
  for (Feature inner : extracted) {
    ContextFeature contextFeature = new ContextFeature(this.getName(), inner);
    wrapped.add(contextFeature);
  }
  return wrapped;
}
/**
 * Creates the exception reporting an invalid type path.
 *
 * <p>The message is looked up in {@code DEFAULT_RESOURCE_BUNDLE} under the key
 * {@code KEY_PREFIX + "invalidTypePath"}, with {@code path} and {@code type} as arguments.
 *
 * @param path the offending type path
 * @param type the type the path was evaluated against
 * @return a new, un-thrown {@code CleartkExtractorException}
 */
public static CleartkExtractorException invalidTypePath(String path, Type type) {
  return new CleartkExtractorException(
      DEFAULT_RESOURCE_BUNDLE,
      KEY_PREFIX + "invalidTypePath",
      path,
      type);
}
/**
 * Creates a filtering extractor around the given extractor.
 *
 * <p>The supplied extractor is wrapped in a {@code NamingExtractor1} built from {@code name}
 * before being stored.
 *
 * @param annotationClass the annotation class stored on this instance
 * @param name the name passed to the {@code NamingExtractor1} wrapper
 * @param extractor the extractor to wrap
 */
public FilteringExtractor(Class<T> annotationClass, String name, FeatureExtractor1<T> extractor) {
  NamingExtractor1<T> namedExtractor = new NamingExtractor1<T>(name, extractor);
  this.extractor = namedExtractor;
  this.annotationClass = annotationClass;
}
/**
 * Creates a feature tied to a type path.
 *
 * <p>The stored feature name is not {@code name} itself but the result of
 * {@code TypePathExtractor.createName(name, typePath)}.
 *
 * @param name the base name given to {@code TypePathExtractor.createName}
 * @param value the feature value, forwarded to the superclass constructor
 * @param typePath the type path stored on this feature and used to derive the name
 */
public TypePathFeature(String name, Object value, String typePath) {
  super(value);
  this.name = TypePathExtractor.createName(name, typePath);
  this.typePath = typePath;
}
/**
 * Returns the feature name reported by the wrapped extractor.
 *
 * @return the value of {@code this.extractor.getFeatureName()}
 */
@Override
public String getFeatureName() {
  String delegateName = this.extractor.getFeatureName();
  return delegateName;
}
/**
 * Initializes the superclass, then registers one event feature extractor and one context
 * extractor.
 *
 * <p>The event extractor reads the type path {@code "pos"} of {@code Token} with a
 * {@code Bag(Covered)} context. The context extractor runs
 * {@code TokenTextForSelectedPosExtractor("VB")} with a {@code Bag(Preceding(3))} context.
 *
 * @param context the UIMA context forwarded to {@code super.initialize}
 * @throws ResourceInitializationException if superclass initialization fails
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  // Event feature: "pos" path over covered tokens.
  TypePathExtractor<Token> posPath = new TypePathExtractor<Token>(Token.class, "pos");
  CleartkExtractor<Event, Token> coveredPos =
      new CleartkExtractor<Event, Token>(Token.class, posPath, new Bag(new Covered()));
  this.eventFeatureExtractors.add(coveredPos);

  // Context feature: selected-POS token text over the three preceding tokens.
  TokenTextForSelectedPosExtractor selectedPosText = new TokenTextForSelectedPosExtractor("VB");
  CleartkExtractor<Event, Token> precedingText =
      new CleartkExtractor<Event, Token>(
          Token.class,
          selectedPosText,
          new Bag(new Preceding(3)));
  this.contextExtractors.add(precedingText);
}
/**
 * Builds the tf-idf extractor over token counts and, when a URI is configured, loads its data.
 *
 * <p>The underlying counts come from a {@code CleartkExtractor} over {@code Token} that uses a
 * {@code CoveredTextExtractor} with a {@code Count(Covered)} context. The tf-idf extractor is
 * keyed by {@code DocumentClassificationAnnotator.TFIDF_EXTRACTOR_KEY}.
 *
 * @return the tf-idf extractor, loaded from {@code this.tfIdfUri} if that field is non-null
 * @throws IOException if loading from {@code this.tfIdfUri} fails
 */
private TfidfExtractor<String, DocumentAnnotation> initTfIdfExtractor() throws IOException {
  CoveredTextExtractor<Token> tokenText = new CoveredTextExtractor<Token>();
  CleartkExtractor<DocumentAnnotation, Token> tokenCounts =
      new CleartkExtractor<DocumentAnnotation, Token>(
          Token.class,
          tokenText,
          new CleartkExtractor.Count(new CleartkExtractor.Covered()));

  TfidfExtractor<String, DocumentAnnotation> tfidf =
      new TfidfExtractor<String, DocumentAnnotation>(
          DocumentClassificationAnnotator.TFIDF_EXTRACTOR_KEY,
          tokenCounts);

  if (this.tfIdfUri == null) {
    // No stored statistics configured; hand back the unloaded extractor.
    return tfidf;
  }
  tfidf.load(this.tfIdfUri);
  return tfidf;
}
/**
 * Creates the extractor, optionally backed by a vector file.
 *
 * <p>When {@code vectorFile} is {@code null}, neither {@code continuousText} nor
 * {@code tokenVectorContext} is assigned by this constructor.
 *
 * @param vectorFile argument for the {@code ContinuousTextExtractor}; may be {@code null}
 * @throws CleartkExtractorException declared by this constructor; presumably raised when the
 *     vector file cannot be loaded (confirm against {@code ContinuousTextExtractor})
 */
public MentionClusterMentionFeaturesExtractor(String vectorFile) throws CleartkExtractorException {
  if (vectorFile == null) {
    // Nothing to configure without a vector file.
    return;
  }

  this.continuousText = new ContinuousTextExtractor(vectorFile);
  // Contexts: first and last covered token, plus one token on either side.
  // A Bag(Covered) context is intentionally left disabled here.
  this.tokenVectorContext =
      new CleartkExtractor<>(
          BaseToken.class,
          continuousText,
          new FirstCovered(1),
          new LastCovered(1),
          new Preceding(1),
          new Following(1));
}
/**
 * Initializes the superclass and registers one context extractor.
 *
 * <p>The extractor runs {@code TokenTextForSelectedPosExtractor("RB", "MD", "TO", "IN")} over
 * {@code Token} with a {@code Bag(Preceding(3))} context.
 *
 * @param context the UIMA context forwarded to {@code super.initialize}
 * @throws ResourceInitializationException if superclass initialization fails
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  TokenTextForSelectedPosExtractor selectedPosText =
      new TokenTextForSelectedPosExtractor("RB", "MD", "TO", "IN");
  CleartkExtractor<Event, Token> precedingContext =
      new CleartkExtractor<Event, Token>(
          Token.class,
          selectedPosText,
          new Bag(new Preceding(3)));
  this.contextExtractors.add(precedingContext);
}
public List<Feature> extractBetween(JCas view, FOCUS_T annotation1, FOCUS_T annotation2) throws CleartkExtractorException { int begin = annotation1.getEnd(); int end = annotation2.getBegin(); // FIXME: creating a new annotation may leak memory - is there a better approach? Annotation focusAnnotation = new Annotation(view, begin, end); return this.extract(view, focusAnnotation, new NoBounds()); }
/**
 * Creates the exception reporting that two names were supplied.
 *
 * <p>The message is looked up in {@code DEFAULT_RESOURCE_BUNDLE} under the key
 * {@code KEY_PREFIX + "moreThanOneName"}, with both names as arguments.
 *
 * @param name1 the first name
 * @param name2 the second name
 * @return a new, un-thrown {@code CleartkExtractorException}
 */
public static CleartkExtractorException moreThanOneName(String name1, String name2) {
  return new CleartkExtractorException(
      DEFAULT_RESOURCE_BUNDLE,
      KEY_PREFIX + "moreThanOneName",
      name1,
      name2);
}
/**
 * Initializes the superclass, then registers two event feature extractors and one context
 * extractor.
 *
 * <p>Event features: a {@code TextSliceExtractor} built with {@code -2}, and the {@code "pos"}
 * type path of {@code Token} with a {@code Bag(Covered)} context. Context feature:
 * {@code TokenTextForSelectedPosExtractor("MD", "TO", "IN", "VB")} with a plain
 * {@code Preceding(3)} context (not wrapped in {@code Bag}).
 *
 * @param context the UIMA context forwarded to {@code super.initialize}
 * @throws ResourceInitializationException if superclass initialization fails
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  // Event feature: text slice configured with -2.
  TextSliceExtractor<Event> textSlice = new TextSliceExtractor<Event>(-2);
  this.eventFeatureExtractors.add(textSlice);

  // Event feature: "pos" path over covered tokens.
  TypePathExtractor<Token> posPath = new TypePathExtractor<Token>(Token.class, "pos");
  CleartkExtractor<Event, Token> coveredPos =
      new CleartkExtractor<Event, Token>(Token.class, posPath, new Bag(new Covered()));
  this.eventFeatureExtractors.add(coveredPos);

  // Context feature: selected-POS token text over the three preceding tokens, unbagged.
  TokenTextForSelectedPosExtractor selectedPosText =
      new TokenTextForSelectedPosExtractor("MD", "TO", "IN", "VB");
  CleartkExtractor<Event, Token> precedingText =
      new CleartkExtractor<Event, Token>(Token.class, selectedPosText, new Preceding(3));
  this.contextExtractors.add(precedingText);
}
/**
 * Builds the centroid tf-idf similarity extractor over token counts and, when a URI is
 * configured, loads its data.
 *
 * <p>The underlying counts come from a {@code CleartkExtractor} over {@code Token} that uses a
 * {@code CoveredTextExtractor} with a {@code Count(Covered)} context. The similarity extractor is
 * keyed by {@code DocumentClassificationAnnotator.CENTROID_TFIDF_SIM_EXTRACTOR_KEY}.
 *
 * @return the similarity extractor, loaded from {@code this.tfIdfCentroidSimilarityUri} if that
 *     field is non-null
 * @throws IOException if loading from {@code this.tfIdfCentroidSimilarityUri} fails
 */
private CentroidTfidfSimilarityExtractor<String, DocumentAnnotation> initCentroidTfIdfSimilarityExtractor()
    throws IOException {
  CoveredTextExtractor<Token> tokenText = new CoveredTextExtractor<Token>();
  CleartkExtractor<DocumentAnnotation, Token> tokenCounts =
      new CleartkExtractor<DocumentAnnotation, Token>(
          Token.class,
          tokenText,
          new CleartkExtractor.Count(new CleartkExtractor.Covered()));

  CentroidTfidfSimilarityExtractor<String, DocumentAnnotation> similarity =
      new CentroidTfidfSimilarityExtractor<String, DocumentAnnotation>(
          DocumentClassificationAnnotator.CENTROID_TFIDF_SIM_EXTRACTOR_KEY,
          tokenCounts);

  if (this.tfIdfCentroidSimilarityUri == null) {
    // No stored centroid configured; hand back the unloaded extractor.
    return similarity;
  }
  similarity.load(this.tfIdfCentroidSimilarityUri);
  return similarity;
}