/**
 * Initializes the annotator and registers the event-attribute feature extractors.
 *
 * <p>One {@link TypePathExtractor} is built for each of the {@code tense}, {@code aspect} and
 * {@code eventClass} paths of {@link Event}. The very same list instance is then handed to both
 * {@code setSourceExtractors} and {@code setTargetExtractors}.
 *
 * @param context the UIMA context, forwarded unchanged to {@code super.initialize}
 * @throws ResourceInitializationException propagated from {@code super.initialize}
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  List<FeatureExtractor1<Event>> eventAttributeExtractors = Lists.newArrayList();
  for (String attributePath : new String[] { "tense", "aspect", "eventClass" }) {
    eventAttributeExtractors.add(new TypePathExtractor<Event>(Event.class, attributePath));
  }

  // Source and target anchors share one extractor list.
  this.setSourceExtractors(eventAttributeExtractors);
  this.setTargetExtractors(eventAttributeExtractors);
}
/**
 * Extracts one {@code TypePathFeature} per value found by following {@code path} from the
 * focus annotation.
 *
 * <p>The UIMA type for {@code focusClass} is resolved on first use and cached in {@code type};
 * the view's type system is stored in {@code typeSystem} on every call. When
 * {@code uniqueValues} is set, a value that has already produced a feature during this call is
 * not emitted again; otherwise every collected value yields a feature.
 *
 * @param view the JCas the focus annotation belongs to
 * @param focusAnnotation the annotation the path is followed from
 * @return the features, in the order their values were collected by {@code _extract}
 * @throws CleartkExtractorException if the path is not valid for the resolved type
 */
@Override
public List<org.cleartk.ml.Feature> extract(JCas view, Annotation focusAnnotation)
    throws CleartkExtractorException {
  if (this.type == null) {
    this.type = JCasUtil.getType(view, this.focusClass);
  }
  this.typeSystem = view.getTypeSystem();

  if (!isValidPath(view)) {
    throw CleartkExtractorException.invalidTypePath(path, type);
  }

  // Walk the slash-separated path and gather every value reached.
  List<Object> collected = new ArrayList<Object>();
  _extract(view, focusAnnotation, path.split("/"), collected);

  List<org.cleartk.ml.Feature> features = new ArrayList<org.cleartk.ml.Feature>();
  Set<Object> seen = new HashSet<Object>();
  for (Object candidate : collected) {
    // Set.add reports whether the value was new; it is recorded either way.
    boolean firstOccurrence = seen.add(candidate);
    if (firstOccurrence || !uniqueValues) {
      features.add(new TypePathFeature(null, candidate, this.path, this.featureName));
    }
  }
  return features;
}
public TypePathFeature(String name, Object value, String typePath) { super(value); this.typePath = typePath; this.name = TypePathExtractor.createName(name, typePath); }
Object pathValue = getPrimitiveFeatureValue(view, featureStructure, feature); if (pathValue != null) pathValues.add(pathValue); Type componentType = featureType.getComponentType(); if (componentType.isPrimitive()) { Object[] values = getPrimitiveArrayFeatureValue(view, featureStructure, feature); if (allValues) pathValues.addAll(Arrays.asList(values)); for (int i = 0; i < fsArray.size(); i++) { FeatureStructure fs = fsArray.get(i); _extract(view, fs, remainingPathMembers, pathValues); _extract(view, fsArray.get(0), remainingPathMembers, pathValues); _extract(view, featureValue, remainingPathMembers, pathValues);
/**
 * Checks this extractor's {@code path} against its {@code type}, remembering a positive result.
 *
 * <p>Once the path has been found valid, {@code pathChecked} is set and later calls return
 * {@code true} without re-checking. A negative result is not remembered, so the check is
 * repeated on the next call.
 *
 * @param view the JCas passed through to the static validity check
 * @return {@code true} if the path is (or was previously found to be) valid
 */
private boolean isValidPath(JCas view) {
  if (pathChecked) {
    return true;
  }
  // Store the outcome directly: the flag stays false on failure and flips to true on success.
  pathChecked = isValidPath(type, path, view);
  return pathChecked;
}
public static boolean isValidPath(Type type, String path, JCas view) { String[] pathMembers = path.split("/"); Type pathMemberType = type; // will be set to type of last path member // feature type for (String pathMember : pathMembers) { Feature feature = pathMemberType.getFeatureByBaseName(pathMember); if (feature == null) { return false; } pathMemberType = feature.getRange(); if (pathMemberType.isArray()) pathMemberType = pathMemberType.getComponentType(); } return isValidType(pathMemberType, view.getTypeSystem()); }
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // I explored a ton of features here, and the following were the only ones that worked // The only feature that I didn't try that seems like it might still have some promise // would be to find any times within, say, 5 tokens, and do the time value comparison // to see whether the nearby time is before, overlapping with or after the DCT List<FeatureExtractor1<Event>> srcExtractors = Lists.newArrayList(); srcExtractors.add(new TypePathExtractor<Event>(Event.class, "tense")); srcExtractors.add(new TypePathExtractor<Event>(Event.class, "aspect")); srcExtractors.add(new TypePathExtractor<Event>(Event.class, "eventClass")); srcExtractors.add(new TypePathExtractor<Event>(Event.class, "polarity")); srcExtractors.add(new TypePathExtractor<Event>(Event.class, "modality")); // the word, but only if it's an aspectual event srcExtractors.add( new FilteringExtractor<Event>(Event.class, new CoveredTextExtractor<Event>()) { @Override protected boolean accept(Event event) { return event.getEventClass().equals("ASPECTUAL"); } }); this.setSourceExtractors(srcExtractors); }
boolean returnAllValues, boolean uniqueValues) { this.featureName = createName(null, typePath); this.focusClass = focusClass; this.path = typePath;
/**
 * Initializes the annotator and appends two event feature extractors.
 *
 * <p>For each of the {@link Token} paths {@code stem} and {@code pos}, in that order, a
 * {@link CleartkExtractor} over the covered tokens (wrapped in a {@code Bag}) is added to
 * {@code eventFeatureExtractors}.
 *
 * @param context the UIMA context, forwarded unchanged to {@code super.initialize}
 * @throws ResourceInitializationException propagated from {@code super.initialize}
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  for (String tokenPath : new String[] { "stem", "pos" }) {
    TypePathExtractor<Token> tokenValueExtractor =
        new TypePathExtractor<Token>(Token.class, tokenPath);
    this.eventFeatureExtractors.add(
        new CleartkExtractor<Event, Token>(
            Token.class,
            tokenValueExtractor,
            new Bag(new Covered())));
  }
}
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // add features: word, stem, pos this.tokenFeatureExtractors = Lists.newArrayList(); this.tokenFeatureExtractors.add(new CoveredTextExtractor<Token>()); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "stem")); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "pos")); this.tokenFeatureExtractors.add(new ParentNodeFeaturesExtractor()); // add window of features before and after this.contextExtractors = Lists.newArrayList(); this.contextExtractors.add(new CleartkExtractor<Token, Token>( Token.class, new CoveredTextExtractor<Token>(), new Preceding(3), new Following(3))); }
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); // define chunking type this.chunking = new BioChunking<Token, Time>(Token.class, Time.class); // add features: word, character pattern, stem, pos this.tokenFeatureExtractors = Lists.newArrayList(); this.tokenFeatureExtractors.add(new CoveredTextExtractor<Token>()); NamedFeatureExtractor1<Token> ex = CharacterCategoryPatternFunction.createExtractor(); this.tokenFeatureExtractors.add(ex); this.tokenFeatureExtractors.add(new TimeWordsExtractor<Token>()); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "stem")); this.tokenFeatureExtractors.add(new TypePathExtractor<Token>(Token.class, "pos")); // add window of features before and after this.contextFeatureExtractors = Lists.newArrayList(); for (FeatureExtractor1<Token> extractor : this.tokenFeatureExtractors) { this.contextFeatureExtractors.add(new CleartkExtractor<Token, Token>(Token.class, extractor, new Preceding( 3), new Following(3))); } }
/**
 * Initializes the annotator and registers source, target and between feature extractors.
 *
 * <p>Source and target share one list: the {@code tense}, {@code aspect} and
 * {@code eventClass} paths of {@link Event}, followed by a
 * {@code SyntacticFirstChildOfGrandparentOfLeafExtractor}. The between extractors are a
 * {@code SyntacticLeafToLeafPathPartsExtractor} and a bag of the covered text of the covered
 * tokens.
 *
 * @param context the UIMA context, forwarded unchanged to {@code super.initialize}
 * @throws ResourceInitializationException propagated from {@code super.initialize}
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  List<FeatureExtractor1<Event>> anchorExtractors = Lists.newArrayList();
  for (String attributePath : new String[] { "tense", "aspect", "eventClass" }) {
    anchorExtractors.add(new TypePathExtractor<Event>(Event.class, attributePath));
  }
  anchorExtractors.add(new SyntacticFirstChildOfGrandparentOfLeafExtractor<Event>());
  // The same list instance serves both ends of the link.
  this.setSourceExtractors(anchorExtractors);
  this.setTargetExtractors(anchorExtractors);

  List<FeatureExtractor2<Anchor, Anchor>> betweenExtractors = Lists.newArrayList();
  betweenExtractors.add(new SyntacticLeafToLeafPathPartsExtractor<Anchor, Anchor>());
  CleartkExtractor<Anchor, Token> coveredWords =
      new CleartkExtractor<Anchor, Token>(
          Token.class,
          new CoveredTextExtractor<Token>(),
          new Bag(new Covered()));
  betweenExtractors.add(coveredWords);
  this.setBetweenExtractors(betweenExtractors);
}
/**
 * Initializes the annotator and appends its event and context feature extractors.
 *
 * <p>{@code eventFeatureExtractors} gains a {@code TextSliceExtractor} constructed with
 * {@code -2} and a bag of the {@code pos} path over the covered tokens.
 * {@code contextExtractors} gains a {@code TokenTextForSelectedPosExtractor} for
 * {@code "MD"}, {@code "TO"}, {@code "IN"} and {@code "VB"}, applied to {@code Preceding(3)}.
 *
 * @param context the UIMA context, forwarded unchanged to {@code super.initialize}
 * @throws ResourceInitializationException propagated from {@code super.initialize}
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  TextSliceExtractor<Event> textSlice = new TextSliceExtractor<Event>(-2);
  this.eventFeatureExtractors.add(textSlice);

  CleartkExtractor<Event, Token> coveredPosBag =
      new CleartkExtractor<Event, Token>(
          Token.class,
          new TypePathExtractor<Token>(Token.class, "pos"),
          new Bag(new Covered()));
  this.eventFeatureExtractors.add(coveredPosBag);

  CleartkExtractor<Event, Token> precedingSelectedText =
      new CleartkExtractor<Event, Token>(
          Token.class,
          new TokenTextForSelectedPosExtractor("MD", "TO", "IN", "VB"),
          new Preceding(3));
  this.contextExtractors.add(precedingSelectedText);
}
new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseAssertionFamily") );
new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseAssertionFamily") );
/**
 * Initializes the annotator and appends its event and context feature extractors.
 *
 * <p>{@code eventFeatureExtractors} gains a bag of the {@code pos} path over the covered
 * tokens. {@code contextExtractors} gains a {@code TokenTextForSelectedPosExtractor} for
 * {@code "VB"}, applied to a bag over {@code Preceding(3)}.
 *
 * @param context the UIMA context, forwarded unchanged to {@code super.initialize}
 * @throws ResourceInitializationException propagated from {@code super.initialize}
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);

  CleartkExtractor<Event, Token> coveredPosBag =
      new CleartkExtractor<Event, Token>(
          Token.class,
          new TypePathExtractor<Token>(Token.class, "pos"),
          new Bag(new Covered()));
  this.eventFeatureExtractors.add(coveredPosBag);

  CleartkExtractor<Event, Token> precedingVerbBag =
      new CleartkExtractor<Event, Token>(
          Token.class,
          new TokenTextForSelectedPosExtractor("VB"),
          new Bag(new Preceding(3)));
  this.contextExtractors.add(precedingVerbBag);
}
new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseAssertionFamily") ); new CleartkExtractor( BaseToken.class, new TypePathExtractor(BaseToken.class, "partOfSpeech"), new CleartkExtractor.LastCovered(2), new CleartkExtractor.Preceding(3),
srcExtractors.add(new TypePathExtractor<Event>(Event.class, "tense")); srcExtractors.add(new TypePathExtractor<Event>(Event.class, "eventClass")); srcExtractors.add(new CleartkExtractor<Event, Token>(Token.class, prepOrVerbExtractor, new Ngram(new Following(5)))); this.setSourceExtractors(srcExtractors); tgtExtractors.add(new TypePathExtractor<Time>(Time.class, "timeType")); tgtExtractors.add(new TypePathExtractor<Time>(Time.class, "value")); tgtExtractors.add(new CleartkExtractor<Time, Token>(Token.class, prepOrVerbExtractor, new Ngram(new Preceding(5)))); this.setTargetExtractors(tgtExtractors);
/**
 * Builds the extractor that counts token features within a sentence.
 *
 * <p>The per-token extractor depends on {@code tokenField}: the {@code stem} path for
 * {@code STEM}, the {@code lemma} path for {@code LEMMA}, and the covered text for
 * {@code COVERED_TEXT} or any other value. It is wrapped in a
 * {@code StopwordRemovingExtractor} built from {@code stopwords} and then applied as a
 * {@code Count} over the covered tokens.
 *
 * @return the counting extractor for {@link Sentence} annotations
 */
private FeatureExtractor1<Sentence> createTokenCountsExtractor() {
  final FeatureExtractor1<Token> perTokenExtractor;
  switch (this.tokenField) {
    case STEM:
      perTokenExtractor = new TypePathExtractor<Token>(Token.class, "stem");
      break;
    case LEMMA:
      perTokenExtractor = new TypePathExtractor<Token>(Token.class, "lemma");
      break;
    case COVERED_TEXT:
    default:
      // Covered text is also the fallback for any field value not handled above.
      perTokenExtractor = new CoveredTextExtractor<Token>();
      break;
  }

  StopwordRemovingExtractor<Token> withoutStopwords =
      new StopwordRemovingExtractor<Token>(this.stopwords, perTokenExtractor);
  return new CleartkExtractor<Sentence, Token>(
      Token.class,
      withoutStopwords,
      new CleartkExtractor.Count(new CleartkExtractor.Covered()));
}
new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhrase"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseCategory"), new TypePathExtractor(AssertionCuePhraseAnnotation.class, "cuePhraseAssertionFamily") ); new CleartkExtractor( BaseToken.class, new TypePathExtractor(BaseToken.class, "partOfSpeech"), new CleartkExtractor.LastCovered(2), new CleartkExtractor.Preceding(3),