/**
 * Applies this extractor to an already-extracted list of features.
 *
 * <p>Before training, the whole input list is wrapped in a single
 * {@link TransformableFeature} carrying this extractor's name. After training, the result
 * holds only the input features that pass {@code Collections2.filter} with this object as
 * the predicate.
 *
 * @param features the features to wrap or filter
 * @return a new list holding either the single container feature or the retained features
 */
public List<Feature> transform(List<Feature> features) {
  List<Feature> output = Lists.newArrayList();
  if (!this.isTrained) {
    // Untrained: keep everything, tagged with our name inside one container feature
    output.add(new TransformableFeature(this.name, features));
    return output;
  }
  // Trained: this object acts as the filter predicate
  output.addAll(Collections2.filter(features, this));
  return output;
}
@Override public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) { List<Feature> features = new ArrayList<Feature>(); for (Feature feature : instance.getFeatures()) { if (this.isTransformable(feature)) { // Filter down to selected features features.addAll(Collections2.filter(((TransformableFeature) feature).getFeatures(), this)); } else { // Pass non-relevant features through w/o filtering features.add(feature); } } return new Instance<OUTCOME_T>(instance.getOutcome(), features); }
/**
 * Walks the features of an instance, descending into nested {@link TransformableFeature}
 * containers, and collects every container whose name equals this extractor's name.
 *
 * <p>The walk uses an explicit stack. A container with a matching name is collected and not
 * descended into; a container with a different name has its contained features pushed for
 * further inspection. Features that are not containers are ignored.
 *
 * @param instance the instance whose features are searched
 * @return the matching container features, in the order the walk encountered them
 */
protected List<TransformableFeature> selectTransformableFeatures(Instance<OUTCOME_T> instance) {
  List<TransformableFeature> matches = new ArrayList<TransformableFeature>();
  Stack<Feature> pending = new Stack<Feature>();
  pending.addAll(instance.getFeatures());

  while (!pending.isEmpty()) {
    Feature next = pending.pop();
    if (!(next instanceof TransformableFeature)) {
      continue;
    }
    TransformableFeature container = (TransformableFeature) next;
    if (container.getName().equals(this.name)) {
      matches.add(container);
    } else {
      // Belongs to some other extractor; look inside it for ours
      pending.addAll(container.getFeatures());
    }
  }
  return matches;
}
/**
 * Creates a container feature that groups a list of features under a name.
 *
 * <p>The list is stored by reference, not copied.
 *
 * @param name the name passed to {@code setName}
 * @param features the features held by this container
 */
public TransformableFeature(String name, List<Feature> features) {
  // The no-argument superclass constructor is invoked implicitly
  this.setName(name);
  this.features = features;
}
@Override public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) { List<Feature> features = new ArrayList<Feature>(); for (Feature feature : instance.getFeatures()) { if (this.isTransformable(feature)) { // Filter down to selected features features.addAll(Collections2.filter(((TransformableFeature) feature).getFeatures(), this)); } else { // Pass non-relevant features through w/o filtering features.add(feature); } } return new Instance<OUTCOME_T>(instance.getOutcome(), features); }
/**
 * Applies this extractor to an already-extracted list of features.
 *
 * <p>Before training, the whole input list is wrapped in a single
 * {@link TransformableFeature} carrying this extractor's name. After training, the result
 * holds only the input features that pass {@code Collections2.filter} with this object as
 * the predicate.
 *
 * @param features the features to wrap or filter
 * @return a new list holding either the single container feature or the retained features
 */
public List<Feature> transform(List<Feature> features) {
  List<Feature> output = Lists.newArrayList();
  if (!this.isTrained) {
    // Untrained: keep everything, tagged with our name inside one container feature
    output.add(new TransformableFeature(this.name, features));
    return output;
  }
  // Trained: this object acts as the filter predicate
  output.addAll(Collections2.filter(features, this));
  return output;
}
@Override public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) { List<Feature> features = new ArrayList<Feature>(); for (Feature feature : instance.getFeatures()) { if (this.isTransformable(feature)) { // Filter down to selected features features.addAll(Collections2.filter(((TransformableFeature) feature).getFeatures(), this)); } else { // Pass non-relevant features through w/o filtering features.add(feature); } } return new Instance<OUTCOME_T>(instance.getOutcome(), features); }
@Override public List<Feature> extract(JCas view, FOCUS_T focusAnnotation) throws CleartkExtractorException { List<Feature> extracted = this.subExtractor.extract(view, focusAnnotation); List<Feature> result = new ArrayList<Feature>(); if (this.isTrained) { // Filter out selected features result.addAll(Collections2.filter(extracted, this)); } else { // We haven't trained this extractor yet, so just mark the existing features // for future modification, by creating one uber-container feature result.add(new TransformableFeature(this.name, extracted)); } return result; }
/**
 * Builds a new instance in which every feature held inside a transformable container is
 * replaced by the result of the single-feature {@code transform}. Features for which
 * {@code isTransformable} returns false are copied over untouched.
 *
 * @param instance the instance to transform
 * @return a new instance with the same outcome and the rewritten feature list
 */
@Override
public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) {
  List<Feature> output = new ArrayList<Feature>();
  for (Feature candidate : instance.getFeatures()) {
    if (!this.isTransformable(candidate)) {
      output.add(candidate);
      continue;
    }
    // Flatten the container: each contained feature is transformed individually
    TransformableFeature container = (TransformableFeature) candidate;
    for (Feature inner : container.getFeatures()) {
      output.add(this.transform(inner));
    }
  }
  return new Instance<OUTCOME_T>(instance.getOutcome(), output);
}
@Override public List<Feature> extract(JCas view, FOCUS_T focusAnnotation) throws CleartkExtractorException { List<Feature> extracted = this.subExtractor.extract(view, focusAnnotation); List<Feature> result = new ArrayList<Feature>(); if (this.isTrained) { // We have trained / loaded a tf*idf model, so now fix up the values for (Feature feature : extracted) { result.add(this.transform(feature)); } } else { // We haven't trained this extractor yet, so just mark the existing features // for future modification, by creating one mega container feature result.add(new TransformableFeature(this.name, extracted)); } return result; }
@Override public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) { List<Feature> features = new ArrayList<Feature>(); List<Feature> featuresToTransform = new ArrayList<Feature>(); for (Feature feature : instance.getFeatures()) { if (this.isTransformable(feature)) { // Store off features for later similarity computation featuresToTransform.addAll(((TransformableFeature) feature).getFeatures()); } else { // pass through non-transformable features features.add(feature); } } // Create centroid similarity feature Map<String, Double> featureMap = this.featuresToFeatureMap(featuresToTransform); features.add(new Feature(this.name, new Double(this.simFunction.distance( featureMap, centroidMap)))); return new Instance<OUTCOME_T>(instance.getOutcome(), features); }
@Override public List<Feature> extract(JCas view, FOCUS_T focusAnnotation) throws CleartkExtractorException { List<Feature> extracted = this.subExtractor.extract(view, focusAnnotation); List<Feature> result = new ArrayList<Feature>(); if (this.isTrained) { // We have trained / loaded a MinMax model, so now fix up the values for (Feature feature : extracted) { result.add(this.transform(feature)); } } else { // We haven't trained this extractor yet, so just mark the existing features // for future modification, by creating one mega container feature result.add(new TransformableFeature(this.name, extracted)); } return result; }
protected Map<String, Double> computeCentroid(Iterable<Instance<OUTCOME_T>> instances, IDFMap idfs) { // Now compute centroid of all applicable terms (features) in all instances int numDocuments = idfs.getTotalDocumentCount(); Map<String, Double> newCentroidMap = new HashMap<String, Double>(); for (Instance<OUTCOME_T> instance : instances) { // Grab the matching tf*idf features from the set of all features in an instance for (Feature feature : instance.getFeatures()) { if (this.isTransformable(feature)) { // tf*idf features contain a list of features, these are actually what get added // to our document frequency map for (Feature untransformedFeature : ((TransformableFeature) feature).getFeatures()) { String termName = untransformedFeature.getName(); int tf = (Integer) untransformedFeature.getValue(); double tfidf = tf * idfs.getIDF(termName); double sumTfidf = (newCentroidMap.containsKey(termName)) ? sumTfidf = newCentroidMap.get(termName) : 0.0; newCentroidMap.put(termName, sumTfidf + tfidf); } } } } for (Map.Entry<String, Double> entry : newCentroidMap.entrySet()) { double mean = entry.getValue() / numDocuments; newCentroidMap.put(entry.getKey(), mean); } return newCentroidMap; }
@Override public List<Feature> extract(JCas view, FOCUS_T focusAnnotation) throws CleartkExtractorException { List<Feature> extracted = this.subExtractor.extract(view, focusAnnotation); List<Feature> result = new ArrayList<Feature>(); if (this.isTrained) { // We have trained / loaded a ZMUS model, so now fix up the values for (Feature feature : extracted) { Feature transformedFeature = this.transform(feature); if (transformedFeature != null) result.add(transformedFeature); } } else { // We haven't trained this extractor yet, so just mark the existing features // for future modification, by creating one mega container feature result.add(new TransformableFeature(this.name, extracted)); } return result; }
for (Feature untransformedFeature : ((TransformableFeature) feature).getFeatures()) { String featureName = untransformedFeature.getName(); Object featureValue = untransformedFeature.getValue();
@Override public List<Feature> extract(JCas view, FOCUS_T focusAnnotation) throws CleartkExtractorException { List<Feature> extracted = this.subExtractor.extract(view, focusAnnotation); List<Feature> result = new ArrayList<Feature>(); if (this.isTrained) { // We have trained / loaded a centroid tf*idf model, so now compute // a cosine similarity for the extracted values Map<String, Double> extractedFeatureMap = this.featuresToFeatureMap(extracted); result.add(new Feature(name, this.simFunction.distance(extractedFeatureMap, centroidMap))); } else { // We haven't trained this extractor yet, so just mark the existing features // for future modification, by creating one mega container feature result.add(new TransformableFeature(this.name, extracted)); } return result; }
for (Feature untransformedFeature : ((TransformableFeature) feature).getFeatures()) { String featureName = untransformedFeature.getName(); Object featureValue = untransformedFeature.getValue();
protected IDFMap createIdfMap(Iterable<Instance<OUTCOME_T>> instances) { IDFMap newIdfMap = new IDFMap(); // Add instance's term frequencies to the global counts for (Instance<OUTCOME_T> instance : instances) { Set<String> featureNames = new HashSet<String>(); // Grab the matching tf*idf features from the set of all features in an instance for (Feature feature : instance.getFeatures()) { if (this.isTransformable(feature)) { // tf*idf features contain a list of features, these are actually what get added // to our document frequency map for (Feature untransformedFeature : ((TransformableFeature) feature).getFeatures()) { featureNames.add(untransformedFeature.getName()); } } } for (String featureName : featureNames) { newIdfMap.add(featureName); } newIdfMap.incTotalDocumentCount(); } return newIdfMap; }
@Override public void train(Iterable<Instance<OUTCOME_T>> instances) { // aggregate statistics for all features this.chi2Function = new Chi2Scorer<OUTCOME_T>(this.yates); for (Instance<OUTCOME_T> instance : instances) { OUTCOME_T outcome = instance.getOutcome(); for (Feature feature : instance.getFeatures()) { if (this.isTransformable(feature)) { for (Feature untransformedFeature : ((TransformableFeature) feature).getFeatures()) { this.chi2Function.update(this.getFeatureName(untransformedFeature), outcome, 1); } } } } // keep only large chi2 valued features this.selectedFeatureNames = Sets.newHashSet(); for (String featureName : this.chi2Function.featValueClassCount.rowKeySet()) { if (this.chi2Function.score(featureName) > this.chi2Threshold) { this.selectedFeatureNames.add(featureName); } } this.isTrained = true; }
@Override public void train(Iterable<Instance<OUTCOME_T>> instances) { // aggregate statistics for all features this.chi2Function = new Chi2Scorer<OUTCOME_T>(this.yates); for (Instance<OUTCOME_T> instance : instances) { OUTCOME_T outcome = instance.getOutcome(); for (Feature feature : instance.getFeatures()) { if (this.isTransformable(feature)) { for (Feature untransformedFeature : ((TransformableFeature) feature).getFeatures()) { this.chi2Function.update(this.getFeatureName(untransformedFeature), outcome, 1); } } } } // keep only large chi2 valued features this.selectedFeatureNames = Sets.newHashSet(); for (String featureName : this.chi2Function.featValueClassCount.rowKeySet()) { if (this.chi2Function.score(featureName) > this.chi2Threshold) { this.selectedFeatureNames.add(featureName); } } this.isTrained = true; }