/**
 * Creates a named container feature that wraps a list of other features.
 *
 * @param name
 *          the name assigned to this container via {@code setName}
 * @param features
 *          the wrapped features; the list is stored by reference, not copied
 */
public TransformableFeature(String name, List<Feature> features) {
  // The superclass no-arg constructor is invoked implicitly.
  this.setName(name);
  this.features = features;
}
/**
 * Creates an iterable over the instances found at the given URI.
 *
 * @param uri
 *          the location handed to the {@code InstanceStream} constructor
 * @return an {@code InstanceStream} built for {@code uri}
 */
public static <OUTCOME_T> Iterable<Instance<OUTCOME_T>> loadFromURI(URI uri) {
  return new InstanceStream<OUTCOME_T>(uri);
}
/**
 * Returns a new iterator built from this stream's URI; every call creates a separate
 * {@code InstanceStream.Iterator}.
 */
@Override
public java.util.Iterator<Instance<OUTCOME_T>> iterator() {
  java.util.Iterator<Instance<OUTCOME_T>> freshIterator =
      new InstanceStream.Iterator<OUTCOME_T>(this.uri);
  return freshIterator;
}
/**
 * Builds a new instance in which every feature wrapped by a transformable container has been
 * passed through the single-feature {@code transform}; all other features are copied over as-is.
 *
 * @param instance
 *          the instance whose features should be transformed
 * @return a new instance with the same outcome and the rewritten feature list
 */
@Override
public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) {
  List<Feature> rewritten = new ArrayList<Feature>();
  for (Feature candidate : instance.getFeatures()) {
    if (!this.isTransformable(candidate)) {
      // Not one of ours: keep it unchanged.
      rewritten.add(candidate);
      continue;
    }
    // Unwrap the container and transform each feature it holds.
    TransformableFeature container = (TransformableFeature) candidate;
    for (Feature wrapped : container.getFeatures()) {
      rewritten.add(this.transform(wrapped));
    }
  }
  return new Instance<OUTCOME_T>(instance.getOutcome(), rewritten);
}
@Override public List<Feature> extract(JCas view, FOCUS_T focusAnnotation) throws CleartkExtractorException { List<Feature> extracted = this.subExtractor.extract(view, focusAnnotation); List<Feature> result = new ArrayList<Feature>(); if (this.isTrained) { // We have trained / loaded a MinMax model, so now fix up the values for (Feature feature : extracted) { result.add(this.transform(feature)); } } else { // We haven't trained this extractor yet, so just mark the existing features // for future modification, by creating one mega container feature result.add(new TransformableFeature(this.name, extracted)); } return result; }
@Override public List<Feature> extract(JCas view, FOCUS_T focusAnnotation) throws CleartkExtractorException { List<Feature> extracted = this.subExtractor.extract(view, focusAnnotation); List<Feature> result = new ArrayList<Feature>(); if (this.isTrained) { // We have trained / loaded a tf*idf model, so now fix up the values for (Feature feature : extracted) { result.add(this.transform(feature)); } } else { // We haven't trained this extractor yet, so just mark the existing features // for future modification, by creating one mega container feature result.add(new TransformableFeature(this.name, extracted)); } return result; }
@Override public List<Feature> extract(JCas view, FOCUS_T focusAnnotation) throws CleartkExtractorException { List<Feature> extracted = this.subExtractor.extract(view, focusAnnotation); List<Feature> result = new ArrayList<Feature>(); if (this.isTrained) { // We have trained / loaded a ZMUS model, so now fix up the values for (Feature feature : extracted) { Feature transformedFeature = this.transform(feature); if (transformedFeature != null) result.add(transformedFeature); } } else { // We haven't trained this extractor yet, so just mark the existing features // for future modification, by creating one mega container feature result.add(new TransformableFeature(this.name, extracted)); } return result; }
@Override public List<Feature> extract(JCas view, FOCUS_T focusAnnotation) throws CleartkExtractorException { List<Feature> extracted = this.subExtractor.extract(view, focusAnnotation); List<Feature> result = new ArrayList<Feature>(); if (this.isTrained) { // We have trained / loaded a centroid tf*idf model, so now compute // a cosine similarity for the extracted values Map<String, Double> extractedFeatureMap = this.featuresToFeatureMap(extracted); result.add(new Feature(name, this.simFunction.distance(extractedFeatureMap, centroidMap))); } else { // We haven't trained this extractor yet, so just mark the existing features // for future modification, by creating one mega container feature result.add(new TransformableFeature(this.name, extracted)); } return result; }
@Override public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) { List<Feature> features = new ArrayList<Feature>(); for (Feature feature : instance.getFeatures()) { if (this.isTransformable(feature)) { // Filter down to selected features features.addAll(Collections2.filter(((TransformableFeature) feature).getFeatures(), this)); } else { // Pass non-relevant features through w/o filtering features.add(feature); } } return new Instance<OUTCOME_T>(instance.getOutcome(), features); }
/**
 * Filters or wraps a list of features depending on training state.
 *
 * @param features
 *          the features to process
 * @return when trained, the features accepted by this object acting as the filter predicate;
 *         otherwise a one-element list holding a {@code TransformableFeature} that wraps
 *         {@code features}
 */
public List<Feature> transform(List<Feature> features) {
  List<Feature> output = Lists.newArrayList();
  if (!this.isTrained) {
    // Untrained: defer the decision by wrapping everything in one container.
    output.add(new TransformableFeature(this.name, features));
    return output;
  }
  output.addAll(Collections2.filter(features, this));
  return output;
}
/**
 * Walks the instance's features, descending into nested transformable containers, and
 * collects the containers whose name matches this extractor's name.
 *
 * @param instance
 *          the instance to search
 * @return the matching containers; containers with a different name are not returned
 *         themselves, but their contents are searched
 */
protected List<TransformableFeature> selectTransformableFeatures(Instance<OUTCOME_T> instance) {
  List<TransformableFeature> matches = new ArrayList<TransformableFeature>();
  Stack<Feature> pending = new Stack<Feature>();
  pending.addAll(instance.getFeatures());

  while (!pending.isEmpty()) {
    Feature candidate = pending.pop();
    if (!(candidate instanceof TransformableFeature)) {
      // Plain features can neither match nor contain anything to descend into.
      continue;
    }
    TransformableFeature container = (TransformableFeature) candidate;
    boolean ownedByThisExtractor = container.getName().equals(this.name);
    if (ownedByThisExtractor) {
      matches.add(container);
    } else {
      // Someone else's container: look inside it for ours.
      pending.addAll(container.getFeatures());
    }
  }
  return matches;
}
public SummarizationDataWriter(File outputDirectory) throws IOException { super(outputDirectory); this.instanceDataWriter = new InstanceDataWriter<Boolean>(outputDirectory); }
/**
 * Forwards the instance to the wrapped instance data writer.
 *
 * @param instance
 *          the instance to write
 * @throws CleartkProcessingException
 *           declared by the delegate's {@code write}
 */
@Override
public void write(Instance<Boolean> instance) throws CleartkProcessingException {
  instanceDataWriter.write(instance);
}
@Override public void finish() throws CleartkProcessingException { try { // We need to add a "null" instance terminator to gracefully handle the iteration while // reading in serialized objects from file InstanceStream.Terminator<OUTCOME_T> terminator = new InstanceStream.Terminator<OUTCOME_T>(); this.objout.writeObject(terminator); this.objout.close(); } catch (IOException e) { throw new CleartkProcessingException("", "Unable to write terminal instance", e); } }
@Override public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) { List<Feature> features = new ArrayList<Feature>(); for (Feature feature : instance.getFeatures()) { if (this.isTransformable(feature)) { // Filter down to selected features features.addAll(Collections2.filter(((TransformableFeature) feature).getFeatures(), this)); } else { // Pass non-relevant features through w/o filtering features.add(feature); } } return new Instance<OUTCOME_T>(instance.getOutcome(), features); }
/**
 * Filters or wraps a list of features depending on training state.
 *
 * @param features
 *          the features to process
 * @return when trained, the features accepted by this object acting as the filter predicate;
 *         otherwise a one-element list holding a {@code TransformableFeature} that wraps
 *         {@code features}
 */
public List<Feature> transform(List<Feature> features) {
  List<Feature> processed = Lists.newArrayList();
  if (!this.isTrained) {
    // Untrained: mark the features for later handling by wrapping them in one container.
    processed.add(new TransformableFeature(this.name, features));
    return processed;
  }
  processed.addAll(Collections2.filter(features, this));
  return processed;
}
/**
 * Creates an iterable over the instances stored in the given directory.
 *
 * @param dir
 *          the directory containing the file named
 *          {@code InstanceDataWriter.INSTANCES_OUTPUT_FILENAME}
 * @return an {@code InstanceStream} built from that file's URI
 */
public static <OUTCOME_T> Iterable<Instance<OUTCOME_T>> loadFromDirectory(File dir) {
  URI instancesLocation = new File(dir, InstanceDataWriter.INSTANCES_OUTPUT_FILENAME).toURI();
  return new InstanceStream<OUTCOME_T>(instancesLocation);
}
@Override public Instance<OUTCOME_T> transform(Instance<OUTCOME_T> instance) { List<Feature> features = new ArrayList<Feature>(); for (Feature feature : instance.getFeatures()) { if (this.isTransformable(feature)) { // Filter down to selected features features.addAll(Collections2.filter(((TransformableFeature) feature).getFeatures(), this)); } else { // Pass non-relevant features through w/o filtering features.add(feature); } } return new Instance<OUTCOME_T>(instance.getOutcome(), features); }
@Override public List<Feature> extract(JCas view, FOCUS_T focusAnnotation) throws CleartkExtractorException { List<Feature> extracted = this.subExtractor.extract(view, focusAnnotation); List<Feature> result = new ArrayList<Feature>(); if (this.isTrained) { // Filter out selected features result.addAll(Collections2.filter(extracted, this)); } else { // We haven't trained this extractor yet, so just mark the existing features // for future modification, by creating one uber-container feature result.add(new TransformableFeature(this.name, extracted)); } return result; }
@Override public void train(Iterable<Instance<OUTCOME_T>> instances) { // aggregate statistics for all features and classes this.mutualInfoStats = new MutualInformationStats<OUTCOME_T>(this.smoothingCount); for (Instance<OUTCOME_T> instance : instances) { OUTCOME_T outcome = instance.getOutcome(); for (Feature feature : instance.getFeatures()) { if (this.isTransformable(feature)) { for (Feature untransformedFeature : ((TransformableFeature) feature).getFeatures()) { mutualInfoStats.update(this.nameFeature(untransformedFeature), outcome, 1); } } } } // Compute mutual information score for each feature Set<String> featureNames = mutualInfoStats.classConditionalCounts.rowKeySet(); this.selectedFeatures = Ordering.natural().onResultOf( this.mutualInfoStats.getScoreFunction( this.combineScoreMethod)).reverse().immutableSortedCopy(featureNames); this.isTrained = true; }