/**
 * Set target distributions using the "Schapire" heuristic described in
 * "Learning from Labeled Features using Generalized Expectation Criteria"
 * Gregory Druck, Gideon Mann, Andrew McCallum.
 *
 * @param labeledFeatures HashMap of feature indices to lists of label indices for that feature.
 * @param numLabels Total number of labels.
 * @param majorityProb Probability mass divided among majority labels.
 * @return Constraints (map of feature index to target distribution), with target
 *         distributions set using the heuristic.
 */
public static HashMap<Integer,double[]> setTargetsUsingHeuristic(HashMap<Integer,ArrayList<Integer>> labeledFeatures, int numLabels, double majorityProb) {
  HashMap<Integer,double[]> constraints = new HashMap<Integer,double[]>();
  // Build one heuristic prior per labeled feature.
  for (int featureIndex : labeledFeatures.keySet()) {
    ArrayList<Integer> labelIndices = labeledFeatures.get(featureIndex);
    double[] prior = getHeuristicPrior(labelIndices, numLabels, majorityProb);
    constraints.put(featureIndex, prior);
  }
  return constraints;
}
/**
 * Convenience overload: delegates to the three-argument {@code labelFeatures}
 * with its boolean flag enabled.
 *
 * @param list InstanceList used for labeling.
 * @param features List of feature indices to label.
 * @return Map of feature indices to lists of label indices.
 */
public static HashMap<Integer, ArrayList<Integer>> labelFeatures(InstanceList list, ArrayList<Integer> features) {
  boolean defaultFlag = true;
  return labelFeatures(list, features, defaultFlag);
}
/**
 * Reads feature constraints from a file, whether they are stored
 * using Strings or indices.
 *
 * @param filename File with feature constraints.
 * @param data InstanceList used for alphabets.
 * @return Constraints.
 */
public static HashMap<Integer,double[]> readConstraintsFromFile(String filename, InstanceList data) {
  // Detect the on-disk format first, then dispatch to the matching reader.
  boolean indexBased = testConstraintsFileIndexBased(filename);
  return indexBased
      ? readConstraintsFromFileIndex(filename, data)
      : readConstraintsFromFileString(filename, data);
}
features = FeatureConstraintUtil.selectFeaturesByInfoGain(list,numConstraints.value); ObjectInputStream ois = new ObjectInputStream(new FileInputStream(ldaFile.value)); ParallelTopicModel lda = (ParallelTopicModel)ois.readObject(); features = FeatureConstraintUtil.selectTopLDAFeatures(numConstraints.value, lda, list.getDataAlphabet()); constraints = FeatureConstraintUtil.setTargetsUsingData(list, features); featuresAndLabels = FeatureConstraintUtil.labelFeatures(list,features); constraints = FeatureConstraintUtil.setTargetsUsingHeuristic(featuresAndLabels,list.getTargetAlphabet().size(),majorityProb.value); constraints = FeatureConstraintUtil.setTargetsUsingFeatureVoting(featuresAndLabels,list);
HashMap<Integer,ArrayList<Integer>> labeledFeatures = new HashMap<Integer,ArrayList<Integer>>(); double[][] featureLabelCounts = getFeatureLabelCounts(list,true); MatrixOps.plusEquals(prob,1e-8); MatrixOps.timesEquals(prob, 1./MatrixOps.sum(prob)); int[] sortedIndices = getMaxIndices(prob); ArrayList<Integer> labels = new ArrayList<Integer>();
FeatureConstraintUtil.readConstraintsFromFile(constraintsFile, trainingList);
FeatureConstraintUtil.setTargetsUsingHeuristic(featuresAndLabels, numLabels, 0.9);
/** * Set target distributions using estimates from data. * * @param list InstanceList used to estimate targets. * @param features List of features for constraints. * @param normalize Whether to normalize by feature counts * @return Constraints (map of feature index to target), with targets * set using estimates from supplied data. */ public static HashMap<Integer,double[]> setTargetsUsingData(InstanceList list, ArrayList<Integer> features, boolean useValues, boolean normalize) { HashMap<Integer,double[]> constraints = new HashMap<Integer,double[]>(); double[][] featureLabelCounts = getFeatureLabelCounts(list,useValues); for (int i = 0; i < features.size(); i++) { int fi = features.get(i); if (fi != list.getDataAlphabet().size()) { double[] prob = featureLabelCounts[fi]; if (normalize) { // Smooth probability distributions by adding a (very) // small count. We just need to make sure they aren't // zero in which case the KL-divergence is infinite. MatrixOps.plusEquals(prob, 1e-8); MatrixOps.timesEquals(prob, 1./MatrixOps.sum(prob)); } constraints.put(fi, prob); } } return constraints; }
FeatureConstraintUtil.readRangeConstraintsFromFile(constraintsFile, trainingList);
labelByVoting(labeledFeatures,instance,labelDist); } else { int li = labeling.getBestIndex();
features = FeatureConstraintUtil.selectFeaturesByInfoGain(list,numConstraints.value); ObjectInputStream ois = new ObjectInputStream(new FileInputStream(ldaFile.value)); ParallelTopicModel lda = (ParallelTopicModel)ois.readObject(); features = FeatureConstraintUtil.selectTopLDAFeatures(numConstraints.value, lda, list.getDataAlphabet()); constraints = FeatureConstraintUtil.setTargetsUsingData(list, features); featuresAndLabels = FeatureConstraintUtil.labelFeatures(list,features); constraints = FeatureConstraintUtil.setTargetsUsingHeuristic(featuresAndLabels,list.getTargetAlphabet().size(),majorityProb.value); constraints = FeatureConstraintUtil.setTargetsUsingFeatureVoting(featuresAndLabels,list);
HashMap<Integer,ArrayList<Integer>> labeledFeatures = new HashMap<Integer,ArrayList<Integer>>(); double[][] featureLabelCounts = getFeatureLabelCounts(list,true); MatrixOps.plusEquals(prob,1e-8); MatrixOps.timesEquals(prob, 1./MatrixOps.sum(prob)); int[] sortedIndices = getMaxIndices(prob); ArrayList<Integer> labels = new ArrayList<Integer>();
FeatureConstraintUtil.readConstraintsFromFile(constraintsFile, trainingList);
FeatureConstraintUtil.setTargetsUsingHeuristic(featuresAndLabels, numLabels, 0.9);
/** * Set target distributions using estimates from data. * * @param list InstanceList used to estimate targets. * @param features List of features for constraints. * @param normalize Whether to normalize by feature counts * @return Constraints (map of feature index to target), with targets * set using estimates from supplied data. */ public static HashMap<Integer,double[]> setTargetsUsingData(InstanceList list, ArrayList<Integer> features, boolean useValues, boolean normalize) { HashMap<Integer,double[]> constraints = new HashMap<Integer,double[]>(); double[][] featureLabelCounts = getFeatureLabelCounts(list,useValues); for (int i = 0; i < features.size(); i++) { int fi = features.get(i); if (fi != list.getDataAlphabet().size()) { double[] prob = featureLabelCounts[fi]; if (normalize) { // Smooth probability distributions by adding a (very) // small count. We just need to make sure they aren't // zero in which case the KL-divergence is infinite. MatrixOps.plusEquals(prob, 1e-8); MatrixOps.timesEquals(prob, 1./MatrixOps.sum(prob)); } constraints.put(fi, prob); } } return constraints; }
FeatureConstraintUtil.readRangeConstraintsFromFile(constraintsFile, trainingList);
labelByVoting(labeledFeatures,instance,labelDist); } else { int li = labeling.getBestIndex();
features = FeatureConstraintUtil.selectFeaturesByInfoGain(list,numConstraints.value); ObjectInputStream ois = new ObjectInputStream(new FileInputStream(ldaFile.value)); ParallelTopicModel lda = (ParallelTopicModel)ois.readObject(); features = FeatureConstraintUtil.selectTopLDAFeatures(numConstraints.value, lda, list.getDataAlphabet()); constraints = FeatureConstraintUtil.setTargetsUsingData(list, features); featuresAndLabels = FeatureConstraintUtil.labelFeatures(list,features); constraints = FeatureConstraintUtil.setTargetsUsingHeuristic(featuresAndLabels,list.getTargetAlphabet().size(),majorityProb.value); constraints = FeatureConstraintUtil.setTargetsUsingFeatureVoting(featuresAndLabels,list);
/**
 * Reads feature constraints from a file, whether they are stored
 * using Strings or indices.
 *
 * @param filename File with feature constraints.
 * @param data InstanceList used for alphabets.
 * @return Constraints.
 */
public static HashMap<Integer,double[]> readConstraintsFromFile(String filename, InstanceList data) {
  // Choose the reader based on whether the file stores indices or Strings.
  if (!testConstraintsFileIndexBased(filename)) {
    return readConstraintsFromFileString(filename, data);
  }
  return readConstraintsFromFileIndex(filename, data);
}
HashMap<Integer,ArrayList<Integer>> labeledFeatures = new HashMap<Integer,ArrayList<Integer>>(); double[][] featureLabelCounts = getFeatureLabelCounts(list,true); MatrixOps.plusEquals(prob,1e-8); MatrixOps.timesEquals(prob, 1./MatrixOps.sum(prob)); int[] sortedIndices = getMaxIndices(prob); ArrayList<Integer> labels = new ArrayList<Integer>();