/**
 * Constructs an InfoGain ranking over the features of {@code ilist}.
 * The feature scores are computed by {@code calcInfoGains} inside the
 * {@code super(...)} call.
 */
public InfoGain (InstanceList ilist) {
    super (ilist.getDataAlphabet(), calcInfoGains (ilist));
    // NOTE(review): baseEntropy/baseLabelDistribution are copied from static
    // fields that are presumably written by calcInfoGains during the super()
    // call above (a constructor cannot pass extra results any other way).
    // This static hand-off is not thread-safe — confirm single-threaded use.
    baseEntropy = staticBaseEntropy;
    baseLabelDistribution = staticBaseLabelDistribution;
}
/**
 * Installs one feature selection per label.  Each non-null entry must share
 * this classifier's data alphabet (compared by identity, per MALLET
 * convention); otherwise an IllegalArgumentException is thrown.
 *
 * @param selectedFeatures per-label selections, or null to clear them
 */
public void setPerLabelFeatureSelection (FeatureSelection[] selectedFeatures) {
    if (selectedFeatures != null) {
        // Validate every entry before accepting the array.
        for (FeatureSelection selection : selectedFeatures) {
            if (selection.getAlphabet() != getDataAlphabet()) {
                throw new IllegalArgumentException ("Vocabularies do not match");
            }
        }
    }
    perLabelFeatureSelection = selectedFeatures;
}
public FeatureCountTool (InstanceList instances) { this.instances = instances; numFeatures = instances.getDataAlphabet().size(); featureCounts = new double[numFeatures]; documentFrequencies = new int[numFeatures]; }
/**
 * Constructs an InfoGain ranking over the features of {@code ilist}.
 * The feature scores are computed by {@code calcInfoGains} inside the
 * {@code super(...)} call.
 */
public InfoGain (InstanceList ilist) {
    super (ilist.getDataAlphabet(), calcInfoGains (ilist));
    // NOTE(review): baseEntropy/baseLabelDistribution are copied from static
    // fields that are presumably written by calcInfoGains during the super()
    // call above (a constructor cannot pass extra results any other way).
    // This static hand-off is not thread-safe — confirm single-threaded use.
    baseEntropy = staticBaseEntropy;
    baseLabelDistribution = staticBaseLabelDistribution;
}
/**
 * Writes one "feature&lt;TAB&gt;coefficient" line per feature of the data
 * alphabet to {@code filename}, overwriting any existing file.
 *
 * @param filename path of the output file
 * @throws IOException if the file cannot be created
 */
public void printParameters(String filename) throws IOException {
    Alphabet alphabet = data.getDataAlphabet();
    // try-with-resources fixes a leak in the original: the writer was not
    // closed (nor its buffer flushed) if lookupObject/printf threw midway.
    try (PrintWriter out = new PrintWriter(filename)) {
        for (int feature = 0; feature < alphabet.size(); feature++) {
            out.printf("%s\t%.8f\n", alphabet.lookupObject(feature), coefficients[feature]);
        }
    }
}
/**
 * Writes one "feature&lt;TAB&gt;coefficient" line per feature of the data
 * alphabet to {@code filename}, overwriting any existing file.
 *
 * @param filename path of the output file
 * @throws IOException if the file cannot be created
 */
public void printParameters(String filename) throws IOException {
    Alphabet alphabet = data.getDataAlphabet();
    // try-with-resources fixes a leak in the original: the writer was not
    // closed (nor its buffer flushed) if lookupObject/printf threw midway.
    try (PrintWriter out = new PrintWriter(filename)) {
        for (int feature = 0; feature < alphabet.size(); feature++) {
            out.printf("%s\t%.8f\n", alphabet.lookupObject(feature), coefficients[feature]);
        }
    }
}
/**
 * Writes one "feature&lt;TAB&gt;coefficient" line per feature of the data
 * alphabet to {@code filename}, overwriting any existing file.
 *
 * @param filename path of the output file
 * @throws IOException if the file cannot be created
 */
public void printParameters(String filename) throws IOException {
    Alphabet alphabet = data.getDataAlphabet();
    // try-with-resources fixes a leak in the original: the writer was not
    // closed (nor its buffer flushed) if lookupObject/printf threw midway.
    try (PrintWriter out = new PrintWriter(filename)) {
        for (int feature = 0; feature < alphabet.size(); feature++) {
            out.printf("%s\t%.8f\n", alphabet.lookupObject(feature), coefficients[feature]);
        }
    }
}
/**
 * Returns a ranked feature vector of the gradient gain for the given
 * training data on the given trained CRF.  The instance list must have
 * the target labels as LabelSequence.
 *
 * @param ilist training instances
 * @param crf   a trained CRF
 * @return per-feature gradient gains ranked over the data alphabet
 */
public static RankedFeatureVector gradientGainFrom(InstanceList ilist, CRF crf) {
    double[] gains = new double[ilist.getDataAlphabet().size()];
    // Only the gradient-gain buffer is requested; the other two result
    // slots of fillResults are deliberately left null.
    fillResults(ilist, crf, gains, null, null);
    return new RankedFeatureVector(ilist.getDataAlphabet(), gains);
}
public void setFeatureSelection (FeatureSelection selectedFeatures) { if (selectedFeatures != null && selectedFeatures.getAlphabet() != null // xxx We allow a null vocabulary here? See CRF3.java && selectedFeatures.getAlphabet() != getDataAlphabet()) throw new IllegalArgumentException ("Vocabularies do not match"); featureSelection = selectedFeatures; }
/**
 * Returns the instances that contain the given feature object.
 *
 * @param feature the feature to look up in the index's data alphabet
 * @throws IllegalArgumentException if the feature is unknown to the index
 */
public ArrayList getInstancesWithFeature (Object feature) {
    // Pass false so the lookup never grows the alphabet.
    int featureIndex = ilist.getDataAlphabet().lookupIndex (feature, false);
    if (featureIndex == -1) {
        throw new IllegalArgumentException ("Feature "+feature+" not contained in InvertedIndex");
    }
    return getInstancesWithFeature (featureIndex);
}
public void setFeatureSelection (FeatureSelection selectedFeatures) { if (selectedFeatures != null && selectedFeatures.getAlphabet() != null // xxx We allow a null vocabulary here? See CRF3.java && selectedFeatures.getAlphabet() != getDataAlphabet()) throw new IllegalArgumentException ("Vocabularies do not match"); featureSelection = selectedFeatures; }
/**
 * Prepares co-occurrence counting structures sized to the data alphabet:
 * one int-int map of co-occurring features per feature, plus a
 * document-frequency slot per feature.
 *
 * @param instances the corpus whose feature co-occurrences will be counted
 */
public FeatureCooccurrenceCounter (InstanceList instances) {
    this.instances = instances;
    this.numFeatures = instances.getDataAlphabet().size();
    this.documentFrequencies = new int[numFeatures];
    this.featureFeatureCounts = new TIntIntHashMap[numFeatures];
    // Every row gets its own empty map up front so counting code never
    // has to null-check.
    for (int i = 0; i < numFeatures; i++) {
        featureFeatureCounts[i] = new TIntIntHashMap();
    }
}
/**
 * Prepares co-occurrence counting structures sized to the data alphabet:
 * one int-int map of co-occurring features per feature, plus a
 * document-frequency slot per feature.
 *
 * @param instances the corpus whose feature co-occurrences will be counted
 */
public FeatureCooccurrenceCounter (InstanceList instances) {
    this.instances = instances;
    this.numFeatures = instances.getDataAlphabet().size();
    this.documentFrequencies = new int[numFeatures];
    this.featureFeatureCounts = new TIntIntHashMap[numFeatures];
    // Every row gets its own empty map up front so counting code never
    // has to null-check.
    for (int i = 0; i < numFeatures; i++) {
        featureFeatureCounts[i] = new TIntIntHashMap();
    }
}
/**
 * Constructs a KLGain ranking over the features of {@code ilist}, scored
 * against the label vectors extracted from the given classifications.
 *
 * @param ilist           the instances whose features are ranked
 * @param classifications classifier outputs, one per instance
 */
public KLGain (InstanceList ilist, Classification[] classifications) {
    super (ilist.getDataAlphabet(),
           calcKLGains (ilist, getLabelVectorsFromClassifications(classifications)));
}
/**
 * Constructs a GradientGain ranking over the features of {@code ilist},
 * scored against the label vectors extracted from the given
 * classifications.
 *
 * @param ilist           the instances whose features are ranked
 * @param classifications classifier outputs, one per instance
 */
public GradientGain (InstanceList ilist, Classification[] classifications) {
    super (ilist.getDataAlphabet(),
           calcGradientGains (ilist, getLabelVectorsFromClassifications(classifications)));
}
/**
 * Constructs a KLGain ranking over the features of {@code ilist}, scored
 * against the label vectors extracted from the given classifications.
 *
 * @param ilist           the instances whose features are ranked
 * @param classifications classifier outputs, one per instance
 */
public KLGain (InstanceList ilist, Classification[] classifications) {
    super (ilist.getDataAlphabet(),
           calcKLGains (ilist, getLabelVectorsFromClassifications(classifications)));
}
/**
 * Replaces the <code>Instance</code> at position <code>index</code> with a
 * new one.  When assertions are enabled, the replacement must share this
 * list's data and target alphabets.
 *
 * @param index    position to overwrite
 * @param instance the replacement instance
 */
public void setInstance (int index, Instance instance) {
    assert getDataAlphabet().equals(instance.getDataAlphabet());
    assert getTargetAlphabet().equals(instance.getTargetAlphabet());
    set(index, instance);
}
/**
 * Builds a new alphabet containing only the features whose total count and
 * document frequency both fall inside the given inclusive bounds.
 *
 * @param minDocs  minimum document frequency (inclusive)
 * @param maxDocs  maximum document frequency (inclusive)
 * @param minCount minimum total count (inclusive)
 * @param maxCount maximum total count (inclusive)
 * @return a freshly-built alphabet of the surviving features
 */
public Alphabet getPrunedAlphabet(int minDocs, int maxDocs, int minCount, int maxCount) {
    Alphabet inputAlphabet = instances.getDataAlphabet();
    Alphabet outputAlphabet = new Alphabet();
    for (int inputType = 0; inputType < numFeatures; inputType++) {
        boolean countInRange = featureCounts[inputType] >= minCount
                && featureCounts[inputType] <= maxCount;
        boolean docsInRange = documentFrequencies[inputType] >= minDocs
                && documentFrequencies[inputType] <= maxDocs;
        if (countInRange && docsInRange) {
            // lookupIndex with a fresh alphabet appends the surviving feature.
            outputAlphabet.lookupIndex(inputAlphabet.lookupObject(inputType));
        }
    }
    return outputAlphabet;
}
/**
 * Computes one InfoGain ranking per target label: each class gets its own
 * ranked view of the shared data alphabet.
 *
 * @param ilist the instances to compute per-label information gain over
 */
public PerLabelInfoGain (InstanceList ilist) {
    double[][] perClassGains = calcPerLabelInfoGains (ilist);
    Alphabet dictionary = ilist.getDataAlphabet();
    int numClasses = ilist.getTargetAlphabet().size();
    ig = new InfoGain[numClasses];
    for (int c = 0; c < numClasses; c++) {
        ig[c] = new InfoGain (dictionary, perClassGains[c]);
    }
}
/**
 * Builds a leaf node over the given instances: picks the highest
 * information-gain feature (restricted to the given selection) as this
 * node's split feature, and records the base label distribution and
 * entropy of the instances.  Children start out null.
 *
 * @param ilist  the instances covered by this node
 * @param parent the parent node, or null for the root
 * @param fs     the feature selection to restrict the split feature to
 */
public Node (InstanceList ilist, Node parent, FeatureSelection fs) {
    InfoGain gain = new InfoGain (ilist);
    this.ilist = ilist;
    this.parent = parent;
    this.dictionary = ilist.getDataAlphabet();
    this.featureIndex = gain.getMaxValuedIndexIn (fs);
    this.infoGain = gain.value(featureIndex);
    this.labeling = gain.getBaseLabelDistribution();
    this.labelEntropy = gain.getBaseEntropy();
    this.child0 = null;
    this.child1 = null;
}