/**
 * Builds an ExpGain ranking of the features in {@code ilist}, using this
 * object's stored classifications and Gaussian prior variance.
 */
public RankedFeatureVector newRankedFeatureVector (InstanceList ilist)
{
	// The stored classifications must share the list's target alphabet.
	assert (ilist.getTargetAlphabet() == classifications[0].getAlphabet());
	ExpGain gain = new ExpGain (ilist, classifications, gaussianPriorVariance);
	return gain;
}
/**
 * Returns the weight-accumulated distribution of target labels over all
 * instances in this list, or null when the list is empty.
 *
 * @throws IllegalStateException if the first instance's target is not a Labeling
 */
public LabelVector targetLabelDistribution ()
{
	if (instances.size() == 0)
		return null;
	if (!(getInstance(0).getTarget() instanceof Labeling))
		throw new IllegalStateException ("Target is not a labeling.");
	double[] labelCounts = new double[getTargetAlphabet().size()];
	// Each instance's labeling contributes proportionally to its weight.
	for (int idx = 0; idx < instances.size(); idx++) {
		Labeling labeling = (Labeling) getInstance(idx).getTarget();
		labeling.addTo (labelCounts, getInstanceWeight(idx));
	}
	return new LabelVector ((LabelAlphabet) getTargetAlphabet(), labelCounts);
}
/**
 * Calculates the minimum description length of this node, i.e.,
 * the length of the binary encoding that describes the feature
 * and the split value used at this node.
 */
public double getMDL()
{
	int numClasses = m_ilist.getTargetAlphabet().size();
	// Data-coding cost: node size times the base entropy of its label distribution.
	double mdl = getSize() * getGainRatio().getBaseEntropy();
	// Model-coding cost for the class-frequency parameters.
	mdl += ((numClasses-1) * Math.log(getSize() / 2.0)) / (2 * GainRatio.log2);
	// Stirling-style correction term, converted to bits via log2.
	double piTerm = Math.pow(Math.PI, numClasses/2.0);
	double gammaTerm = Maths.gamma(numClasses/2.0);
	mdl += Math.log(piTerm/gammaTerm) / GainRatio.log2;
	return mdl;
}
/**
 * Builds one FeatureCounts ranking per target label from the counts
 * accumulated over {@code ilist}.
 */
public PerLabelFeatureCounts (InstanceList ilist)
{
	dataAlphabet = ilist.getDataAlphabet();
	targetAlphabet = ilist.getTargetAlphabet();
	double[][] counts = calcFeatureCounts (ilist);
	fc = new FeatureCounts[targetAlphabet.size()];
	for (int labelIndex = 0; labelIndex < fc.length; labelIndex++)
		fc[labelIndex] = new FeatureCounts (dataAlphabet, counts[labelIndex]);
}
/**
 * Builds one InfoGain ranking per target label from the per-label
 * information gains computed over {@code ilist}.
 */
public PerLabelInfoGain (InstanceList ilist)
{
	double[][] perLabelGains = calcPerLabelInfoGains (ilist);
	Alphabet dataAlphabet = ilist.getDataAlphabet();
	int numClasses = ilist.getTargetAlphabet().size();
	ig = new InfoGain[numClasses];
	for (int labelIndex = 0; labelIndex < numClasses; labelIndex++)
		ig[labelIndex] = new InfoGain (dataAlphabet, perLabelGains[labelIndex]);
}
// Print every entry of the target alphabet, one label per line.
Alphabet targetAlphabet = ilist.getTargetAlphabet ();
for (int idx = 0; idx < targetAlphabet.size(); idx++)
	System.out.println (targetAlphabet.lookupObject (idx));
// NOTE(review): this line is garbled extraction residue -- statements from what
// appears to be an InfoGain-style computation have been fused together, including
// an unreachable "return infogains;" followed by further assignments. The original
// method boundaries are not visible here; restore from the original source file
// before attempting any edit. Left byte-identical.
int numClasses = ilist.getTargetAlphabet().size(); int numFeatures = ilist.getDataAlphabet().size(); double[] infogains = new double[numFeatures]; staticBaseLabelDistribution = new LabelVector ((LabelAlphabet)ilist.getTargetAlphabet(), targetCount); return infogains; staticBaseEntropy -= p * Math.log(p) / log2; staticBaseLabelDistribution = new LabelVector ((LabelAlphabet)ilist.getTargetAlphabet(), classDistribution);
// NOTE(review): garbled extraction residue -- the argument of instList2.add(...)
// was truncated and fused with the following println, so this is not compilable
// as written. Presumably an iterator/instance argument was lost; recover it from
// the original source before editing. Left byte-identical.
System.out.println("target alphabet size " + instList.getTargetAlphabet().size()); InstanceList instList2 = new InstanceList(instPipe); instList2.add(new System.out.println("target alphabet size " + instList2.getTargetAlphabet().size());
public void induceFeatures (InstanceList ilist, boolean withFeatureShrinkage, boolean inducePerClassFeatures) { if (inducePerClassFeatures) { int numClasses = ilist.getTargetAlphabet().size(); // int numFeatures = ilist.getDataAlphabet().size(); FeatureSelection[] pcfs = new FeatureSelection[numClasses]; for (int j = 0; j < numClasses; j++) pcfs[j] = (FeatureSelection) ilist.getPerLabelFeatureSelection()[j].clone(); for (int i = 0; i < ilist.size(); i++) { Object data = ilist.getInstance(i).getData(); AugmentableFeatureVector afv = (AugmentableFeatureVector) data; root.induceFeatures (afv, null, pcfs, ilist.getFeatureSelection(), ilist.getPerLabelFeatureSelection(), withFeatureShrinkage, inducePerClassFeatures, addFeaturesClassEntropyThreshold); } } else { throw new UnsupportedOperationException ("Not yet implemented"); } }
// NOTE(review): garbled extraction residue (duplicate of a fragment seen earlier
// in this file) -- the instList2.add(...) argument was truncated and fused with
// the following println, so this is not compilable. Recover the lost argument
// from the original source before editing. Left byte-identical.
System.out.println("target alphabet size " + instList.getTargetAlphabet().size()); InstanceList instList2 = new InstanceList(instPipe); instList2.add(new System.out.println("target alphabet size " + instList2.getTargetAlphabet().size());
// NOTE(review): garbled extraction residue -- the calcGradientGains signature has
// been fused with loop-body statements (note the free "i" index with no enclosing
// for-loop and the missing braces). The full method body is not visible here;
// restore from the original source before editing. Left byte-identical.
private static double[] calcGradientGains (InstanceList ilist, LabelVector[] classifications) int numClasses = ilist.getTargetAlphabet().size(); int numFeatures = ilist.getDataAlphabet().size(); double[] gradientgains = new double[numFeatures]; assert (classifications[i].getLabelAlphabet() == ilist.getTargetAlphabet()); Instance inst = ilist.getInstance(i); Labeling labeling = inst.getLabeling ();
// NOTE(review): garbled extraction residue (same broken fragment repeated) --
// the instList2.add(...) argument was truncated and fused with the following
// println; not compilable as written. Recover from the original source before
// editing. Left byte-identical.
System.out.println("target alphabet size " + instList.getTargetAlphabet().size()); InstanceList instList2 = new InstanceList(instPipe); instList2.add(new System.out.println("target alphabet size " + instList2.getTargetAlphabet().size());
// Fragment of a larger method (enclosing definition not visible in this view):
// caches the class count and two views of the data alphabet / feature count.
// numClasses is unused within this visible span -- presumably used later in the
// original method; verify before removing.
int numClasses = ilist.getTargetAlphabet().size(); int numFeatures = ilist.getDataAlphabet().size(); Alphabet dataAlphabet = ilist.getDataAlphabet();
// Assumes the target alphabet is a LabelAlphabet -- TODO confirm; the cast
// throws ClassCastException otherwise.
LabelAlphabet ld = (LabelAlphabet) ilist.getTargetAlphabet();
// Assumes the target alphabet is a LabelAlphabet -- TODO confirm; the cast
// throws ClassCastException otherwise.
LabelAlphabet ld = (LabelAlphabet) ilist.getTargetAlphabet();
/**
 * Sums per-feature counts over all instances in {@code ilist}.
 * When {@code countInstances} is true each feature present in an instance
 * contributes 1; otherwise it contributes its feature-vector value.
 * Instances with zero weight are skipped.
 *
 * Fix: removed the dead locals {@code numInstances}, {@code numClasses}
 * and {@code count}, which were declared but never used.
 *
 * @param ilist the instance list to tally
 * @return an array of length dataAlphabet.size() with the accumulated counts
 * @throws IllegalArgumentException if any instance's data is not a FeatureVector
 */
private static double[] calcFeatureCounts (InstanceList ilist)
{
	int numFeatures = ilist.getDataAlphabet().size();
	double[] counts = new double[numFeatures];
	for (int i = 0; i < ilist.size(); i++) {
		Instance inst = ilist.getInstance(i);
		if (!(inst.getData() instanceof FeatureVector))
			throw new IllegalArgumentException ("Currently only handles FeatureVector data");
		FeatureVector fv = (FeatureVector) inst.getData ();
		// Zero-weight instances contribute nothing; note the type check above
		// still runs for them, matching the original behavior.
		if (ilist.getInstanceWeight(i) == 0)
			continue;
		for (int j = 0; j < fv.numLocations(); j++) {
			if (countInstances)
				counts[fv.indexAtLocation(j)] += 1;
			else
				counts[fv.indexAtLocation(j)] += fv.valueAtLocation(j);
		}
	}
	return counts;
}
private static double[][] calcFeatureCounts (InstanceList ilist) { int numClasses = ilist.getTargetAlphabet().size(); int numFeatures = ilist.getDataAlphabet().size(); double[][] featureCounts = new double[numClasses][numFeatures]; // Count features across all classes for (int i = 0; i < ilist.size(); i++) { Instance inst = ilist.getInstance(i); if (!(inst.getData() instanceof FeatureVector)) throw new IllegalArgumentException ("Currently only handles FeatureVector data"); FeatureVector fv = (FeatureVector) inst.getData (); // xxx Note that this ignores uncertain-labels. int labelIndex = inst.getLabeling ().getBestIndex(); int fli; for (int fl = 0; fl < fv.numLocations(); fl++) { fli = fv.indexAtLocation(fl); if (countInstances) featureCounts[labelIndex][fli]++; else featureCounts[labelIndex][fli] += fv.valueAtLocation(fl); } } return featureCounts; }
// Fragment of a larger method (enclosing definition not visible here):
// allocates the weight matrix with one row per label. The +1 column is
// presumably a per-label bias/default-feature slot -- TODO confirm against
// how m_weights is indexed elsewhere.
int numLabels = trainingList.getTargetAlphabet().size(); int numFeats = dict.size(); m_weights = new double [numLabels][numFeats+1];
// Fragment of a larger method (enclosing definition not visible here):
// freezes the label set so training cannot add new labels, caches the pipe
// and data alphabet, and scales theta by the feature count. dataPipe and
// numLabels are unused within this visible span -- presumably used later in
// the original method; verify before removing.
trainingList.getTargetAlphabet().stopGrowth(); Pipe dataPipe = trainingList.getPipe (); Alphabet dict = (Alphabet) trainingList.getDataAlphabet (); int numLabels = trainingList.getTargetAlphabet().size(); int numFeats = dict.size(); this.theta = numFeats * this.nfactor;
/**
 * Trains a NaiveBayes classifier on two tiny string corpora ("africa" and
 * "asia") and checks that a held-out africa-themed sentence is labeled
 * "africa". Labels from the same alphabet are interned, so identity
 * comparison on the best label is the intended check here.
 */
public void testStringTrained ()
{
	String[] africaTraining = new String[] {
		"on the plains of africa the lions roar",
		"in swahili ngoma means to dance",
		"nelson mandela became president of south africa",
		"the saraha dessert is expanding"};
	String[] asiaTraining = new String[] {
		"panda bears eat bamboo",
		"china's one child policy has resulted in a surplus of boys",
		"tigers live in the jungle"};
	Pipe pipe = new SerialPipes (new Pipe[] {
		new Target2Label (),
		new CharSequence2TokenSequence (),
		new TokenSequence2FeatureSequence (),
		new FeatureSequence2FeatureVector ()});
	InstanceList training = new InstanceList (pipe);
	training.add (new ArrayIterator (africaTraining, "africa"));
	training.add (new ArrayIterator (asiaTraining, "asia"));
	Classifier classifier = new NaiveBayesTrainer ().train (training);
	Classification result = classifier.classify ("nelson mandela never eats lions");
	assertTrue (result.getLabeling().getBestLabel()
			== ((LabelAlphabet) training.getTargetAlphabet()).lookupLabel ("africa"));
}