/** * Used to initialize the ranges. For this the values of the first instance is * used to save time. Sets low and high to the values of the first instance * and width to zero. * * @param instance the new instance * @param numAtt number of attributes in the model (ignored) * @param ranges low, high and width values for all attributes */ public void updateRangesFirst(Instance instance, int numAtt, double[][] ranges) { for (int i = 0; i < ranges.length; i++) { for (int j = 0; j < ranges[i].length; j++) { ranges[i][j] = 0.0; } } int numVals = instance.numValues(); for (int j = 0; j < numVals; j++) { int currIndex = instance.index(j); if (!instance.isMissingSparse(j)) { ranges[currIndex][R_MIN] = instance.valueSparse(j); ranges[currIndex][R_MAX] = instance.valueSparse(j); } else { // if value was missing ranges[currIndex][R_MIN] = Double.POSITIVE_INFINITY; ranges[currIndex][R_MAX] = -Double.POSITIVE_INFINITY; ranges[currIndex][R_WIDTH] = Double.POSITIVE_INFINITY; } } }
/**
 * log(N!) + (sum for all the words i)(log(Pi^ni) - log(ni!))
 *
 * where
 * N is the total number of words
 * Pi is the probability of obtaining word i
 * ni is the number of times the word at index i occurs in the document
 *
 * Actually, this method just computes (sum for all the words i)(log(Pi^ni))
 * because the factorials are irrelevant when posterior class probabilities
 * are computed.
 *
 * @param inst the instance to be classified
 * @param classIndex the index of the class we are calculating the
 *          probability with respect to
 * @return the log of the probability of the document occurring given the
 *         class
 */
protected double probOfDocGivenClass(Instance inst, int classIndex) {
  double logProb = 0;
  int classAtt = inst.classIndex();
  int numVals = inst.numValues();
  for (int pos = 0; pos < numVals; pos++) {
    int att = inst.index(pos);
    if (att == classAtt) {
      continue;
    }
    // ni * (log(Pi) - log(total words in class))
    logProb += inst.valueSparse(pos)
      * (Math.log(m_probOfWordGivenClass[classIndex][att])
        - Math.log(m_wordsPerClass[classIndex]));
  }
  return logProb;
}
/**
 * Computes the inner product of a (possibly sparse) instance and a dense
 * weight vector, skipping the class attribute and missing values. Only the
 * first weights.length - 1 entries of the weight vector take part in the
 * product.
 *
 * @param inst1 the instance
 * @param weights the dense weight vector
 * @param classIndex index of the class attribute, which is skipped
 * @return the dot product
 */
protected static double dotProd(Instance inst1, double[] weights, int classIndex) {
  double sum = 0;
  int numVals = inst1.numValues();
  int numWeights = weights.length - 1; // last entry is excluded

  // Merge-join the sorted sparse indices against the dense weight indices.
  int p1 = 0;
  int p2 = 0;
  while (p1 < numVals && p2 < numWeights) {
    int attIndex = inst1.index(p1);
    if (attIndex == p2) {
      if (attIndex != classIndex && !inst1.isMissingSparse(p1)) {
        sum += inst1.valueSparse(p1) * weights[p2];
      }
      p1++;
      p2++;
    } else if (attIndex > p2) {
      p2++;
    } else {
      p1++;
    }
  }
  return sum;
}
/**
 * Computes the inner product of a (possibly sparse) instance and a dense
 * weight vector, skipping the class attribute and missing values. Only the
 * first weights.length - 1 entries of the weight vector take part in the
 * product.
 *
 * @param inst1 the instance
 * @param weights the dense weight vector
 * @param classIndex index of the class attribute, which is skipped
 * @return the dot product
 */
protected static double dotProd(Instance inst1, double[] weights, int classIndex) {
  // Instance stores its values with ascending attribute indices, so a
  // single pass over the stored values suffices; stop once indices reach
  // the excluded last weight entry.
  double result = 0;
  int limit = weights.length - 1;
  int numVals = inst1.numValues();
  for (int p = 0; p < numVals; p++) {
    int att = inst1.index(p);
    if (att >= limit) {
      break;
    }
    if (att != classIndex && !inst1.isMissingSparse(p)) {
      result += inst1.valueSparse(p) * weights[att];
    }
  }
  return result;
}
/**
 * Computes the inner product of a (possibly sparse) instance and a dense
 * weight vector, skipping the class attribute and missing values. Only the
 * first weights.length - 1 entries of the weight vector take part in the
 * product.
 *
 * @param inst1 the instance
 * @param weights the dense weight vector
 * @param classIndex index of the class attribute, which is skipped
 * @return the dot product
 */
protected static double dotProd(Instance inst1, double[] weights, int classIndex) {
  double acc = 0;
  final int n1 = inst1.numValues();
  final int n2 = weights.length - 1;

  // Walk both index sequences in lock-step (both are ascending).
  for (int p1 = 0, p2 = 0; p1 < n1 && p2 < n2; ) {
    final int ind1 = inst1.index(p1);
    if (ind1 < p2) {
      p1++;
    } else if (ind1 > p2) {
      p2++;
    } else {
      if (ind1 != classIndex && !inst1.isMissingSparse(p1)) {
        acc += inst1.valueSparse(p1) * weights[p2];
      }
      p1++;
      p2++;
    }
  }
  return acc;
}
/**
 * Builds the kernel. Calls the super class method and then also initializes
 * the cache for the diagonal of the dot product matrix (the squared norm of
 * each instance, excluding the class attribute).
 *
 * @param data the training data
 * @throws Exception if the kernel cannot be built
 */
public void buildKernel(Instances data) throws Exception {
  super.buildKernel(data);

  int numInsts = data.numInstances();
  int classIndex = data.classIndex();
  m_kernelPrecalc = new double[numInsts];
  for (int i = 0; i < numInsts; i++) {
    Instance inst = data.instance(i);
    double selfDot = 0;
    int numVals = inst.numValues();
    for (int j = 0; j < numVals; j++) {
      if (inst.index(j) != classIndex) {
        double v = inst.valueSparse(j);
        selfDot += v * v;
      }
    }
    m_kernelPrecalc[i] = selfDot;
  }
}
/**
 * Constructs a dense boolean matrix from Weka instances that are expected
 * to hold the values 0 (FALSE), missing (UNKNOWN) or anything else (TRUE).
 *
 * @param instances the instances to convert, one row per instance
 */
public BooleanMatrix(Instances instances) {
  this(instances.numInstances(), instances.numAttributes());
  int row = 0;
  for (Instance inst : instances) {
    int numVals = inst.numValues();
    for (int pos = 0; pos < numVals; pos++) {
      double v = inst.valueSparse(pos);
      byte cell;
      if (Double.isNaN(v)) {
        cell = UNKNOWN; // missing value
      } else if (v == 0d) {
        cell = FALSE;
      } else {
        cell = TRUE;
      }
      this.update(row, inst.index(pos), cell);
    }
    row++;
  }
}
/**
 * Builds the kernel. Calls the super class method and then also initializes
 * the cache for the diagonal of the dot product matrix (each instance's dot
 * product with itself, the class attribute excluded).
 *
 * @param data the training data
 * @throws Exception if the kernel cannot be built
 */
public void buildKernel(Instances data) throws Exception {
  super.buildKernel(data);

  m_kernelPrecalc = new double[data.numInstances()];
  int classIdx = data.classIndex();
  for (int i = 0; i < m_kernelPrecalc.length; i++) {
    Instance current = data.instance(i);
    double dotWithSelf = 0;
    for (int pos = 0, n = current.numValues(); pos < n; pos++) {
      if (current.index(pos) == classIdx) {
        continue;
      }
      double value = current.valueSparse(pos);
      dotWithSelf += value * value;
    }
    m_kernelPrecalc[i] = dotWithSelf;
  }
}
/**
 * Converts a single instance to the required format: each of the m_k output
 * attributes is the dot product of the instance's attribute values with one
 * row of the random matrix, and the class value (if stored) is copied to
 * position m_k.
 *
 * @param instance the instance to convert
 * @return the converted (dense) instance
 * @throws Exception if the conversion fails
 */
protected Instance convertInstance(Instance instance) throws Exception {
  double[] vals = new double[outputFormatPeek().numAttributes()];
  int classIndex = instance.classIndex();

  // Iterate over the stored (sparse) values once instead of once per
  // projected attribute: the original nesting re-read every sparse value
  // and re-assigned the class value m_k times.
  for (int i = 0; i < instance.numValues(); i++) {
    int index = instance.index(i);
    double value = instance.valueSparse(i);
    if (index == classIndex) {
      vals[m_k] = value;
    } else if (!Utils.isMissingValue(value)) {
      for (int j = 0; j < m_k; j++) {
        vals[j] += m_rmatrix[j][index] * value;
      }
    }
  }
  return new DenseInstance(instance.weight(), vals);
}
/**
 * Calculates the centroid pivot of a node. The node is given in the form of
 * an indices array that contains the indices of the points inside the node.
 *
 * @param instList the indices array pointing to the instances in the node
 * @param insts the actual instances; instList points into this object
 * @return the calculated centre/pivot of the node
 */
public static Instance calcCentroidPivot(int[] instList, Instances insts) {
  double[] attrVals = new double[insts.numAttributes()];

  for (int i = 0; i < instList.length; i++) {
    Instance temp = insts.instance(instList[i]);
    for (int j = 0; j < temp.numValues(); j++) {
      // BUG FIX: j is the position in the (possibly sparse) value array,
      // not the attribute index. Using j directly attributed sparse values
      // to the wrong attributes; temp.index(j) maps position -> attribute.
      attrVals[temp.index(j)] += temp.valueSparse(j);
    }
  }

  int numInsts = instList.length;
  for (int j = 0; j < attrVals.length; j++) {
    attrVals[j] /= numInsts;
  }
  return new DenseInstance(1.0, attrVals);
}
/**
 * Converts a single instance to the required format: each of the m_k output
 * attributes is the dot product of the instance with one row of the random
 * matrix; the class value (if stored) is copied to position m_k.
 *
 * @param instance the instance to convert
 * @return the converted (dense) instance
 * @throws Exception if the conversion fails
 */
protected Instance convertInstance(Instance instance) throws Exception {
  double[] projected = new double[outputFormatPeek().numAttributes()];
  int classAtt = instance.classIndex();
  int numVals = instance.numValues();

  for (int row = 0; row < m_k; row++) {
    for (int pos = 0; pos < numVals; pos++) {
      int attIndex = instance.index(pos);
      if (attIndex == classAtt) {
        // class value goes to the last output slot untouched
        projected[m_k] = instance.valueSparse(pos);
      } else {
        double val = instance.valueSparse(pos);
        if (!Utils.isMissingValue(val)) {
          projected[row] += m_rmatrix[row][attIndex] * val;
        }
      }
    }
  }
  return new DenseInstance(instance.weight(), projected);
}
/**
 * log(N!) + (sum for all the words i)(log(Pi^ni) - log(ni!))
 *
 * where
 * N is the total number of words
 * Pi is the probability of obtaining word i
 * ni is the number of times the word at index i occurs in the document
 *
 * Actually, this method just computes (sum for all the words i)(log(Pi^ni))
 * because the factorials are irrelevant when posterior class probabilities
 * are computed.
 *
 * @param inst the instance to be classified
 * @param classIndex the index of the class we are calculating the
 *          probability with respect to
 * @return the log of the probability of the document occurring given the
 *         class
 */
protected double probOfDocGivenClass(Instance inst, int classIndex) {
  double logProb = 0;
  int classAtt = inst.classIndex();
  int numVals = inst.numValues();
  for (int pos = 0; pos < numVals; pos++) {
    int att = inst.index(pos);
    if (att == classAtt) {
      continue;
    }
    // ni * log(Pi); m_probOfWordGivenClass is multiplied directly, so it
    // presumably already stores log-probabilities — confirm against trainer
    logProb += inst.valueSparse(pos) * m_probOfWordGivenClass[classIndex][att];
  }
  return logProb;
}
/**
 * Calculates the centroid pivot of a node. The node is given in the form of
 * an indices array that contains the indices of the points inside the node.
 *
 * @param instList the indices array pointing to the instances in the node
 * @param insts the actual instances; instList points into this object
 * @return the calculated centre/pivot of the node
 */
public static Instance calcCentroidPivot(int[] instList, Instances insts) {
  double[] attrVals = new double[insts.numAttributes()];
  Instance temp;
  for (int i = 0; i < instList.length; i++) {
    temp = insts.instance(instList[i]);
    for (int j = 0; j < temp.numValues(); j++) {
      // BUG FIX: for sparse instances, position j in the value array is not
      // the attribute index; temp.index(j) gives the real attribute index.
      attrVals[temp.index(j)] += temp.valueSparse(j);
    }
  }
  for (int j = 0, numInsts = instList.length; j < attrVals.length; j++) {
    attrVals[j] /= numInsts;
  }
  temp = new DenseInstance(1.0, attrVals);
  return temp;
}
/**
 * log(N!) + (sum for all the words i)(log(Pi^ni) - log(ni!))
 *
 * where
 * N is the total number of words
 * Pi is the probability of obtaining word i
 * ni is the number of times the word at index i occurs in the document
 *
 * Actually, this method just computes (sum for all the words i)(log(Pi^ni))
 * because the factorials are irrelevant when posterior class probabilities
 * are computed.
 *
 * @param inst the instance to be classified
 * @param classIndex the index of the class we are calculating the
 *          probability with respect to
 * @return the log of the probability of the document occurring given the
 *         class
 */
protected double probOfDocGivenClass(Instance inst, int classIndex) {
  double total = 0;
  for (int pos = 0, n = inst.numValues(); pos < n; pos++) {
    int attIndex = inst.index(pos);
    if (attIndex != inst.classIndex()) {
      // word count times the stored per-class word score; the stored value
      // is presumably already a log-probability — verify against the trainer
      total += inst.valueSparse(pos) * m_probOfWordGivenClass[classIndex][attIndex];
    }
  }
  return total;
}
/** * * @param inst * @return * @throws Exception */ public double SVMOutput(Instance inst) throws Exception { double result = -m_b; // Is the machine linear? if (m_weights != null) { // Is weight vector stored in sparse format? for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != m_classIndex) { result += m_weights[inst.index(i)] * inst.valueSparse(i); } } } else { for (int i = m_supportVectors.getNext(-1); i != -1; i = m_supportVectors .getNext(i)) { result += (m_alpha[i] - m_alphaStar[i]) * m_kernel.eval(-1, i, inst); } } return result; }
/** * * @param inst * @return * @throws Exception */ public double SVMOutput(Instance inst) throws Exception { double result = -m_b; // Is the machine linear? if (m_weights != null) { // Is weight vector stored in sparse format? for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != m_classIndex) { result += m_weights[inst.index(i)] * inst.valueSparse(i); } } } else { for (int i = m_supportVectors.getNext(-1); i != -1; i = m_supportVectors .getNext(i)) { result += (m_alpha[i] - m_alphaStar[i]) * m_kernel.eval(-1, i, inst); } } return result; }
/**
 * Updates the classifier with information from one training instance:
 * adds the instance weight to the class prior counts and the weighted word
 * counts to the per-class word statistics.
 *
 * @param instance the instance to be incorporated
 * @throws Exception if a word count is negative
 */
public void updateClassifier(Instance instance) throws Exception {
  double classValue = instance.value(instance.classIndex());
  if (Utils.isMissingValue(classValue)) {
    return; // instances with a missing class contribute nothing
  }

  int classIndex = (int) classValue;
  m_probOfClass[classIndex] += instance.weight();

  for (int a = 0; a < instance.numValues(); a++) {
    int attIndex = instance.index(a);
    if (attIndex == instance.classIndex() || instance.isMissingSparse(a)) {
      continue;
    }
    double numOccurrences = instance.valueSparse(a) * instance.weight();
    if (numOccurrences < 0) {
      throw new Exception(
        "Numeric attribute values must all be greater or equal to zero.");
    }
    m_wordsPerClass[classIndex] += numOccurrences;
    m_probOfWordGivenClass[classIndex][attIndex] += numOccurrences;
  }
}
/**
 * Updates the classifier with information from one training instance.
 * The instance weight is added to the class prior counts and each weighted
 * word count to the per-class word statistics; instances with a missing
 * class value are ignored.
 *
 * @param instance the instance to be incorporated
 * @throws Exception if a word count is negative
 */
public void updateClassifier(Instance instance) throws Exception {
  double classValue = instance.value(instance.classIndex());
  if (!Utils.isMissingValue(classValue)) {
    int classIndex = (int) classValue;
    m_probOfClass[classIndex] += instance.weight();

    int classAtt = instance.classIndex();
    int numVals = instance.numValues();
    for (int pos = 0; pos < numVals; pos++) {
      int attIndex = instance.index(pos);
      if (attIndex != classAtt && !instance.isMissingSparse(pos)) {
        double weightedCount = instance.valueSparse(pos) * instance.weight();
        if (weightedCount < 0) {
          throw new Exception(
            "Numeric attribute values must all be greater or equal to zero.");
        }
        m_wordsPerClass[classIndex] += weightedCount;
        m_probOfWordGivenClass[classIndex][attIndex] += weightedCount;
      }
    }
  }
}
/**
 * Merges this instance with the given instance and returns the result.
 * Dataset is set to null. The merged instance keeps this instance's stored
 * values first and appends the other instance's values with their indices
 * shifted past this instance's attributes.
 *
 * @param inst the instance to be merged with this one
 * @return the merged instance
 */
@Override
public Instance mergeInstance(Instance inst) {
  int total = numValues() + inst.numValues();
  double[] values = new double[total];
  int[] indices = new int[total];

  int m = 0;
  // copy this instance's stored values first
  for (int j = 0; j < numValues(); j++) {
    values[m] = valueSparse(j);
    indices[m] = index(j);
    m++;
  }
  // then append the other instance's values, shifted by our attribute count
  for (int j = 0; j < inst.numValues(); j++) {
    values[m] = inst.valueSparse(j);
    indices[m] = numAttributes() + inst.index(j);
    m++;
  }
  return new SparseInstance(1.0, values, indices,
    numAttributes() + inst.numAttributes());
}
/**
 * Merges this instance with the given instance and returns the result.
 * Dataset is set to null. The combined instance spans the attributes of
 * both: this instance's values keep their indices, the argument's indices
 * are offset by this instance's attribute count.
 *
 * @param inst the instance to be merged with this one
 * @return the merged instance
 */
@Override
public Instance mergeInstance(Instance inst) {
  int thisCount = numValues();
  int otherCount = inst.numValues();
  double[] mergedValues = new double[thisCount + otherCount];
  int[] mergedIndices = new int[thisCount + otherCount];

  for (int pos = 0; pos < thisCount; pos++) {
    mergedValues[pos] = valueSparse(pos);
    mergedIndices[pos] = index(pos);
  }
  int offset = numAttributes();
  for (int pos = 0; pos < otherCount; pos++) {
    mergedValues[thisCount + pos] = inst.valueSparse(pos);
    mergedIndices[thisCount + pos] = offset + inst.index(pos);
  }

  return new SparseInstance(1.0, mergedValues, mergedIndices,
    offset + inst.numAttributes());
}