/** * Used to initialize the ranges. For this the values of the first instance is * used to save time. Sets low and high to the values of the first instance * and width to zero. * * @param instance the new instance * @param numAtt number of attributes in the model (ignored) * @param ranges low, high and width values for all attributes */ public void updateRangesFirst(Instance instance, int numAtt, double[][] ranges) { for (int i = 0; i < ranges.length; i++) { for (int j = 0; j < ranges[i].length; j++) { ranges[i][j] = 0.0; } } int numVals = instance.numValues(); for (int j = 0; j < numVals; j++) { int currIndex = instance.index(j); if (!instance.isMissingSparse(j)) { ranges[currIndex][R_MIN] = instance.valueSparse(j); ranges[currIndex][R_MAX] = instance.valueSparse(j); } else { // if value was missing ranges[currIndex][R_MIN] = Double.POSITIVE_INFINITY; ranges[currIndex][R_MAX] = -Double.POSITIVE_INFINITY; ranges[currIndex][R_WIDTH] = Double.POSITIVE_INFINITY; } } }
/**
 * log(N!) + (sum for all the words i)(log(Pi^ni) - log(ni!))
 *
 * where
 * N is the total number of words
 * Pi is the probability of obtaining word i
 * ni is the number of times the word at index i occurs in the document
 *
 * Actually, this method just computes (sum for all the words i)(log(Pi^ni))
 * because the factorials are irrelevant when posterior class probabilities
 * are computed.
 *
 * @param inst the instance to be classified
 * @param classIndex the index of the class we are calculating the
 *          probability with respect to
 * @return the log of the probability of the document occurring given the
 *         class
 */
protected double probOfDocGivenClass(Instance inst, int classIndex) {
  double logProb = 0;
  int classAtt = inst.classIndex();
  int numVals = inst.numValues();
  for (int pos = 0; pos < numVals; pos++) {
    int att = inst.index(pos);
    if (att == classAtt) {
      continue;
    }
    // ni * (log(Pi) - log(total words in class))
    logProb += inst.valueSparse(pos)
      * (Math.log(m_probOfWordGivenClass[classIndex][att])
        - Math.log(m_wordsPerClass[classIndex]));
  }
  return logProb;
}
/**
 * Computes the inner product of a (possibly sparse) instance and a dense
 * weight vector, skipping the class attribute and missing values. Only the
 * first weights.length - 1 entries of the weight vector take part in the
 * product.
 *
 * @param inst1 the instance
 * @param weights the dense weight vector
 * @param classIndex index of the class attribute, which is skipped
 * @return the dot product
 */
protected static double dotProd(Instance inst1, double[] weights, int classIndex) {
  double sum = 0;
  int numVals = inst1.numValues();
  int numWeights = weights.length - 1; // last entry is excluded

  // Merge-join the sorted sparse indices against the dense weight indices.
  int p1 = 0;
  int p2 = 0;
  while (p1 < numVals && p2 < numWeights) {
    int attIndex = inst1.index(p1);
    if (attIndex == p2) {
      if (attIndex != classIndex && !inst1.isMissingSparse(p1)) {
        sum += inst1.valueSparse(p1) * weights[p2];
      }
      p1++;
      p2++;
    } else if (attIndex > p2) {
      p2++;
    } else {
      p1++;
    }
  }
  return sum;
}
/**
 * Computes the inner product of a (possibly sparse) instance and a dense
 * weight vector, skipping the class attribute and missing values. Only the
 * first weights.length - 1 entries of the weight vector take part in the
 * product.
 *
 * @param inst1 the instance
 * @param weights the dense weight vector
 * @param classIndex index of the class attribute, which is skipped
 * @return the dot product
 */
protected static double dotProd(Instance inst1, double[] weights, int classIndex) {
  // Instance stores its values with ascending attribute indices, so a
  // single pass over the stored values suffices; stop once indices reach
  // the excluded last weight entry.
  double result = 0;
  int limit = weights.length - 1;
  int numVals = inst1.numValues();
  for (int p = 0; p < numVals; p++) {
    int att = inst1.index(p);
    if (att >= limit) {
      break;
    }
    if (att != classIndex && !inst1.isMissingSparse(p)) {
      result += inst1.valueSparse(p) * weights[att];
    }
  }
  return result;
}
/**
 * Computes the inner product of a (possibly sparse) instance and a dense
 * weight vector, skipping the class attribute and missing values. Only the
 * first weights.length - 1 entries of the weight vector take part in the
 * product.
 *
 * @param inst1 the instance
 * @param weights the dense weight vector
 * @param classIndex index of the class attribute, which is skipped
 * @return the dot product
 */
protected static double dotProd(Instance inst1, double[] weights, int classIndex) {
  double acc = 0;
  final int n1 = inst1.numValues();
  final int n2 = weights.length - 1;

  // Walk both index sequences in lock-step (both are ascending).
  for (int p1 = 0, p2 = 0; p1 < n1 && p2 < n2; ) {
    final int ind1 = inst1.index(p1);
    if (ind1 < p2) {
      p1++;
    } else if (ind1 > p2) {
      p2++;
    } else {
      if (ind1 != classIndex && !inst1.isMissingSparse(p1)) {
        acc += inst1.valueSparse(p1) * weights[p2];
      }
      p1++;
      p2++;
    }
  }
  return acc;
}
/**
 * Builds the kernel. Calls the super class method and then also initializes
 * the cache for the diagonal of the dot product matrix (the squared norm of
 * each instance, excluding the class attribute).
 *
 * @param data the training data
 * @throws Exception if the kernel cannot be built
 */
public void buildKernel(Instances data) throws Exception {
  super.buildKernel(data);

  int numInsts = data.numInstances();
  int classIndex = data.classIndex();
  m_kernelPrecalc = new double[numInsts];
  for (int i = 0; i < numInsts; i++) {
    Instance inst = data.instance(i);
    double selfDot = 0;
    int numVals = inst.numValues();
    for (int j = 0; j < numVals; j++) {
      if (inst.index(j) != classIndex) {
        double v = inst.valueSparse(j);
        selfDot += v * v;
      }
    }
    m_kernelPrecalc[i] = selfDot;
  }
}
/**
 * Constructs a dense boolean matrix from Weka instances that are expected
 * to hold the values 0 (FALSE), missing (UNKNOWN) or anything else (TRUE).
 *
 * @param instances the instances to convert, one row per instance
 */
public BooleanMatrix(Instances instances) {
  this(instances.numInstances(), instances.numAttributes());
  int row = 0;
  for (Instance inst : instances) {
    int numVals = inst.numValues();
    for (int pos = 0; pos < numVals; pos++) {
      double v = inst.valueSparse(pos);
      byte cell;
      if (Double.isNaN(v)) {
        cell = UNKNOWN; // missing value
      } else if (v == 0d) {
        cell = FALSE;
      } else {
        cell = TRUE;
      }
      this.update(row, inst.index(pos), cell);
    }
    row++;
  }
}
/**
 * Builds the kernel. Calls the super class method and then also initializes
 * the cache for the diagonal of the dot product matrix (each instance's dot
 * product with itself, the class attribute excluded).
 *
 * @param data the training data
 * @throws Exception if the kernel cannot be built
 */
public void buildKernel(Instances data) throws Exception {
  super.buildKernel(data);

  m_kernelPrecalc = new double[data.numInstances()];
  int classIdx = data.classIndex();
  for (int i = 0; i < m_kernelPrecalc.length; i++) {
    Instance current = data.instance(i);
    double dotWithSelf = 0;
    for (int pos = 0, n = current.numValues(); pos < n; pos++) {
      if (current.index(pos) == classIdx) {
        continue;
      }
      double value = current.valueSparse(pos);
      dotWithSelf += value * value;
    }
    m_kernelPrecalc[i] = dotWithSelf;
  }
}
/**
 * Converts a single instance to the required format: each of the m_k output
 * attributes is the dot product of the instance's attribute values with one
 * row of the random matrix, and the class value (if stored) is copied to
 * position m_k.
 *
 * @param instance the instance to convert
 * @return the converted (dense) instance
 * @throws Exception if the conversion fails
 */
protected Instance convertInstance(Instance instance) throws Exception {
  double[] vals = new double[outputFormatPeek().numAttributes()];
  int classIndex = instance.classIndex();

  // Iterate over the stored (sparse) values once instead of once per
  // projected attribute: the original nesting re-read every sparse value
  // and re-assigned the class value m_k times.
  for (int i = 0; i < instance.numValues(); i++) {
    int index = instance.index(i);
    double value = instance.valueSparse(i);
    if (index == classIndex) {
      vals[m_k] = value;
    } else if (!Utils.isMissingValue(value)) {
      for (int j = 0; j < m_k; j++) {
        vals[j] += m_rmatrix[j][index] * value;
      }
    }
  }
  return new DenseInstance(instance.weight(), vals);
}
/**
 * Calculates the centroid pivot of a node. The node is given in the form of
 * an indices array that contains the indices of the points inside the node.
 *
 * @param instList the indices array pointing to the instances in the node
 * @param insts the actual instances; instList points into this object
 * @return the calculated centre/pivot of the node
 */
public static Instance calcCentroidPivot(int[] instList, Instances insts) {
  double[] attrVals = new double[insts.numAttributes()];

  for (int i = 0; i < instList.length; i++) {
    Instance temp = insts.instance(instList[i]);
    for (int j = 0; j < temp.numValues(); j++) {
      // BUG FIX: j is the position in the (possibly sparse) value array,
      // not the attribute index. Using j directly attributed sparse values
      // to the wrong attributes; temp.index(j) maps position -> attribute.
      attrVals[temp.index(j)] += temp.valueSparse(j);
    }
  }

  int numInsts = instList.length;
  for (int j = 0; j < attrVals.length; j++) {
    attrVals[j] /= numInsts;
  }
  return new DenseInstance(1.0, attrVals);
}
/**
 * Converts a single instance to the required format: each of the m_k output
 * attributes is the dot product of the instance with one row of the random
 * matrix; the class value (if stored) is copied to position m_k.
 *
 * @param instance the instance to convert
 * @return the converted (dense) instance
 * @throws Exception if the conversion fails
 */
protected Instance convertInstance(Instance instance) throws Exception {
  double[] projected = new double[outputFormatPeek().numAttributes()];
  int classAtt = instance.classIndex();
  int numVals = instance.numValues();

  for (int row = 0; row < m_k; row++) {
    for (int pos = 0; pos < numVals; pos++) {
      int attIndex = instance.index(pos);
      if (attIndex == classAtt) {
        // class value goes to the last output slot untouched
        projected[m_k] = instance.valueSparse(pos);
      } else {
        double val = instance.valueSparse(pos);
        if (!Utils.isMissingValue(val)) {
          projected[row] += m_rmatrix[row][attIndex] * val;
        }
      }
    }
  }
  return new DenseInstance(instance.weight(), projected);
}
/**
 * log(N!) + (sum for all the words i)(log(Pi^ni) - log(ni!))
 *
 * where
 * N is the total number of words
 * Pi is the probability of obtaining word i
 * ni is the number of times the word at index i occurs in the document
 *
 * Actually, this method just computes (sum for all the words i)(log(Pi^ni))
 * because the factorials are irrelevant when posterior class probabilities
 * are computed.
 *
 * @param inst the instance to be classified
 * @param classIndex the index of the class we are calculating the
 *          probability with respect to
 * @return the log of the probability of the document occurring given the
 *         class
 */
protected double probOfDocGivenClass(Instance inst, int classIndex) {
  double logProb = 0;
  int classAtt = inst.classIndex();
  int numVals = inst.numValues();
  for (int pos = 0; pos < numVals; pos++) {
    int att = inst.index(pos);
    if (att == classAtt) {
      continue;
    }
    // ni * log(Pi); m_probOfWordGivenClass is multiplied directly, so it
    // presumably already stores log-probabilities — confirm against trainer
    logProb += inst.valueSparse(pos) * m_probOfWordGivenClass[classIndex][att];
  }
  return logProb;
}
/**
 * Calculates the centroid pivot of a node. The node is given in the form of
 * an indices array that contains the indices of the points inside the node.
 *
 * @param instList the indices array pointing to the instances in the node
 * @param insts the actual instances; instList points into this object
 * @return the calculated centre/pivot of the node
 */
public static Instance calcCentroidPivot(int[] instList, Instances insts) {
  double[] attrVals = new double[insts.numAttributes()];
  Instance temp;
  for (int i = 0; i < instList.length; i++) {
    temp = insts.instance(instList[i]);
    for (int j = 0; j < temp.numValues(); j++) {
      // BUG FIX: for sparse instances, position j in the value array is not
      // the attribute index; temp.index(j) gives the real attribute index.
      attrVals[temp.index(j)] += temp.valueSparse(j);
    }
  }
  for (int j = 0, numInsts = instList.length; j < attrVals.length; j++) {
    attrVals[j] /= numInsts;
  }
  temp = new DenseInstance(1.0, attrVals);
  return temp;
}
/**
 * log(N!) + (sum for all the words i)(log(Pi^ni) - log(ni!))
 *
 * where
 * N is the total number of words
 * Pi is the probability of obtaining word i
 * ni is the number of times the word at index i occurs in the document
 *
 * Actually, this method just computes (sum for all the words i)(log(Pi^ni))
 * because the factorials are irrelevant when posterior class probabilities
 * are computed.
 *
 * @param inst the instance to be classified
 * @param classIndex the index of the class we are calculating the
 *          probability with respect to
 * @return the log of the probability of the document occurring given the
 *         class
 */
protected double probOfDocGivenClass(Instance inst, int classIndex) {
  double total = 0;
  for (int pos = 0, n = inst.numValues(); pos < n; pos++) {
    int attIndex = inst.index(pos);
    if (attIndex != inst.classIndex()) {
      // word count times the stored per-class word score; the stored value
      // is presumably already a log-probability — verify against the trainer
      total += inst.valueSparse(pos) * m_probOfWordGivenClass[classIndex][attIndex];
    }
  }
  return total;
}
/** * * @param inst * @return * @throws Exception */ public double SVMOutput(Instance inst) throws Exception { double result = -m_b; // Is the machine linear? if (m_weights != null) { // Is weight vector stored in sparse format? for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != m_classIndex) { result += m_weights[inst.index(i)] * inst.valueSparse(i); } } } else { for (int i = m_supportVectors.getNext(-1); i != -1; i = m_supportVectors .getNext(i)) { result += (m_alpha[i] - m_alphaStar[i]) * m_kernel.eval(-1, i, inst); } } return result; }
/** * * @param inst * @return * @throws Exception */ public double SVMOutput(Instance inst) throws Exception { double result = -m_b; // Is the machine linear? if (m_weights != null) { // Is weight vector stored in sparse format? for (int i = 0; i < inst.numValues(); i++) { if (inst.index(i) != m_classIndex) { result += m_weights[inst.index(i)] * inst.valueSparse(i); } } } else { for (int i = m_supportVectors.getNext(-1); i != -1; i = m_supportVectors .getNext(i)) { result += (m_alpha[i] - m_alphaStar[i]) * m_kernel.eval(-1, i, inst); } } return result; }
/**
 * Updates the classifier with information from one training instance:
 * adds the instance weight to the class prior counts and the weighted word
 * counts to the per-class word statistics.
 *
 * @param instance the instance to be incorporated
 * @throws Exception if a word count is negative
 */
public void updateClassifier(Instance instance) throws Exception {
  double classValue = instance.value(instance.classIndex());
  if (Utils.isMissingValue(classValue)) {
    return; // instances with a missing class contribute nothing
  }

  int classIndex = (int) classValue;
  m_probOfClass[classIndex] += instance.weight();

  for (int a = 0; a < instance.numValues(); a++) {
    int attIndex = instance.index(a);
    if (attIndex == instance.classIndex() || instance.isMissingSparse(a)) {
      continue;
    }
    double numOccurrences = instance.valueSparse(a) * instance.weight();
    if (numOccurrences < 0) {
      throw new Exception(
        "Numeric attribute values must all be greater or equal to zero.");
    }
    m_wordsPerClass[classIndex] += numOccurrences;
    m_probOfWordGivenClass[classIndex][attIndex] += numOccurrences;
  }
}
/**
 * Updates the classifier with information from one training instance.
 * The instance weight is added to the class prior counts and each weighted
 * word count to the per-class word statistics; instances with a missing
 * class value are ignored.
 *
 * @param instance the instance to be incorporated
 * @throws Exception if a word count is negative
 */
public void updateClassifier(Instance instance) throws Exception {
  double classValue = instance.value(instance.classIndex());
  if (!Utils.isMissingValue(classValue)) {
    int classIndex = (int) classValue;
    m_probOfClass[classIndex] += instance.weight();

    int classAtt = instance.classIndex();
    int numVals = instance.numValues();
    for (int pos = 0; pos < numVals; pos++) {
      int attIndex = instance.index(pos);
      if (attIndex != classAtt && !instance.isMissingSparse(pos)) {
        double weightedCount = instance.valueSparse(pos) * instance.weight();
        if (weightedCount < 0) {
          throw new Exception(
            "Numeric attribute values must all be greater or equal to zero.");
        }
        m_wordsPerClass[classIndex] += weightedCount;
        m_probOfWordGivenClass[classIndex][attIndex] += weightedCount;
      }
    }
  }
}
/**
 * Merges this instance with the given instance and returns the result.
 * Dataset is set to null. The merged instance keeps this instance's stored
 * values first and appends the other instance's values with their indices
 * shifted past this instance's attributes.
 *
 * @param inst the instance to be merged with this one
 * @return the merged instance
 */
@Override
public Instance mergeInstance(Instance inst) {
  int total = numValues() + inst.numValues();
  double[] values = new double[total];
  int[] indices = new int[total];

  int m = 0;
  // copy this instance's stored values first
  for (int j = 0; j < numValues(); j++) {
    values[m] = valueSparse(j);
    indices[m] = index(j);
    m++;
  }
  // then append the other instance's values, shifted by our attribute count
  for (int j = 0; j < inst.numValues(); j++) {
    values[m] = inst.valueSparse(j);
    indices[m] = numAttributes() + inst.index(j);
    m++;
  }
  return new SparseInstance(1.0, values, indices,
    numAttributes() + inst.numAttributes());
}
/**
 * Merges this instance with the given instance and returns the result.
 * Dataset is set to null. The combined instance spans the attributes of
 * both: this instance's values keep their indices, the argument's indices
 * are offset by this instance's attribute count.
 *
 * @param inst the instance to be merged with this one
 * @return the merged instance
 */
@Override
public Instance mergeInstance(Instance inst) {
  int thisCount = numValues();
  int otherCount = inst.numValues();
  double[] mergedValues = new double[thisCount + otherCount];
  int[] mergedIndices = new int[thisCount + otherCount];

  for (int pos = 0; pos < thisCount; pos++) {
    mergedValues[pos] = valueSparse(pos);
    mergedIndices[pos] = index(pos);
  }
  int offset = numAttributes();
  for (int pos = 0; pos < otherCount; pos++) {
    mergedValues[thisCount + pos] = inst.valueSparse(pos);
    mergedIndices[thisCount + pos] = offset + inst.index(pos);
  }

  return new SparseInstance(1.0, mergedValues, mergedIndices,
    offset + inst.numAttributes());
}