/**
 * Computes c * log2(c) for an integer value c, treating the c == 0 case
 * specially so that no NaN / -Infinity leaks out of the logarithm.
 *
 * @param c an integer value
 * @return c * log2(c), or 0 when c is 0
 */
public static/* @pure@ */double xlogx(int c) {
  return (c == 0) ? 0.0 : c * Utils.log2(c);
}
/**
 * Returns the coding cost for this split (used in the rule learner's
 * MDL-based evaluation): log2 of {@code m_index}.
 * NOTE(review): presumably m_index counts the candidate split points /
 * positions being chosen among — confirm against the field's definition.
 *
 * @return the coding cost in bits
 */
@Override
public final double codingCost() {
  return Utils.log2(m_index);
}
/**
 * Returns c * log2(c) for the given integer c. The zero case is handled
 * explicitly so the logarithm is never asked for log2(0).
 *
 * @param c an integer value
 * @return c * log2(c) (0 if c is 0)
 */
public static/* @pure@ */double xlogx(int c) {
  return (c == 0) ? 0.0 : c * Utils.log2(c);
}
/**
 * Returns the coding cost for this split (used in the rule learner's
 * MDL-based evaluation): log2 of {@code m_index}.
 * NOTE(review): presumably m_index counts the candidate split points /
 * positions being chosen among — confirm against the field's definition.
 *
 * @return the coding cost in bits
 */
@Override
public final double codingCost() {
  return Utils.log2(m_index);
}
/**
 * Returns the range of the split metric: log2 of the number of classes in
 * the supplied distribution, with a floor of two classes so the range is
 * never log2(0) or log2(1).
 *
 * @param preDist the class distribution before the split (class name to
 *          weight mass)
 * @return log2(max(2, preDist.size()))
 */
@Override
public double getMetricRange(Map<String, WeightMass> preDist) {
  return Utils.log2(Math.max(2, preDist.size()));
}
/**
 * Range of the split metric. Equivalent to log2(numClasses) where
 * numClasses is the size of the given distribution clamped below at 2.
 *
 * @param preDist the pre-split distribution of class name to weight mass
 * @return log2 of the (clamped) number of classes
 */
@Override
public double getMetricRange(Map<String, WeightMass> preDist) {
  return Utils.log2(Math.max(2, preDist.size()));
}
/**
 * Subset description length: <br>
 * S(t,k,p) = -k*log2(p)-(t-k)*log2(1-p)
 * <p>
 * Details see Quinlan: "MDL and categorical theories (Continued)", ML95
 *
 * @param t the number of elements in a known set
 * @param k the number of elements in a subset
 * @param p the expected proportion of subset known by recipient
 * @return the subset description length
 */
public static double subsetDL(double t, double k, double p) {
  // Guard p <= 0: Utils.log2(p) would be -Infinity, so that term is taken
  // as contributing nothing (mirrors the original intent of the ternary).
  double rt = Utils.gr(p, 0.0) ? (-k * Utils.log2(p)) : 0.0;
  // Guard t == k: the second term is 0 for any p < 1, but evaluating it
  // unconditionally when p == 1 computes 0 * log2(0) = 0 * -Infinity = NaN.
  // (For t > k with p == 1 the term is +Infinity — an "impossible" subset —
  // and that behavior is preserved.)
  if (t > k) {
    rt -= (t - k) * Utils.log2(1 - p);
  }
  return rt;
}
/**
 * Subset description length: <br>
 * S(t,k,p) = -k*log2(p)-(t-k)*log2(1-p)
 * <p>
 * Details see Quinlan: "MDL and categorical theories (Continued)", ML95
 *
 * @param t the number of elements in a known set
 * @param k the number of elements in a subset
 * @param p the expected proportion of subset known by recipient
 * @return the subset description length
 */
public static double subsetDL(double t, double k, double p) {
  // Guard p <= 0: Utils.log2(p) would be -Infinity, so that term is taken
  // as contributing nothing (mirrors the original intent of the ternary).
  double rt = Utils.gr(p, 0.0) ? (-k * Utils.log2(p)) : 0.0;
  // Guard t == k: the second term is 0 for any p < 1, but evaluating it
  // unconditionally when p == 1 computes 0 * log2(0) = 0 * -Infinity = NaN.
  // (For t > k with p == 1 the term is +Infinity — an "impossible" subset —
  // and that behavior is preserved.)
  if (t > k) {
    rt -= (t - k) * Utils.log2(1 - p);
  }
  return rt;
}
/**
 * Calculates the entropy of a vector of values:
 * entropy = (1/sum) * { -sigma[i=1..P](Xi*log2(Xi)) + sum*log2(sum) }
 * where P is the length of the vector.
 *
 * @param value the vector of values
 * @param sum the precomputed sum of the values (passed in for efficiency)
 * @return the entropy, or 0 when the sum is (effectively) zero
 */
protected double entropy(double[] value, double sum) {
  // Degenerate distribution: nothing to encode.
  if (Utils.smOrEq(sum, 1.0E-6)) {
    return 0;
  }
  double acc = 0;
  for (double v : value) {
    // Skip zero entries: 0 * log2(0) is taken as 0 by convention.
    if (!Utils.eq(v, 0)) {
      acc -= v * Utils.log2(v);
    }
  }
  acc += sum * Utils.log2(sum);
  return acc / sum;
}
/**
 * Calculates the probability using a binomial distribution. If the support of
 * the premise is too large (>= MAX_N) this distribution is approximated by a
 * normal distribution.
 *
 * @param accuracy the accuracy value
 * @param ruleCount the support of the whole rule
 * @param premiseCount the support of the premise
 * @return the probability value
 */
public static final double binomialDistribution(double accuracy,
  double ruleCount, double premiseCount) {
  double mu, sigma;
  if (premiseCount < MAX_N) {
    // Exact binomial term assembled in log2 space:
    // 2^( log2(acc^rule) + log2((1-acc)^(premise-rule)) + log2 C(premise, rule) ).
    // NOTE(review): Math.pow(accuracy, ruleCount) can underflow to 0 before
    // the log is taken for large exponents — presumably MAX_N keeps the
    // counts small enough; confirm against the definition of MAX_N.
    return Math
      .pow(
        2,
        (Utils.log2(Math.pow(accuracy, ruleCount))
          + Utils.log2(Math.pow((1.0 - accuracy), (premiseCount - ruleCount))) + PriorEstimation
          .logbinomialCoefficient((int) premiseCount, (int) ruleCount)));
  } else {
    // Normal approximation: mean mu = n*p, sigma = sqrt(n*p*(1-p)), with a
    // 0.5 continuity correction applied to ruleCount.
    mu = premiseCount * accuracy;
    sigma = Math.sqrt((premiseCount * (1.0 - accuracy)) * accuracy);
    return Statistics.normalProbability(((ruleCount + 0.5) - mu)
      / (sigma * Math.sqrt(2)));
  }
}
/**
 * The description length of the theory for a given rule. Computed as:<br>
 * 0.5* [||k||+ S(t, k, k/t)]<br>
 * where k is the number of antecedents of the rule; t is the total possible
 * antecedents that could appear in a rule; ||K|| is the universal prior for
 * k, log2*(k) and S(t,k,p) = -k*log2(p)-(n-k)log2(1-p) is the subset encoding
 * length.
 * <p>
 *
 * Details see Quinlan: "MDL and categorical theories (Continued)", ML95
 *
 * @param index the index of the given rule (assuming correct)
 * @return the theory DL, weighted if weight != 1.0
 */
public double theoryDL(int index) {
  // k = number of antecedents in the rule at the given index.
  double k = m_Ruleset.get(index).size();

  if (k == 0) {
    return 0.0; // an empty rule costs nothing to encode
  }

  double tdl = Utils.log2(k);
  if (k > 1) {
    // Second term of the universal prior log2*(k): 2 * log2(log2(k)).
    tdl += 2.0 * Utils.log2(tdl); // of log2 star
  }
  // Cost of identifying which k antecedents were chosen out of the m_Total
  // possible ones, with expected proportion k / m_Total.
  tdl += subsetDL(m_Total, k, k / m_Total);
  return MDL_THEORY_WEIGHT * REDUNDANCY_FACTOR * tdl;
}
/**
 * The description length of the theory for a given rule. Computed as:<br>
 * 0.5* [||k||+ S(t, k, k/t)]<br>
 * where k is the number of antecedents of the rule; t is the total possible
 * antecedents that could appear in a rule; ||K|| is the universal prior for
 * k, log2*(k) and S(t,k,p) = -k*log2(p)-(n-k)log2(1-p) is the subset encoding
 * length.
 * <p>
 *
 * Details see Quinlan: "MDL and categorical theories (Continued)", ML95
 *
 * @param index the index of the given rule (assuming correct)
 * @return the theory DL, weighted if weight != 1.0
 */
public double theoryDL(int index) {
  // k = number of antecedents in the rule at the given index.
  double k = m_Ruleset.get(index).size();

  if (k == 0) {
    return 0.0; // an empty rule costs nothing to encode
  }

  double tdl = Utils.log2(k);
  if (k > 1) {
    // Second term of the universal prior log2*(k): 2 * log2(log2(k)).
    tdl += 2.0 * Utils.log2(tdl); // of log2 star
  }
  // Cost of identifying which k antecedents were chosen out of the m_Total
  // possible ones, with expected proportion k / m_Total.
  tdl += subsetDL(m_Total, k, k / m_Total);
  return MDL_THEORY_WEIGHT * REDUNDANCY_FACTOR * tdl;
}
// NOTE(review): this fragment appears to be two variants of the same
// accumulation pasted together (likely from two different files): `addend`
// is declared twice, which would not compile if both statements shared one
// scope — confirm against the enclosing method(s).
// Both accumulate, in log2 space shifted by `max` (presumably the largest
// exponent, subtracted to avoid overflow — TODO confirm), a term of the form
// distr * (2^i - 1) * C(numAttributes, i), then fold it back via 2^addend.
double addend = Utils.log2(distr) - max + Utils.log2((Math.pow(2, i) - 1))
  + logbinomialCoefficient(m_instances.numAttributes(), i);
sum = sum + Math.pow(2, addend);
// Variant without the log2(distr) term.
double addend = Utils.log2((Math.pow(2, i) - 1)) - max
  + logbinomialCoefficient(m_instances.numAttributes(), i);
sum = sum + Math.pow(2, addend);
/**
 * Computes the entropy of a dataset's class distribution:
 * log2(N) - (1/N) * sum_j( count_j * log2(count_j) ).
 *
 * @param data the data for which entropy is to be computed
 * @return the entropy of the data's class distribution
 * @throws Exception if computation fails
 */
private double computeEntropy(Instances data) throws Exception {
  // Tally how many instances fall into each class.
  double[] counts = new double[data.numClasses()];
  Enumeration en = data.enumerateInstances();
  while (en.hasMoreElements()) {
    counts[(int) ((Instance) en.nextElement()).classValue()]++;
  }

  // Accumulate -sum(count_j * log2(count_j)) over the non-empty classes.
  double acc = 0;
  for (int j = 0; j < data.numClasses(); j++) {
    if (counts[j] > 0) {
      acc -= counts[j] * Utils.log2(counts[j]);
    }
  }
  acc /= data.numInstances();
  return acc + Utils.log2(data.numInstances());
}
// Coding cost after the split: log2(numCutPoints) bits to identify the chosen
// cut point, plus the post-split terms. NOTE(review): distAfter/instAfter are
// computed elsewhere — presumably the class-distribution and instance coding
// costs after the split; confirm in the enclosing method.
after = Utils.log2(numCutPoints) + distAfter + instAfter;
// Coding cost after the split: log2(numCutPoints) bits to identify the chosen
// cut point, plus the post-split terms. NOTE(review): distAfter/instAfter are
// computed elsewhere — presumably the class-distribution and instance coding
// costs after the split; confirm in the enclosing method.
after = Utils.log2(numCutPoints) + distAfter + instAfter;
// Bits to encode the total number of instances: log2(cover + uncover + 1);
// the +1 presumably accounts for the empty-data case — TODO confirm.
double totalBits = Utils.log2(cover + uncover + 1.0); // how many data?
double coverBits, uncoverBits; // bits for the covered / uncovered portions
double expErr; // Expected FP or FN
// Bits to encode the total number of instances: log2(cover + uncover + 1);
// the +1 presumably accounts for the empty-data case — TODO confirm.
double totalBits = Utils.log2(cover + uncover + 1.0); // how many data?
double coverBits, uncoverBits; // bits for the covered / uncovered portions
double expErr; // Expected FP or FN