/**
 * Computes the BCubed precision of a cluster with respect to a class label:
 * the fraction of the cluster's members that carry that label.
 *
 * @param label the class label being evaluated
 * @param cluster the cluster being evaluated
 * @return the fraction of members of {@code cluster} that have {@code label},
 *         or 0 when no member has it
 */
public double precision(String label, Cluster cluster) {
    double matching = 0;
    for (Instance member : cluster.getMembers()) {
        if (member.hasClassLabel(label)) {
            matching++;
        }
    }
    // No matching member (which includes the empty cluster): precision is 0.
    if (matching <= 0) {
        return 0;
    }
    return matching / cluster.getMembers().size();
}
/**
 * Sums {@code this.distance(cluster, member)} over every cluster in the
 * given result and every member of each cluster.
 *
 * @param clusters the clustering result to iterate over
 * @return the accumulated distance; 0 when there are no clusters or members
 */
private double error(ClusterResult clusters) { double error = 0; for (Cluster c : clusters) { for (Instance i : c.getMembers()) { error += this.distance(c, i); } } return error; } private boolean isConverged(double oldError, double newError) {
private double getMI(int numInstances, Collection<Instance> event, Collection<Cluster> clusters) { double mi = 0; for (Cluster c : clusters) { // calc the intersection of the event with the cluster Set<Instance> intersect = new HashSet<Instance>(c.getMembers()); intersect.retainAll(event); if (intersect.isEmpty()) continue; // calc mutual information of event with cluster mi += ((double)intersect.size() / numInstances) * Math.log(numInstances * (double)intersect.size() / (event.size() * c.size())); } return mi; }
/**
 * Counts the non-cluster instances in a group, descending recursively into
 * every element that is itself a {@code Cluster}.
 *
 * @param group the instances (possibly including nested clusters) to count
 * @return the number of non-cluster instances reachable from {@code group}
 */
public int instanceCount(Collection<? extends Instance> group) {
    int total = 0;
    for (Instance member : group) {
        total += (member instanceof Cluster)
                ? instanceCount(((Cluster) member).getMembers())
                : 1;
    }
    return total;
}
/**
 * Tests whether an instance appears anywhere inside a cluster, searching
 * nested clusters recursively. Non-cluster members are compared by
 * reference identity ({@code ==}); cluster members are only descended into.
 *
 * @param inst the instance to look for
 * @param c the cluster to search
 * @return {@code true} if {@code inst} is found, {@code false} otherwise
 */
private boolean contains(Instance inst, Cluster c) {
    for (Instance member : c.getMembers()) {
        boolean found = (member instanceof Cluster)
                ? contains(inst, (Cluster) member)
                : member == inst;
        if (found) {
            return true;
        }
    }
    return false;
}
/**
 * Returns how many non-cluster instances a group holds. Elements that are
 * clusters are not counted themselves; their members are counted
 * recursively instead.
 *
 * @param group the collection of instances and/or clusters to inspect
 * @return the number of non-cluster instances found
 */
public int instanceCount(Collection<? extends Instance> group) {
    int leaves = 0;
    for (Instance element : group) {
        if (!(element instanceof Cluster)) {
            leaves++;
            continue;
        }
        Cluster nested = (Cluster) element;
        leaves += instanceCount(nested.getMembers());
    }
    return leaves;
}
/**
 * Builds a string form of this object, optionally followed by a listing of
 * its members.
 *
 * @param printMembers when {@code true}, appends a "Members:" section with
 *        one member per line after the superclass string
 * @return the superclass {@code toString()} output, plus the member listing
 *         when requested
 */
public String toString(boolean printMembers) {
    StringBuilder out = new StringBuilder();
    out.append(super.toString());
    if (!printMembers) {
        return out.toString();
    }

    out.append("\nMembers:\n");
    for (Instance member : this.getMembers()) {
        out.append(member.toString()).append("\n");
    }
    return out.toString();
}
/**
 * Groups every member of the given clusters by its class label.
 *
 * @param clusters the clusters whose members are to be grouped
 * @return a map from class label to the instances carrying that label, in
 *         the order they were encountered
 */
private Map<String, Collection<Instance>> getEvents(Collection<Cluster> clusters) {
    Map<String, Collection<Instance>> byLabel = new HashMap<String, Collection<Instance>>();
    for (Cluster cluster : clusters) {
        for (Instance member : cluster.getMembers()) {
            String label = member.getClassLabel();
            Collection<Instance> bucket = byLabel.get(label);
            if (bucket == null) {
                // First time this label is seen: start its group.
                bucket = new ArrayList<Instance>();
                byLabel.put(label, bucket);
            }
            bucket.add(member);
        }
    }
    return byLabel;
}
/*** * BCubed recall is the proportion of instances with class label are in this cluster. * @param label is the class label we are evaluating * @param cluster that we are evaluating * @param clusters is a collection of all the clusters * @return the proportion of instances with class label that are members of cluster */ public double recall(String label, Cluster cluster, Collection<Cluster> clusters) { double count = 0, total = 0, recall = 0; // first determine how many instances have class label in this cluster for (Instance i : cluster.getMembers()) { if (i.hasClassLabel(label)) count++; } // find out how many instances in total have this class label for (Cluster c : clusters) { for (Instance i : c.getMembers()) { if (i.hasClassLabel(label)) total++; } } if (total > 0) recall = count / total; return recall; }
/**
 * Averages, over the clusters returned by {@code getClusters(inst, clusters)},
 * the ratio of members sharing {@code inst}'s class label to the total
 * instance count of the cluster.
 *
 * @param inst the instance whose class label is evaluated
 * @param clusters the clusters passed to {@code getClusters}
 * @return the mean per-cluster precision for {@code inst}
 */
public double avePrecision(Instance inst, Collection<Cluster> clusters) {
    double sum = 0;
    Collection<Cluster> containing = getClusters(inst, clusters);
    for (Cluster c : containing) {
        double sameLabel = labelCount(inst.getClassLabel(), c.getMembers());
        sum += sameLabel / instanceCount(c.getMembers());
    }
    return sum / containing.size();
}
/**
 * Counts the non-cluster instances in a group that have the given class
 * label, descending recursively into elements that are clusters.
 *
 * @param label the class label to look for
 * @param group the instances (possibly including nested clusters) to scan
 * @return the number of non-cluster instances with {@code label}
 */
public int labelCount(String label, Collection<? extends Instance> group) {
    int matches = 0;
    for (Instance member : group) {
        if (member instanceof Cluster) {
            Cluster nested = (Cluster) member;
            matches += labelCount(label, nested.getMembers());
            continue;
        }
        if (member.hasClassLabel(label)) {
            matches++;
        }
    }
    return matches;
}
/**
 * Returns how many non-cluster instances in a group carry a class label.
 * Cluster elements are never tested against the label themselves; their
 * members are scanned recursively.
 *
 * @param label the class label to match
 * @param group the collection of instances and/or clusters to scan
 * @return the number of matching non-cluster instances
 */
public int labelCount(String label, Collection<? extends Instance> group) {
    int tally = 0;
    for (Instance item : group) {
        tally += (item instanceof Cluster)
                ? labelCount(label, ((Cluster) item).getMembers())
                : (item.hasClassLabel(label) ? 1 : 0);
    }
    return tally;
}
@Override public Map<String, Instance> call(Map<String, Instance> clusters1, Map<String, Instance> cluster2) throws Exception { BestClusterFunction bestClusterFunc = new BestClusterFunction( distFunc, clusters1, threshold ); for (String key : cluster2.keySet()) { Instance instance = cluster2.get(key); Tuple2<String, Instance> result = bestClusterFunc.call(new Tuple2<String, Instance>(instance.getId(), instance)); if (result == null) { clusters1.put(instance.getId(), instance); } else { Cluster cluster = (Cluster)clusters1.get(result._1); cluster.add(instance); // revise the cluster centroid cluster.getMembers().clear(); // no need to retain the member list } } return clusters1; } }
public Score bestFmeasure(String label, double labelCount, Collection<? extends Instance> group) { Score score = new Score(), best = new Score(); double lcount = 0, count = 0; for (Instance i : group) { if (i instanceof Cluster) { // calculate stats for this cluster lcount += labelCount(label, ((Cluster) i).getMembers()); count += instanceCount(((Cluster) i).getMembers()); // calculate child cluster's best f measure score = bestFmeasure(label, labelCount, ((Cluster)i).getMembers()); // update the best f measure found so far if (score.f > best.f) best = score; } } // calculate f measure for this cluster score.precision = (lcount / count); // precision score.recall = (lcount / labelCount); // recall score.f = 2 * score.precision * score.recall / (score.precision + score.recall); // f measure // update the best f measure found if (score.f > best.f) { best = score; } // return best f measure for this class label return best; }
public double validate(Collection<Cluster> clusters) { int numInstances = 0; double norm = 0, factor1 = 0, factor2 = 0, mi = 0; // calculate the total number of instances Map<String, Collection<Instance>> events = getEvents(clusters); for (Collection<Instance> e : events.values()) { numInstances += e.size(); } // calculate normalization factor for (Cluster c : clusters) { factor1 += getNormFactor(numInstances, c.getMembers()); } for (Collection<Instance> e : events.values()) { factor2 += getNormFactor(numInstances, e); } norm = 0.5 * (-1 * factor1 - factor2); // calculate the mutual information for all events and clusters for (String label : events.keySet()) { mi += getMI(numInstances, events.get(label), clusters); } // return normalized nmi return (mi / norm); } }
for (Instance instance : cluster.getMembers()) { for (Feature f : instance.getAllFeatures()) { if (f instanceof TrackFeature) {
/**
 * Computes the cohesion of a cluster: the sum of a per-member value divided
 * by {@code cluster.size()}. For a member that is itself a cluster the value
 * is that cluster's own cohesion (computed recursively); for any other
 * member it is {@code clusterer.distance(cluster, member)}.
 *
 * @param clusterer supplies the distance between the cluster and a member
 * @param cluster the cluster to evaluate
 * @return the accumulated value divided by {@code cluster.size()}
 */
public static double cohesion(Clusterer clusterer, Cluster cluster) {
    double size = cluster.size();
    double total = 0;
    for (Instance member : cluster.getMembers()) {
        if (member instanceof Cluster) {
            total += cohesion(clusterer, (Cluster) member);
            continue;
        }
        total += clusterer.distance(cluster, member);
    }
    return total / size;
}
public double aveRecall(Instance inst, Collection<Cluster> clusters) { double r = 0, labelCount = labelCount(inst.getClassLabel(), clusters); Collection<Cluster> iclusters = getClusters(inst, clusters); for (Cluster c : iclusters) { r += (double)labelCount(inst.getClassLabel(), c.getMembers()) / labelCount; } // count will never be zero since we include inst itself return (r / iclusters.size()); }
public double validate(Collection<Cluster> clusters) { int num = 0; double p = 0, r = 0; for (Cluster c : clusters) { for (Instance inst : c.getMembers()) { p += precision(inst.getClassLabel(), c) ; r += recall(inst.getClassLabel(), c, clusters); num++; } } // calculate average precision and recall precision = p / num; recall = r / num; // calculate and return the f score f = 2 * precision * recall / (precision + recall); return f; }