@SuppressWarnings("unchecked") private void updateCluster(Instance inst, Cluster cluster) { if (inst instanceof Cluster) { // merge the clusters Cluster c = (Cluster)inst; for (String key : c.getCentroids().keySet()) { Centroid<Feature> update = c.getCentroids().get(key); Centroid<Feature> centroid = cluster.getCentroids().get(key); // get all the aggregate feature values associated with update for (Feature f : update.getAggregatableCentroid()) { centroid.add(f); } } // after merging cluster we should manually update the resulting centroid cluster.updateCentroid(); // TODO should handle merging the cluster members } else { // simply add the instance to the cluster cluster.add(inst); } }
/**
 * BCubed precision is the proportion of instances in a cluster that share the same label.
 *
 * @param label is the class label we are evaluating
 * @param cluster that we are evaluating
 * @return the proportion of instances in cluster that have the specified label;
 *         0 when no member carries the label
 */
public double precision(String label, Cluster cluster) {
    int matching = 0;
    for (Instance member : cluster.getMembers()) {
        if (member.hasClassLabel(label)) {
            matching++;
        }
    }
    if (matching == 0) {
        return 0;
    }
    return (double) matching / cluster.getMembers().size();
}
/**
 * Builds a new cluster identified by a freshly generated random UUID string,
 * configured with the values of {@code featureTypeDefs} and the
 * {@code onlineUpdate} flag.
 *
 * @return the newly constructed cluster
 */
public Cluster create() {
    String clusterId = UUID.randomUUID().toString();
    return new Cluster(clusterId, featureTypeDefs.values(), onlineUpdate);
}
} // closes the enclosing type, whose header lies outside this excerpt
private double getMI(int numInstances, Collection<Instance> event, Collection<Cluster> clusters) { double mi = 0; for (Cluster c : clusters) { // calc the intersection of the event with the cluster Set<Instance> intersect = new HashSet<Instance>(c.getMembers()); intersect.retainAll(event); if (intersect.isEmpty()) continue; // calc mutual information of event with cluster mi += ((double)intersect.size() / numInstances) * Math.log(numInstances * (double)intersect.size() / (event.size() * c.size())); } return mi; }
// Excerpt of statements whose enclosing method is not visible here: inst is added to
// bestCluster, the centroid is refreshed only when onlineUpdate is false, and bestCluster
// is appended to clusters. NOTE(review): the trailing add/updateCentroid pair (which
// touches `c`, not bestCluster) presumably belongs to a separate code path -- confirm.
bestCluster.add(inst); if (!onlineUpdate) bestCluster.updateCentroid(); clusters.add(bestCluster); bestCluster.add(inst); c.updateCentroid();
@Override public Map<String, Instance> call(Map<String, Instance> clusters1, Map<String, Instance> cluster2) throws Exception { BestClusterFunction bestClusterFunc = new BestClusterFunction( distFunc, clusters1, threshold ); for (String key : cluster2.keySet()) { Instance instance = cluster2.get(key); Tuple2<String, Instance> result = bestClusterFunc.call(new Tuple2<String, Instance>(instance.getId(), instance)); if (result == null) { clusters1.put(instance.getId(), instance); } else { Cluster cluster = (Cluster)clusters1.get(result._1); cluster.add(instance); // revise the cluster centroid cluster.getMembers().clear(); // no need to retain the member list } } return clusters1; } }
/**
 * Merges {@code clusterList2} into {@code clusterList1}.
 *
 * When {@code clusterList1} is empty, everything from {@code clusterList2} is copied
 * over as-is. Otherwise each entry of {@code clusterList2} is looked up with a
 * {@code BestClusterFunction} over {@code clusterList1}: an entry with no reported
 * cluster id is stored under its own id, any other entry is added to the reported
 * cluster.
 *
 * @param clusterList1 the map that receives the merged result (modified in place)
 * @param clusterList2 the entries to merge in
 * @return {@code clusterList1}
 * @throws Exception propagated from the best-cluster lookup
 */
@Override
public Map<String, Instance> call(Map<String, Instance> clusterList1, Map<String, Instance> clusterList2) throws Exception {
    if (clusterList1.isEmpty()) {
        clusterList1.putAll(clusterList2);
        return clusterList1;
    }

    for (Instance candidate : clusterList2.values()) {
        BestClusterFunction matcher = new BestClusterFunction(distFunc, clusterList1, threshold);
        Tuple2<String, Instance> query = new Tuple2<String, Instance>(candidate.getId(), candidate);
        Tuple2<String, Instance> match = matcher.call(query);

        if (match._1 != null) {
            ((Cluster) clusterList1.get(match._1)).add(candidate);
            continue;
        }
        clusterList1.put(candidate.getId(), candidate);
    }

    return clusterList1;
}
} // closes the enclosing type, whose header lies outside this excerpt
// Excerpt (enclosing method not visible): resets `mean`, then logs the iteration debug
// info of every cluster in kmeans at info level, prefixed with the counter c.
// NOTE(review): c is never incremented in the visible statements, so each line would be
// logged with 0 -- confirm whether an increment was lost.
mean.reset(); int c=0; for (Cluster cluster: kmeans) log.info("\t "+c+": "+cluster.getIterationDebugInfo());
/**
 * Refreshes this object's features from its centroids: for every key in
 * {@code centroids}, the current centroid value is handed to {@code addFeature}.
 */
public void updateCentroid() {
    for (String featureName : centroids.keySet()) {
        addFeature(centroids.get(featureName).getCentroid());
    }
}
private List<Cluster> initKMeans(DataSet ds) { List<Cluster> kmeans = new LinkedList<Cluster>(); int ki = (ds.size() < k) ? ds.size(): k; // randomly pick k instances as the initial k means ArrayList<String> indexes = new ArrayList<String>(ds.size()); ArrayList<String> keys = new ArrayList<String>(ds.getKeys()); for (int i = 0; i < keys.size(); i++) { indexes.add( keys.get(i) ); } Collections.shuffle(indexes); for (int i = 0; i < ki; i++) { Cluster c = this.createCluster(); c.add( ds.get(indexes.get(i)) ); c.updateCentroid(); kmeans.add(c); } return kmeans; }
/**
 * Seeds k-means from a sample of the data set's RDD.
 *
 * Up to {@code k} points are sampled without replacement using a random seed; each
 * sampled point becomes the sole member of a new cluster, stored under the cluster's id.
 *
 * @param ds the Spark-backed data set to sample from
 * @return the initial clusters keyed by cluster id
 */
private Map<String, Instance> initKMeans(SparkDataSet ds) {
    Map<String, Instance> kmeans = new HashMap<String, Instance>(k);

    int seed = (new Random()).nextInt();
    List<Tuple2<String, Instance>> sampled = ds.getRDD().takeSample(false, k, seed);

    for (Tuple2<String, Instance> sample : sampled) {
        Cluster initial = this.createCluster();
        initial.add(sample._2);
        kmeans.put(initial.getId(), initial);
    }
    return kmeans;
}
/**
 * Computes the cohesion of a cluster as a per-member average.
 *
 * A member that is itself a cluster contributes its own (recursively computed)
 * cohesion; any other member contributes its distance to {@code cluster} as reported
 * by {@code clusterer}. The sum is divided by {@code cluster.size()}.
 *
 * @param clusterer supplies the distance between the cluster and a member
 * @param cluster the cluster being scored
 * @return the summed contributions divided by the cluster size
 */
public static double cohesion(Clusterer clusterer, Cluster cluster) {
    double memberCount = cluster.size();
    double total = 0;
    for (Instance member : cluster.getMembers()) {
        total += (member instanceof Cluster)
                ? cohesion(clusterer, (Cluster) member)
                : clusterer.distance(cluster, member);
    }
    return total / memberCount;
}
if ((pair._2 instanceof Cluster) == false) { Cluster c = this.createCluster(); c.add(pair._2); curKmeans.put(pair._1, c);
/**
 * Applies one instance to the per-feature centroids.
 *
 * For every centroid key, the instance's feature of the same name (if it has one) is
 * either removed from or added to that centroid. When {@code onlineUpdate} is set, the
 * centroid's current value is immediately pushed through {@code addFeature}.
 *
 * @param inst the instance whose features are applied
 * @param removefrom {@code true} to remove the instance's features from the centroids,
 *                   {@code false} to add them
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
protected void updateCentroids(Instance inst, boolean removefrom) {
    for (String featureName : centroids.keySet()) {
        Centroid centroid = centroids.get(featureName);
        Feature value = inst.getFeature(featureName);
        if (value == null) {
            // the instance does not carry this feature; leave the centroid untouched
            continue;
        }

        if (!removefrom) {
            centroid.add(value);
        } else {
            centroid.remove(value);
        }

        if (onlineUpdate) {
            addFeature(centroid.getCentroid());
        }
    }
}
/**
 * Counts the non-cluster instances in {@code group}, descending recursively into the
 * members of any element that is itself a cluster.
 *
 * @param group the instances (possibly including clusters) to count
 * @return the number of non-cluster instances reachable from {@code group}
 */
public int instanceCount(Collection<? extends Instance> group) {
    int total = 0;
    for (Instance member : group) {
        total += (member instanceof Cluster)
                ? instanceCount(((Cluster) member).getMembers())
                : 1;
    }
    return total;
}
// Excerpt (enclosing method not visible): a data set entry is added to `cluster`, the
// cluster's centroid is refreshed and the cluster is appended to kmeans -- first for the
// entry at keys.get(0), then for the entry at `key`.
// NOTE(review): both halves use the same `cluster` variable as shown here; presumably
// they come from separate scopes -- confirm.
cluster.add( ds.get(keys.get(0)) ); cluster.updateCentroid(); kmeans.add(cluster); cluster.add( ds.get(key) ); cluster.updateCentroid(); kmeans.add(cluster);
/**
 * Groups the given instances into clusters.
 *
 * Each instance is looked up with a {@code BestClusterFunction} over the clusters built
 * so far. When a cluster id is reported the instance joins that cluster; otherwise a
 * new cluster is created through {@code clusterFactory}, registered under its id, and
 * the instance becomes its first member.
 *
 * @param instances the (id, instance) pairs to cluster
 * @return the clusters that were built
 * @throws Exception propagated from the best-cluster lookup
 */
@Override
public Iterable<Instance> call(Iterator<Tuple2<String, Instance>> instances) throws Exception {
    Map<String, Instance> clusters = new HashMap<String, Instance>();
    BestClusterFunction bestClusterFunc = new BestClusterFunction(distFunc, clusters, threshold);

    while (instances.hasNext()) {
        Tuple2<String, Instance> current = instances.next();
        Tuple2<String, Instance> match = bestClusterFunc.call(current);

        Cluster target;
        if (match._1 != null) {
            target = (Cluster) clusters.get(match._1);
        } else {
            target = clusterFactory.create();
            clusters.put(target.getId(), target);
        }
        target.add(current._2);
    }

    return clusters.values();
}