private List<Cluster> initKMeans(DataSet ds) { List<Cluster> kmeans = new LinkedList<Cluster>(); int ki = (ds.size() < k) ? ds.size(): k; // randomly pick k instances as the initial k means ArrayList<String> indexes = new ArrayList<String>(ds.size()); ArrayList<String> keys = new ArrayList<String>(ds.getKeys()); for (int i = 0; i < keys.size(); i++) { indexes.add( keys.get(i) ); } Collections.shuffle(indexes); for (int i = 0; i < ki; i++) { Cluster c = this.createCluster(); c.add( ds.get(indexes.get(i)) ); c.updateCentroid(); kmeans.add(c); } return kmeans; }
bestCluster = createCluster(); bestCluster.add(inst); if (!onlineUpdate) bestCluster.updateCentroid(); clusters.add(bestCluster); c.updateCentroid();
cluster.updateCentroid(); kmeans.add(cluster); cluster = this.createCluster(); cluster.add( ds.get(key) ); cluster.updateCentroid(); kmeans.add(cluster);
@SuppressWarnings("unchecked") private void updateCluster(Instance inst, Cluster cluster) { if (inst instanceof Cluster) { // merge the clusters Cluster c = (Cluster)inst; for (String key : c.getCentroids().keySet()) { Centroid<Feature> update = c.getCentroids().get(key); Centroid<Feature> centroid = cluster.getCentroids().get(key); // get all the aggregate feature values associated with update for (Feature f : update.getAggregatableCentroid()) { centroid.add(f); } } // after merging cluster we should manually update the resulting centroid cluster.updateCentroid(); // TODO should handle merging the cluster members } else { // simply add the instance to the cluster cluster.add(inst); } }