// Fill every slot of c, in index order, with a freshly default-constructed Canopy.
for (int slot = 0; slot < c.length; slot++) {
  c[slot] = new Canopy();
}
/**
 * Renders this canopy as a single line of text.
 *
 * @return the letter {@code C}, followed by the id, a colon and a space, and finally the
 *         text produced by {@code computeCentroid().asFormatString()}
 */
public String asFormatString() {
  // Build the "C<id>: " prefix first so the id is read before the centroid is computed.
  String prefix = "C" + this.getId() + ": ";
  return prefix + this.computeCentroid().asFormatString();
}
/**
 * Emits the center of every sufficiently large canopy, then delegates to the superclass.
 *
 * <p>Each canopy has {@code computeParameters()} called on it. Canopies whose observation
 * count does not exceed {@code clusterFilter} are skipped; the others have their center
 * written under the fixed key {@code "centroid"}.
 *
 * @param context the task context the centers are written to
 * @throws IOException if writing to the context or the superclass cleanup fails
 * @throws InterruptedException if writing to the context or the superclass cleanup is interrupted
 */
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  for (Canopy current : canopies) {
    current.computeParameters();
    if (!(current.getNumObservations() > clusterFilter)) {
      // Too few observations: this canopy is filtered out.
      continue;
    }
    Text key = new Text("centroid");
    VectorWritable center = new VectorWritable(current.getCenter());
    context.write(key, center);
  }
  super.cleanup(context);
}
// Closes the enclosing class, whose header is outside this excerpt.
}
/**
 * Describes this canopy as its identifier, a colon and a space, and its formatted center.
 *
 * @return {@code getIdentifier() + ": " + getCenter().asFormatString()}
 */
@Override
public String toString() {
  // The identifier is read before the center, matching left-to-right concatenation.
  String label = getIdentifier() + ": ";
  return label + getCenter().asFormatString();
}
// Excerpt from a longer method (its header and closing braces are outside this view).
// A single ClusterWritable is reused for all canopies: each canopy has computeParameters()
// called, is logged at debug level, and -- when its observation count exceeds
// clusterFilter -- is set on the writable and appended to writer under its identifier.
// NOTE(review): as shown, the debug guard is not closed before the clusterFilter check;
// presumably lines were dropped from this excerpt -- confirm against the full source.
ClusterWritable clusterWritable = new ClusterWritable(); for (Canopy canopy : canopies) { canopy.computeParameters(); if (log.isDebugEnabled()) { log.debug("Writing Canopy:{} center:{} numPoints:{} radius:{}", canopy.getIdentifier(), AbstractCluster.formatVector(canopy.getCenter(), null), canopy.getNumObservations(), AbstractCluster.formatVector(canopy.getRadius(), null)); if (canopy.getNumObservations() > clusterFilter) { clusterWritable.setValue(canopy); writer.append(new Text(canopy.getIdentifier()), clusterWritable);
// Excerpt (the matching `if` and the enclosing method are outside this view): builds a
// Kluster from the canopy's center, id and measure and adds it to clusters. The else
// branch rejects the value class by throwing IllegalStateException naming valueClass.
clusters.add(new Kluster(canopy.getCenter(), canopy.getId(), canopy.getMeasure())); } else { throw new IllegalStateException("Bad value class: " + valueClass);
// Excerpt from a longer method (header and closing braces not visible). For each existing
// canopy the distance between its center and point is measured, passing the center's
// squared length as the first argument; when that distance is below t1 the visible code
// logs the addition and calls canopy.observe(point). The trailing statements log and
// create a new Canopy centered on point with the next id and add it to canopies.
// NOTE(review): pointStronglyBound is never updated in the visible code and the braces do
// not balance, so lines appear to be missing here -- confirm against the full source.
boolean pointStronglyBound = false; for (Canopy canopy : canopies) { double dist = measure.distance(canopy.getCenter().getLengthSquared(), canopy.getCenter(), point); if (dist < t1) { if (log.isDebugEnabled()) { log.debug("Added point: {} to canopy: {}", AbstractCluster.formatVector(point, null), canopy.getIdentifier()); canopy.observe(point); log.debug("Created new Canopy:{} at center:{}", nextCanopyId, AbstractCluster.formatVector(point, null)); canopies.add(new Canopy(point, nextCanopyId++, measure));
/**
 * Feeds every incoming vector to the canopy clusterer, then writes out the canopies that
 * pass the size filter.
 *
 * <p>After all values are added, each canopy has {@code computeParameters()} called on it.
 * Canopies whose observation count exceeds {@code clusterFilter} are wrapped in a new
 * {@code ClusterWritable} and written under their identifier.
 *
 * @param arg0 the reduce key (not read by this method)
 * @param values the vectors to add to the canopies
 * @param context the task context the canopies are written to
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void reduce(Text arg0, Iterable<VectorWritable> values, Context context)
    throws IOException, InterruptedException {
  for (VectorWritable writable : values) {
    Vector incoming = writable.get();
    canopyClusterer.addPointToCanopies(incoming, canopies);
  }

  for (Canopy current : canopies) {
    current.computeParameters();
    if (!(current.getNumObservations() > clusterFilter)) {
      // Too few observations: this canopy is filtered out.
      continue;
    }
    ClusterWritable wrapped = new ClusterWritable();
    wrapped.setValue(current);
    Text key = new Text(current.getIdentifier());
    context.write(key, wrapped);
  }
}
// Excerpt (surrounding loop and method not visible): takes the next point from ptIter,
// calls ptIter.remove() on it, and seeds a new Canopy with that point under the next id;
// the canopy is added to canopies before the remaining points are scanned.
// NOTE(review): p2 and c are not declared in the visible code and the while loop is not
// closed -- lines appear to be missing; confirm against the full source.
Vector p1 = ptIter.next(); ptIter.remove(); Canopy canopy = new Canopy(p1, nextCanopyId++, measure); canopies.add(canopy); while (ptIter.hasNext()) { canopy.observe(p2); c.computeParameters();
/**
 * Small demo: generates sample points, clusters them into canopies with a Euclidean
 * distance measure, and prints each canopy's id and center to standard output.
 *
 * <p>The list is filled by three calls to {@code RandomPointsUtil.generateSamples}; the
 * meaning of its numeric arguments is defined by that helper and is not visible here.
 * The values {@code 3.0} and {@code 1.5} are presumably the two canopy distance
 * thresholds -- confirm against {@code CanopyClusterer.createCanopies}.
 *
 * @param args command-line arguments (not read)
 */
public static void main(String[] args) {
  List<Vector> points = new ArrayList<Vector>();
  RandomPointsUtil.generateSamples(points, 400, 1, 1, 2);
  RandomPointsUtil.generateSamples(points, 300, 1, 0, 0.5);
  RandomPointsUtil.generateSamples(points, 300, 0, 2, 0.1);

  List<Canopy> found =
      CanopyClusterer.createCanopies(points, new EuclideanDistanceMeasure(), 3.0, 1.5);

  for (Canopy each : found) {
    String line = "Canopy id: " + each.getId() + " center: " + each.getCenter().asFormatString();
    System.out.println(line);
  }
}
// Closes the enclosing class, whose header is outside this excerpt.
}
/** * Story: User can cluster points using a EuclideanDistanceMeasure and a * reference implementation */ @Test public void testReferenceEuclidean() throws Exception { // see setUp for cluster creation printCanopies(referenceEuclidean); assertEquals("number of canopies", 3, referenceEuclidean.size()); int[] expectedNumPoints = { 5, 5, 3 }; double[][] expectedCentroids = { { 1.8, 1.8 }, { 4.2, 4.2 }, { 4.666666666666667, 4.666666666666667 } }; for (int canopyIx = 0; canopyIx < referenceEuclidean.size(); canopyIx++) { Canopy testCanopy = referenceEuclidean.get(canopyIx); assertEquals("canopy points " + canopyIx, testCanopy.getNumObservations(), expectedNumPoints[canopyIx]); double[] refCentroid = expectedCentroids[canopyIx]; Vector testCentroid = testCanopy.computeCentroid(); for (int pointIx = 0; pointIx < refCentroid.length; pointIx++) { assertEquals("canopy centroid " + canopyIx + '[' + pointIx + ']', refCentroid[pointIx], testCentroid.get(pointIx), EPSILON); } } }
/**
 * Collects the center of every canopy, in iteration order.
 *
 * @param canopies the canopies to read; each one contributes the value of
 *        {@code getCenter()}
 * @return a new list holding one center per canopy
 */
public static List<Vector> getCenters(Iterable<Canopy> canopies) {
  List<Vector> centers = Lists.newArrayList();
  for (Canopy each : canopies) {
    Vector center = each.getCenter();
    centers.add(center);
  }
  return centers;
}
/**
 * Builds the textual identifier of this canopy.
 *
 * @return the prefix {@code "C-"} followed by the value of {@code getId()}
 */
@Override
public String getIdentifier() {
  final String prefix = "C-";
  return prefix + getId();
}
// Closes the enclosing class, whose header is outside this excerpt.
}
// Excerpt (presumably the body of a loop that is not visible, since i is incremented):
// casts the value held by clusterWritable to Canopy and asserts that its computed
// centroid equals manhattanCentroids.get(i). The failure message spells out both
// vectors via asFormatString(). Finally i is advanced to the next expected centroid.
Canopy canopy = (Canopy) clusterWritable.getValue(); assertEquals(manhattanCentroids.get(i).asFormatString() + " is not equal to " + canopy.computeCentroid().asFormatString(), manhattanCentroids.get(i), canopy.computeCentroid()); i++;
/**
 * Calls {@code computeParameters()} on every canopy, in iteration order.
 *
 * <p>According to the original documentation of this method, doing so resets each
 * canopy's center to its centroid; that behavior lives in {@code computeParameters()}
 * and is not visible here.
 *
 * @param canopies the canopies to update
 */
public static void updateCentroids(Iterable<Canopy> canopies) {
  for (Canopy each : canopies) {
    each.computeParameters();
  }
}
/**
 * Creates a canopy around the given point and immediately observes that point.
 *
 * @param center a point in vector space; passed to the superclass and then observed
 * @param canopyId an int identifying the canopy local to this process only
 * @param measure the DistanceMeasure to use
 */
public Canopy(Vector center, int canopyId, DistanceMeasure measure) {
  super(center, canopyId, measure);
  // The seed point is observed by the canopy it defines.
  this.observe(center);
}
// Excerpt from a longer method (its header and closing braces are outside this view).
// A single ClusterWritable is reused for all canopies: each canopy has computeParameters()
// called, is logged at debug level, and -- when its observation count exceeds
// clusterFilter -- is set on the writable and appended to writer under its identifier.
// NOTE(review): as shown, the debug guard is not closed before the clusterFilter check;
// presumably lines were dropped from this excerpt -- confirm against the full source.
ClusterWritable clusterWritable = new ClusterWritable(); for (Canopy canopy : canopies) { canopy.computeParameters(); if (log.isDebugEnabled()) { log.debug("Writing Canopy:{} center:{} numPoints:{} radius:{}", canopy.getIdentifier(), AbstractCluster.formatVector(canopy.getCenter(), null), canopy.getNumObservations(), AbstractCluster.formatVector(canopy.getRadius(), null)); if (canopy.getNumObservations() > clusterFilter) { clusterWritable.setValue(canopy); writer.append(new Text(canopy.getIdentifier()), clusterWritable);
// Excerpt (the matching `if` and the enclosing method are outside this view): builds a
// Kluster from the canopy's center, id and measure and adds it to clusters. The else
// branch rejects the value class by throwing IllegalStateException naming valueClass.
clusters.add(new Kluster(canopy.getCenter(), canopy.getId(), canopy.getMeasure())); } else { throw new IllegalStateException("Bad value class: " + valueClass);
// Excerpt from a longer method (header and closing braces not visible). For each existing
// canopy the distance between its center and point is measured, passing the center's
// squared length as the first argument; when that distance is below t1 the visible code
// logs the addition and calls canopy.observe(point). The trailing statements log and
// create a new Canopy centered on point with the next id and add it to canopies.
// NOTE(review): pointStronglyBound is never updated in the visible code and the braces do
// not balance, so lines appear to be missing here -- confirm against the full source.
boolean pointStronglyBound = false; for (Canopy canopy : canopies) { double dist = measure.distance(canopy.getCenter().getLengthSquared(), canopy.getCenter(), point); if (dist < t1) { if (log.isDebugEnabled()) { log.debug("Added point: {} to canopy: {}", AbstractCluster.formatVector(point, null), canopy.getIdentifier()); canopy.observe(point); log.debug("Created new Canopy:{} at center:{}", nextCanopyId, AbstractCluster.formatVector(point, null)); canopies.add(new Canopy(point, nextCanopyId++, measure));
/**
 * Feeds every incoming vector to the canopy clusterer, then writes out the canopies that
 * pass the size filter.
 *
 * <p>After all values are added, each canopy has {@code computeParameters()} called on it.
 * Canopies whose observation count exceeds {@code clusterFilter} are wrapped in a new
 * {@code ClusterWritable} and written under their identifier.
 *
 * @param arg0 the reduce key (not read by this method)
 * @param values the vectors to add to the canopies
 * @param context the task context the canopies are written to
 * @throws IOException if writing to the context fails
 * @throws InterruptedException if writing to the context is interrupted
 */
@Override
protected void reduce(Text arg0, Iterable<VectorWritable> values, Context context)
    throws IOException, InterruptedException {
  for (VectorWritable writable : values) {
    Vector incoming = writable.get();
    canopyClusterer.addPointToCanopies(incoming, canopies);
  }

  for (Canopy current : canopies) {
    current.computeParameters();
    if (!(current.getNumObservations() > clusterFilter)) {
      // Too few observations: this canopy is filtered out.
      continue;
    }
    ClusterWritable wrapped = new ClusterWritable();
    wrapped.setValue(current);
    Text key = new Text(current.getIdentifier());
    context.write(key, wrapped);
  }
}