/**
 * Sums, over all datapoints, the distance from each point to its closest cluster.
 *
 * <p>The centroids are loaded into a {@code ProjectionSearch} built on an
 * {@code EuclideanDistanceMeasure} (3 projections, search size 1); the actual summation is
 * delegated to the overload of this method that accepts a searcher.
 *
 * @param datapoints iterable of datapoints.
 * @param centroids iterable of centroids.
 * @return the total cost described above.
 */
public static double totalClusterCost(Iterable<? extends Vector> datapoints,
                                      Iterable<? extends Vector> centroids) {
  UpdatableSearcher centroidIndex = new ProjectionSearch(new EuclideanDistanceMeasure(), 3, 1);
  centroidIndex.addAll(centroids);
  return totalClusterCost(datapoints, centroidIndex);
}
/**
 * Sums, over all datapoints, the distance from each point to its closest cluster.
 *
 * <p>The centroids are loaded into a {@code ProjectionSearch} built on an
 * {@code EuclideanDistanceMeasure} (3 projections, search size 1); the actual summation is
 * delegated to the overload of this method that accepts a searcher.
 *
 * @param datapoints iterable of datapoints.
 * @param centroids iterable of centroids.
 * @return the total cost described above.
 */
public static double totalClusterCost(Iterable<? extends Vector> datapoints,
                                      Iterable<? extends Vector> centroids) {
  UpdatableSearcher centroidIndex = new ProjectionSearch(new EuclideanDistanceMeasure(), 3, 1);
  centroidIndex.addAll(centroids);
  return totalClusterCost(datapoints, centroidIndex);
}
/**
 * Sums, over all datapoints, the distance from each point to its closest cluster.
 *
 * <p>The centroids are loaded into a {@code ProjectionSearch} built on an
 * {@code EuclideanDistanceMeasure} (3 projections, search size 1); the actual summation is
 * delegated to the overload of this method that accepts a searcher.
 *
 * @param datapoints iterable of datapoints.
 * @param centroids iterable of centroids.
 * @return the total cost described above.
 */
public static double totalClusterCost(Iterable<? extends Vector> datapoints,
                                      Iterable<? extends Vector> centroids) {
  UpdatableSearcher centroidIndex = new ProjectionSearch(new EuclideanDistanceMeasure(), 3, 1);
  centroidIndex.addAll(centroids);
  return totalClusterCost(datapoints, centroidIndex);
}
/**
 * Builds one distance summary per cluster.
 *
 * <p>The centroids are loaded into a {@code ProjectionSearch} (3 projections, search size 1) that
 * uses the supplied distance measure. Every datapoint is looked up in that searcher, and the
 * distance between the point and the returned centroid is added to the summarizer stored at the
 * centroid's index.
 *
 * @param datapoints iterable of datapoints.
 * @param centroids iterable of centroids. The value returned by the searcher is cast to
 *                  {@code Centroid}, so these are expected to be {@code Centroid} instances whose
 *                  {@code getIndex()} is a valid position in the returned list.
 * @param distanceMeasure measure used to build the searcher and to compute each recorded distance.
 * @return a list of OnlineSummarizers where the i-th element is the summarizer corresponding to the
 *         cluster whose index is i; empty when the searcher holds no centroids.
 */
public static List<OnlineSummarizer> summarizeClusterDistances(Iterable<? extends Vector> datapoints,
                                                               Iterable<? extends Vector> centroids,
                                                               DistanceMeasure distanceMeasure) {
  UpdatableSearcher centroidIndex = new ProjectionSearch(distanceMeasure, 3, 1);
  centroidIndex.addAll(centroids);

  // One summarizer per centroid held by the searcher; with no centroids the list stays empty.
  List<OnlineSummarizer> perCluster = new ArrayList<>();
  for (int cluster = 0; cluster < centroidIndex.size(); cluster++) {
    perCluster.add(new OnlineSummarizer());
  }
  if (perCluster.isEmpty()) {
    // Nothing to search against, so the datapoints are not visited at all.
    return perCluster;
  }

  for (Vector point : datapoints) {
    Centroid nearest = (Centroid) centroidIndex.search(point, 1).get(0).getValue();
    perCluster.get(nearest.getIndex()).add(distanceMeasure.distance(point, nearest));
  }
  return perCluster;
}
/**
 * Builds one distance summary per cluster.
 *
 * <p>The centroids are loaded into a {@code ProjectionSearch} (3 projections, search size 1) that
 * uses the supplied distance measure. Every datapoint is looked up in that searcher, and the
 * distance between the point and the returned centroid is added to the summarizer stored at the
 * centroid's index.
 *
 * @param datapoints iterable of datapoints.
 * @param centroids iterable of centroids. The value returned by the searcher is cast to
 *                  {@code Centroid}, so these are expected to be {@code Centroid} instances whose
 *                  {@code getIndex()} is a valid position in the returned list.
 * @param distanceMeasure measure used to build the searcher and to compute each recorded distance.
 * @return a list of OnlineSummarizers where the i-th element is the summarizer corresponding to the
 *         cluster whose index is i; empty when the searcher holds no centroids.
 */
public static List<OnlineSummarizer> summarizeClusterDistances(Iterable<? extends Vector> datapoints,
                                                               Iterable<? extends Vector> centroids,
                                                               DistanceMeasure distanceMeasure) {
  UpdatableSearcher centroidIndex = new ProjectionSearch(distanceMeasure, 3, 1);
  centroidIndex.addAll(centroids);

  // One summarizer per centroid held by the searcher; with no centroids the list stays empty.
  List<OnlineSummarizer> perCluster = Lists.newArrayList();
  for (int cluster = 0; cluster < centroidIndex.size(); cluster++) {
    perCluster.add(new OnlineSummarizer());
  }
  if (perCluster.isEmpty()) {
    // Nothing to search against, so the datapoints are not visited at all.
    return perCluster;
  }

  for (Vector point : datapoints) {
    Centroid nearest = (Centroid) centroidIndex.search(point, 1).get(0).getValue();
    perCluster.get(nearest.getIndex()).add(distanceMeasure.distance(point, nearest));
  }
  return perCluster;
}
/**
 * Builds one distance summary per cluster.
 *
 * <p>The centroids are loaded into a {@code ProjectionSearch} (3 projections, search size 1) that
 * uses the supplied distance measure. Every datapoint is looked up in that searcher, and the
 * distance between the point and the returned centroid is added to the summarizer stored at the
 * centroid's index.
 *
 * @param datapoints iterable of datapoints.
 * @param centroids iterable of centroids. The value returned by the searcher is cast to
 *                  {@code Centroid}, so these are expected to be {@code Centroid} instances whose
 *                  {@code getIndex()} is a valid position in the returned list.
 * @param distanceMeasure measure used to build the searcher and to compute each recorded distance.
 * @return a list of OnlineSummarizers where the i-th element is the summarizer corresponding to the
 *         cluster whose index is i; empty when the searcher holds no centroids.
 */
public static List<OnlineSummarizer> summarizeClusterDistances(Iterable<? extends Vector> datapoints,
                                                               Iterable<? extends Vector> centroids,
                                                               DistanceMeasure distanceMeasure) {
  UpdatableSearcher centroidIndex = new ProjectionSearch(distanceMeasure, 3, 1);
  centroidIndex.addAll(centroids);

  // One summarizer per centroid held by the searcher; with no centroids the list stays empty.
  List<OnlineSummarizer> perCluster = Lists.newArrayList();
  for (int cluster = 0; cluster < centroidIndex.size(); cluster++) {
    perCluster.add(new OnlineSummarizer());
  }
  if (perCluster.isEmpty()) {
    // Nothing to search against, so the datapoints are not visited at all.
    return perCluster;
  }

  for (Vector point : datapoints) {
    Centroid nearest = (Centroid) centroidIndex.search(point, 1).get(0).getValue();
    perCluster.get(nearest.getIndex()).add(distanceMeasure.distance(point, nearest));
  }
  return perCluster;
}
/**
 * Supplies the parameter rows for this parameterized test.
 *
 * <p>Each row holds a searcher built on a {@code SquaredEuclideanDistanceMeasure} with
 * {@code NUM_PROJECTIONS} and {@code SEARCH_SIZE}, followed by a boolean flag. Both
 * {@code ProjectionSearch} and {@code FastProjectionSearch} are paired with {@code true} and then
 * with {@code false}. The flag's meaning is defined by the consumer of these rows, which is not
 * visible here.
 *
 * @return four rows of the form {@code {searcher, flag}}.
 */
@Parameters
public static List<Object[]> generateData() {
  // Rows are created in the same order they are returned: flag == true first, then flag == false.
  Object[] projectionWithFlag = {
      new ProjectionSearch(new SquaredEuclideanDistanceMeasure(), NUM_PROJECTIONS, SEARCH_SIZE), true};
  Object[] fastProjectionWithFlag = {
      new FastProjectionSearch(new SquaredEuclideanDistanceMeasure(), NUM_PROJECTIONS, SEARCH_SIZE), true};
  Object[] projectionWithoutFlag = {
      new ProjectionSearch(new SquaredEuclideanDistanceMeasure(), NUM_PROJECTIONS, SEARCH_SIZE), false};
  Object[] fastProjectionWithoutFlag = {
      new FastProjectionSearch(new SquaredEuclideanDistanceMeasure(), NUM_PROJECTIONS, SEARCH_SIZE), false};

  return Arrays.asList(
      projectionWithFlag, fastProjectionWithFlag, projectionWithoutFlag, fastProjectionWithoutFlag);
}
@Parameterized.Parameters public static List<Object[]> generateData() { RandomUtils.useTestSeed(); Matrix dataPoints = LumpyData.lumpyRandomData(NUM_DATA_POINTS, NUM_DIMENSIONS); Matrix queries = LumpyData.lumpyRandomData(NUM_QUERIES, NUM_DIMENSIONS); DistanceMeasure distanceMeasure = new CosineDistanceMeasure(); Searcher bruteSearcher = new BruteSearch(distanceMeasure); bruteSearcher.addAll(dataPoints); Pair<List<List<WeightedThing<Vector>>>, Long> reference = getResultsAndRuntime(bruteSearcher, queries); Pair<List<WeightedThing<Vector>>, Long> referenceSearchFirst = getResultsAndRuntimeSearchFirst(bruteSearcher, queries); double bruteSearchAvgTime = reference.getSecond() / (queries.numRows() * 1.0); System.out.printf("BruteSearch: avg_time(1 query) %f[s]\n", bruteSearchAvgTime); return Arrays.asList(new Object[][]{ // NUM_PROJECTIONS = 3 // SEARCH_SIZE = 10 {new ProjectionSearch(distanceMeasure, 3, 10), dataPoints, queries, reference, referenceSearchFirst}, {new FastProjectionSearch(distanceMeasure, 3, 10), dataPoints, queries, reference, referenceSearchFirst}, // NUM_PROJECTIONS = 5 // SEARCH_SIZE = 5 {new ProjectionSearch(distanceMeasure, 5, 5), dataPoints, queries, reference, referenceSearchFirst}, {new FastProjectionSearch(distanceMeasure, 5, 5), dataPoints, queries, reference, referenceSearchFirst}, } ); }
/**
 * Supplies the parameter rows for this parameterized test.
 *
 * <p>After fixing the random seed, one matrix of data points is generated with
 * {@code multiNormalRandomData} and shared by every row. Each row pairs that matrix with a
 * different searcher built on its own {@code EuclideanDistanceMeasure}: {@code ProjectionSearch},
 * {@code FastProjectionSearch} and {@code LocalitySensitiveHashSearch}.
 *
 * @return three rows of the form {@code {searcher, dataPoints}}.
 */
@Parameterized.Parameters
public static List<Object[]> generateData() {
  // Seed first so the generated matrix is reproducible.
  RandomUtils.useTestSeed();
  Matrix samples = multiNormalRandomData(NUM_DATA_POINTS, NUM_DIMENSIONS);

  Object[][] parameterRows = {
      {new ProjectionSearch(new EuclideanDistanceMeasure(), NUM_PROJECTIONS, SEARCH_SIZE), samples},
      {new FastProjectionSearch(new EuclideanDistanceMeasure(), NUM_PROJECTIONS, SEARCH_SIZE), samples},
      {new LocalitySensitiveHashSearch(new EuclideanDistanceMeasure(), SEARCH_SIZE), samples},
  };
  return Arrays.asList(parameterRows);
}