/**
 * Command-line entry point: builds a fresh {@code Configuration} and a fresh
 * {@code FuzzyKMeansDriver}, then hands both to {@code ToolRunner.run} together
 * with the raw arguments.
 *
 * <p>NOTE(review): whatever {@code ToolRunner.run} returns is not used here, so it
 * is not visible to the caller of this method.
 *
 * @param args command-line arguments, forwarded unchanged
 * @throws Exception anything thrown while the driver runs is propagated as-is
 */
public static void main(String[] args) throws Exception {
  Configuration freshConf = new Configuration();
  FuzzyKMeansDriver driver = new FuzzyKMeansDriver();
  ToolRunner.run(freshConf, driver, args);
}
addInputOption(); addOutputOption(); addOption(DefaultOptionCreator.distanceMeasureOption().create()); addOption(DefaultOptionCreator.clustersInOption() .withDescription("The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy. " + "If k is also specified, then a random set of vectors will be selected" + " and written out to this path first") .create()); addOption(DefaultOptionCreator.numClustersOption() .withDescription("The k in k-Means. If specified, then a random selection of k Vectors will be chosen" + " as the Centroid and written to the clusters input path.").create()); addOption(DefaultOptionCreator.convergenceOption().create()); addOption(DefaultOptionCreator.maxIterationsOption().create()); addOption(DefaultOptionCreator.overwriteOption().create()); addOption(M_OPTION, M_OPTION, "coefficient normalization factor, must be greater than 1", true); addOption(DefaultOptionCreator.clusteringOption().create()); addOption(DefaultOptionCreator.emitMostLikelyOption().create()); addOption(DefaultOptionCreator.thresholdOption().create()); addOption(DefaultOptionCreator.methodOption().create()); if (parseArguments(args) == null) { return -1; Path input = getInputPath(); Path clusters = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION)); Path output = getOutputPath(); String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION); if (measureClass == null) { measureClass = SquaredEuclideanDistanceMeasure.class.getName();
boolean runSequential) throws IOException, ClassNotFoundException, InterruptedException { Configuration conf = new Configuration(); Path clustersOut = buildClusters(conf, input, clustersIn, if (runClustering) { log.info("Clustering "); clusterData(conf, input, clustersOut, output, throws IOException, ClassNotFoundException, InterruptedException { Path clustersOut = buildClusters(conf, input, clustersIn, output, convergenceDelta, maxIterations, m, runSequential); if (runClustering) { log.info("Clustering"); clusterData(conf, input, clustersOut,
new ManhattanDistanceMeasure(), 3000.0, 2000.0, false, false); FuzzyKMeansDriver.run(conf, new Path(vectorsFolder), new Path(canopyCentroids, "clusters-0"), new Path(clusterOutput), new TanimotoDistanceMeasure(), 0.01, 20, 2.0f, true, true, 0.0, false);
addInputOption(); addOutputOption(); addOption(DefaultOptionCreator.distanceMeasureOption().create()); addOption(DefaultOptionCreator.clustersInOption() .withDescription("The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy. " + "If k is also specified, then a random set of vectors will be selected" + " and written out to this path first") .create()); addOption(DefaultOptionCreator.numClustersOption() .withDescription("The k in k-Means. If specified, then a random selection of k Vectors will be chosen" + " as the Centroid and written to the clusters input path.").create()); addOption(DefaultOptionCreator.convergenceOption().create()); addOption(DefaultOptionCreator.maxIterationsOption().create()); addOption(DefaultOptionCreator.overwriteOption().create()); addOption(M_OPTION, M_OPTION, "coefficient normalization factor, must be greater than 1", true); addOption(DefaultOptionCreator.clusteringOption().create()); addOption(DefaultOptionCreator.emitMostLikelyOption().create()); addOption(DefaultOptionCreator.thresholdOption().create()); addOption(DefaultOptionCreator.methodOption().create()); addOption(DefaultOptionCreator.useSetRandomSeedOption().create()); if (parseArguments(args) == null) { return -1; Path input = getInputPath(); Path clusters = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION)); Path output = getOutputPath(); String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION); if (measureClass == null) {
boolean runSequential) throws IOException, ClassNotFoundException, InterruptedException { Configuration conf = new Configuration(); Path clustersOut = buildClusters(conf, input, clustersIn, if (runClustering) { log.info("Clustering "); clusterData(conf, input, clustersOut, output, throws IOException, ClassNotFoundException, InterruptedException { Path clustersOut = buildClusters(conf, input, clustersIn, output, convergenceDelta, maxIterations, m, runSequential); if (runClustering) { log.info("Clustering"); clusterData(conf, input, clustersOut,
addInputOption(); addOutputOption(); addOption(DefaultOptionCreator.distanceMeasureOption().create()); addOption(DefaultOptionCreator.clustersInOption() .withDescription("The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy. " + "If k is also specified, then a random set of vectors will be selected" + " and written out to this path first") .create()); addOption(DefaultOptionCreator.numClustersOption() .withDescription("The k in k-Means. If specified, then a random selection of k Vectors will be chosen" + " as the Centroid and written to the clusters input path.").create()); addOption(DefaultOptionCreator.convergenceOption().create()); addOption(DefaultOptionCreator.maxIterationsOption().create()); addOption(DefaultOptionCreator.overwriteOption().create()); addOption(M_OPTION, M_OPTION, "coefficient normalization factor, must be greater than 1", true); addOption(DefaultOptionCreator.clusteringOption().create()); addOption(DefaultOptionCreator.emitMostLikelyOption().create()); addOption(DefaultOptionCreator.thresholdOption().create()); addOption(DefaultOptionCreator.methodOption().create()); addOption(DefaultOptionCreator.useSetRandomSeedOption().create()); if (parseArguments(args) == null) { return -1; Path input = getInputPath(); Path clusters = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION)); Path output = getOutputPath(); String measureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION); if (measureClass == null) {
boolean runSequential) throws IOException, ClassNotFoundException, InterruptedException { Configuration conf = new Configuration(); Path clustersOut = buildClusters(conf, input, clustersIn, if (runClustering) { log.info("Clustering "); clusterData(conf, input, clustersOut, output, throws IOException, ClassNotFoundException, InterruptedException { Path clustersOut = buildClusters(conf, input, clustersIn, output, convergenceDelta, maxIterations, m, runSequential); if (runClustering) { log.info("Clustering"); clusterData(conf, input, clustersOut,
/**
 * Command-line entry point: builds a fresh {@code Configuration} and a fresh
 * {@code FuzzyKMeansDriver}, then hands both to {@code ToolRunner.run} together
 * with the raw arguments.
 *
 * <p>NOTE(review): whatever {@code ToolRunner.run} returns is not used here, so it
 * is not visible to the caller of this method.
 *
 * @param args command-line arguments, forwarded unchanged
 * @throws Exception anything thrown while the driver runs is propagated as-is
 */
public static void main(String[] args) throws Exception {
  Configuration freshConf = new Configuration();
  FuzzyKMeansDriver driver = new FuzzyKMeansDriver();
  ToolRunner.run(freshConf, driver, args);
}
/**
 * Command-line entry point: builds a fresh {@code Configuration} and a fresh
 * {@code FuzzyKMeansDriver}, then hands both to {@code ToolRunner.run} together
 * with the raw arguments.
 *
 * <p>NOTE(review): whatever {@code ToolRunner.run} returns is not used here, so it
 * is not visible to the caller of this method.
 *
 * @param args command-line arguments, forwarded unchanged
 * @throws Exception anything thrown while the driver runs is propagated as-is
 */
public static void main(String[] args) throws Exception {
  Configuration freshConf = new Configuration();
  FuzzyKMeansDriver driver = new FuzzyKMeansDriver();
  ToolRunner.run(freshConf, driver, args);
}
optKey(DefaultOptionCreator.OVERWRITE_OPTION) }; ToolRunner.run(getConfiguration(), new FuzzyKMeansDriver(), args); long count = HadoopUtil.countRecords(new Path(output, "clusteredPoints/part-m-00000"), conf); assertTrue(count > 0);