public int run(String[] args) throws Exception { addInputOption(); addOutputOption(); addOption(DefaultOptionCreator.methodOption().create()); addOption(DefaultOptionCreator.clustersInOption() .withDescription("The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy.") .create()); if (parseArguments(args) == null) { return -1; Path input = getInputPath(); Path output = getOutputPath(); if (getConf() == null) { setConf(new Configuration()); Path clustersIn = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION)); boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase( DefaultOptionCreator.SEQUENTIAL_METHOD); if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) { clusterClassificationThreshold = Double.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD)); run(getConf(), input, clustersIn, output, clusterClassificationThreshold, true, runSequential);
/**
 * Program entry point. Hands control to Hadoop's {@code ToolRunner}, which
 * strips generic options (-D, -fs, ...) before invoking {@code run(String[])}.
 *
 * @param args command-line arguments forwarded to the driver
 * @throws Exception propagated from the underlying tool
 */
public static void main(String[] args) throws Exception {
  ClusterClassificationDriver driver = new ClusterClassificationDriver();
  ToolRunner.run(new Configuration(), driver, args);
}
/**
 * Classifies the input vectors against previously computed clusters, either
 * in-process (sequential) or as a MapReduce job.
 *
 * @param conf Hadoop configuration
 * @param input path of the vectors to classify
 * @param clusteringOutputPath output path of the prior clustering run
 * @param output where classified vectors are written
 * @param clusterClassificationThreshold outlier-removal threshold; vectors below it are not classified
 * @param emitMostLikely whether to emit only the single most likely cluster
 * @param runSequential true for sequential execution, false for MapReduce
 * @throws IOException on I/O failure
 * @throws InterruptedException if the MR job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public static void run(Configuration conf, Path input, Path clusteringOutputPath, Path output,
    double clusterClassificationThreshold, boolean emitMostLikely, boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (!runSequential) {
    classifyClusterMR(conf, input, clusteringOutputPath, output, clusterClassificationThreshold, emitMostLikely);
    return;
  }
  classifyClusterSeq(conf, input, clusteringOutputPath, output, clusterClassificationThreshold, emitMostLikely);
}
/**
 * Sequentially classifies input vectors: loads the final cluster models and the
 * stored clustering policy, then selects a cluster for each input vector.
 *
 * Fix: the threshold parameter was boxed {@code Double} while every sibling
 * method uses primitive {@code double}; the boxed form invites NPEs and
 * needless autoboxing, so it is now primitive (all callers pass primitives).
 *
 * @param conf Hadoop configuration
 * @param input path of the vectors to classify
 * @param clusters output path of the prior clustering run
 * @param output where classified vectors are written
 * @param clusterClassificationThreshold outlier-removal threshold
 * @param emitMostLikely whether to emit only the single most likely cluster
 * @throws IOException on I/O failure
 */
private static void classifyClusterSeq(Configuration conf, Path input, Path clusters, Path output,
    double clusterClassificationThreshold, boolean emitMostLikely) throws IOException {
  List<Cluster> clusterModels = populateClusterModels(clusters, conf);
  ClusteringPolicy policy = ClusterClassifier.readPolicy(finalClustersPath(conf, clusters));
  ClusterClassifier clusterClassifier = new ClusterClassifier(clusterModels, policy);
  selectCluster(input, clusterModels, clusterClassifier, output, clusterClassificationThreshold, emitMostLikely);
}
/**
 * Persists a canopy clustering policy alongside the canopies, then classifies
 * the given points against them, writing results under the clustered-points
 * subdirectory of {@code output}.
 *
 * @param conf Hadoop configuration
 * @param points path of the vectors to classify
 * @param canopies path of the canopy clusters (policy is written here)
 * @param output clustering output root
 * @param clusterClassificationThreshold outlier-removal threshold
 * @param runSequential true for sequential execution, false for MapReduce
 * @throws IOException on I/O failure
 * @throws InterruptedException if the MR job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
private static void clusterData(Configuration conf, Path points, Path canopies, Path output,
    double clusterClassificationThreshold, boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  // Store the policy first so the classification step can read it back.
  ClusterClassifier.writePolicy(new CanopyClusteringPolicy(), canopies);
  Path clusteredPoints = new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY);
  ClusterClassificationDriver.run(conf, points, output, clusteredPoints,
      clusterClassificationThreshold, true, runSequential);
}
// NOTE(review): fragment of the per-vector classification loop; the enclosing
// method and the closing braces are outside this chunk. The vector is written
// only when shouldClassify() judges its cluster pdf to clear the outlier
// threshold — presumably vectors failing the check are skipped; confirm
// against the full method body.
if (shouldClassify(pdfPerCluster, clusterClassificationThreshold)) { classifyAndWrite(clusterModels, clusterClassificationThreshold, emitMostLikely, writer, new VectorWritable(vector), pdfPerCluster);
/**
 * Populates a list with the clusters found in the clusters-*-final directory.
 *
 * @param clusterOutputPath output path of the clustering run
 * @param conf the Hadoop configuration
 * @return the clusters discovered under the final-clusters directory
 * @throws IOException on read failure
 */
private static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
  Path finalClusters = finalClustersPath(conf, clusterOutputPath);
  List<Cluster> models = new ArrayList<>();
  // Walk every value in the directory's part files; ordering is not significant.
  for (Iterator<?> values = new SequenceFileDirValueIterator<>(finalClusters, PathType.LIST,
      PathFilters.partFilter(), null, false, conf); values.hasNext();) {
    Cluster cluster = ((ClusterWritable) values.next()).getValue();
    cluster.configure(conf);
    models.add(cluster);
  }
  return models;
}
/**
 * Sequentially classifies input vectors: loads the final cluster models and the
 * stored clustering policy, then selects a cluster for each input vector.
 *
 * Fix: the threshold parameter was boxed {@code Double} while every sibling
 * method uses primitive {@code double}; the boxed form invites NPEs and
 * needless autoboxing, so it is now primitive (all callers pass primitives).
 *
 * @param conf Hadoop configuration
 * @param input path of the vectors to classify
 * @param clusters output path of the prior clustering run
 * @param output where classified vectors are written
 * @param clusterClassificationThreshold outlier-removal threshold
 * @param emitMostLikely whether to emit only the single most likely cluster
 * @throws IOException on I/O failure
 */
private static void classifyClusterSeq(Configuration conf, Path input, Path clusters, Path output,
    double clusterClassificationThreshold, boolean emitMostLikely) throws IOException {
  List<Cluster> clusterModels = populateClusterModels(clusters, conf);
  ClusteringPolicy policy = ClusterClassifier.readPolicy(finalClustersPath(conf, clusters));
  ClusterClassifier clusterClassifier = new ClusterClassifier(clusterModels, policy);
  selectCluster(input, clusterModels, clusterClassifier, output, clusterClassificationThreshold, emitMostLikely);
}
/**
 * Persists a canopy clustering policy alongside the canopies, then classifies
 * the given points against them, writing results under the clustered-points
 * subdirectory of {@code output}.
 *
 * @param conf Hadoop configuration
 * @param points path of the vectors to classify
 * @param canopies path of the canopy clusters (policy is written here)
 * @param output clustering output root
 * @param clusterClassificationThreshold outlier-removal threshold
 * @param runSequential true for sequential execution, false for MapReduce
 * @throws IOException on I/O failure
 * @throws InterruptedException if the MR job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
private static void clusterData(Configuration conf, Path points, Path canopies, Path output,
    double clusterClassificationThreshold, boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  // Store the policy first so the classification step can read it back.
  ClusterClassifier.writePolicy(new CanopyClusteringPolicy(), canopies);
  Path clusteredPoints = new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY);
  ClusterClassificationDriver.run(conf, points, output, clusteredPoints,
      clusterClassificationThreshold, true, runSequential);
}
// NOTE(review): fragment of the per-vector classification loop; the enclosing
// method and the closing braces are outside this chunk. The vector is written
// only when shouldClassify() judges its cluster pdf to clear the outlier
// threshold — presumably vectors failing the check are skipped; confirm
// against the full method body.
if (shouldClassify(pdfPerCluster, clusterClassificationThreshold)) { classifyAndWrite(clusterModels, clusterClassificationThreshold, emitMostLikely, writer, new VectorWritable(vector), pdfPerCluster);
/**
 * Populates a list with the clusters found in the clusters-*-final directory.
 *
 * Fix: replaced Guava {@code Lists.newArrayList()} and the explicit
 * {@code <Writable>} type argument with {@code new ArrayList<>()} and the
 * diamond operator, matching the style this file's other copy of the method
 * already uses — no behavior change.
 *
 * @param clusterOutputPath output path of the clustering run
 * @param conf the Hadoop configuration
 * @return the clusters discovered under the final-clusters directory
 * @throws IOException on read failure
 */
private static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
  List<Cluster> clusterModels = new ArrayList<>();
  Path finalClustersPath = finalClustersPath(conf, clusterOutputPath);
  Iterator<?> it = new SequenceFileDirValueIterator<>(finalClustersPath, PathType.LIST,
      PathFilters.partFilter(), null, false, conf);
  while (it.hasNext()) {
    ClusterWritable next = (ClusterWritable) it.next();
    Cluster cluster = next.getValue();
    cluster.configure(conf);
    clusterModels.add(cluster);
  }
  return clusterModels;
}
public int run(String[] args) throws Exception { addInputOption(); addOutputOption(); addOption(DefaultOptionCreator.methodOption().create()); addOption(DefaultOptionCreator.clustersInOption() .withDescription("The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy.") .create()); if (parseArguments(args) == null) { return -1; Path input = getInputPath(); Path output = getOutputPath(); if (getConf() == null) { setConf(new Configuration()); Path clustersIn = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION)); boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase( DefaultOptionCreator.SEQUENTIAL_METHOD); if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) { clusterClassificationThreshold = Double.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD)); run(getConf(), input, clustersIn, output, clusterClassificationThreshold, true, runSequential);
/**
 * Sequentially classifies input vectors: loads the final cluster models and the
 * stored clustering policy, then selects a cluster for each input vector.
 *
 * Fix: the threshold parameter was boxed {@code Double} while every sibling
 * method uses primitive {@code double}; the boxed form invites NPEs and
 * needless autoboxing, so it is now primitive (all callers pass primitives).
 *
 * @param conf Hadoop configuration
 * @param input path of the vectors to classify
 * @param clusters output path of the prior clustering run
 * @param output where classified vectors are written
 * @param clusterClassificationThreshold outlier-removal threshold
 * @param emitMostLikely whether to emit only the single most likely cluster
 * @throws IOException on I/O failure
 */
private static void classifyClusterSeq(Configuration conf, Path input, Path clusters, Path output,
    double clusterClassificationThreshold, boolean emitMostLikely) throws IOException {
  List<Cluster> clusterModels = populateClusterModels(clusters, conf);
  ClusteringPolicy policy = ClusterClassifier.readPolicy(finalClustersPath(conf, clusters));
  ClusterClassifier clusterClassifier = new ClusterClassifier(clusterModels, policy);
  selectCluster(input, clusterModels, clusterClassifier, output, clusterClassificationThreshold, emitMostLikely);
}
/**
 * Classifies the input vectors against previously computed clusters, either
 * in-process (sequential) or as a MapReduce job.
 *
 * @param conf Hadoop configuration
 * @param input path of the vectors to classify
 * @param clusteringOutputPath output path of the prior clustering run
 * @param output where classified vectors are written
 * @param clusterClassificationThreshold outlier-removal threshold; vectors below it are not classified
 * @param emitMostLikely whether to emit only the single most likely cluster
 * @param runSequential true for sequential execution, false for MapReduce
 * @throws IOException on I/O failure
 * @throws InterruptedException if the MR job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public static void run(Configuration conf, Path input, Path clusteringOutputPath, Path output,
    double clusterClassificationThreshold, boolean emitMostLikely, boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (!runSequential) {
    classifyClusterMR(conf, input, clusteringOutputPath, output, clusterClassificationThreshold, emitMostLikely);
    return;
  }
  classifyClusterSeq(conf, input, clusteringOutputPath, output, clusterClassificationThreshold, emitMostLikely);
}
/**
 * Persists a canopy clustering policy alongside the canopies, then classifies
 * the given points against them, writing results under the clustered-points
 * subdirectory of {@code output}.
 *
 * @param conf Hadoop configuration
 * @param points path of the vectors to classify
 * @param canopies path of the canopy clusters (policy is written here)
 * @param output clustering output root
 * @param clusterClassificationThreshold outlier-removal threshold
 * @param runSequential true for sequential execution, false for MapReduce
 * @throws IOException on I/O failure
 * @throws InterruptedException if the MR job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
private static void clusterData(Configuration conf, Path points, Path canopies, Path output,
    double clusterClassificationThreshold, boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  // Store the policy first so the classification step can read it back.
  ClusterClassifier.writePolicy(new CanopyClusteringPolicy(), canopies);
  Path clusteredPoints = new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY);
  ClusterClassificationDriver.run(conf, points, output, clusteredPoints,
      clusterClassificationThreshold, true, runSequential);
}
// NOTE(review): fragment of the per-vector classification loop; the enclosing
// method and the closing braces are outside this chunk. The vector is written
// only when shouldClassify() judges its cluster pdf to clear the outlier
// threshold — presumably vectors failing the check are skipped; confirm
// against the full method body.
if (shouldClassify(pdfPerCluster, clusterClassificationThreshold)) { classifyAndWrite(clusterModels, clusterClassificationThreshold, emitMostLikely, writer, new VectorWritable(vector), pdfPerCluster);
/**
 * Program entry point. Hands control to Hadoop's {@code ToolRunner}, which
 * strips generic options (-D, -fs, ...) before invoking {@code run(String[])}.
 *
 * @param args command-line arguments forwarded to the driver
 * @throws Exception propagated from the underlying tool
 */
public static void main(String[] args) throws Exception {
  ClusterClassificationDriver driver = new ClusterClassificationDriver();
  ToolRunner.run(new Configuration(), driver, args);
}
/**
 * Populates a list with the clusters found in the clusters-*-final directory.
 *
 * Fix: replaced Guava {@code Lists.newArrayList()} and the explicit
 * {@code <Writable>} type argument with {@code new ArrayList<>()} and the
 * diamond operator, matching the style this file's other copy of the method
 * already uses — no behavior change.
 *
 * @param clusterOutputPath output path of the clustering run
 * @param conf the Hadoop configuration
 * @return the clusters discovered under the final-clusters directory
 * @throws IOException on read failure
 */
private static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
  List<Cluster> clusterModels = new ArrayList<>();
  Path finalClustersPath = finalClustersPath(conf, clusterOutputPath);
  Iterator<?> it = new SequenceFileDirValueIterator<>(finalClustersPath, PathType.LIST,
      PathFilters.partFilter(), null, false, conf);
  while (it.hasNext()) {
    ClusterWritable next = (ClusterWritable) it.next();
    Cluster cluster = next.getValue();
    cluster.configure(conf);
    clusterModels.add(cluster);
  }
  return clusterModels;
}
public int run(String[] args) throws Exception { addInputOption(); addOutputOption(); addOption(DefaultOptionCreator.methodOption().create()); addOption(DefaultOptionCreator.clustersInOption() .withDescription("The input centroids, as Vectors. Must be a SequenceFile of Writable, Cluster/Canopy.") .create()); if (parseArguments(args) == null) { return -1; Path input = getInputPath(); Path output = getOutputPath(); if (getConf() == null) { setConf(new Configuration()); Path clustersIn = new Path(getOption(DefaultOptionCreator.CLUSTERS_IN_OPTION)); boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase( DefaultOptionCreator.SEQUENTIAL_METHOD); if (hasOption(DefaultOptionCreator.OUTLIER_THRESHOLD)) { clusterClassificationThreshold = Double.parseDouble(getOption(DefaultOptionCreator.OUTLIER_THRESHOLD)); run(getConf(), input, clustersIn, output, clusterClassificationThreshold, true, runSequential);
/**
 * Classifies the input vectors against previously computed clusters, either
 * in-process (sequential) or as a MapReduce job.
 *
 * @param conf Hadoop configuration
 * @param input path of the vectors to classify
 * @param clusteringOutputPath output path of the prior clustering run
 * @param output where classified vectors are written
 * @param clusterClassificationThreshold outlier-removal threshold; vectors below it are not classified
 * @param emitMostLikely whether to emit only the single most likely cluster
 * @param runSequential true for sequential execution, false for MapReduce
 * @throws IOException on I/O failure
 * @throws InterruptedException if the MR job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public static void run(Configuration conf, Path input, Path clusteringOutputPath, Path output,
    double clusterClassificationThreshold, boolean emitMostLikely, boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (!runSequential) {
    classifyClusterMR(conf, input, clusteringOutputPath, output, clusterClassificationThreshold, emitMostLikely);
    return;
  }
  classifyClusterSeq(conf, input, clusteringOutputPath, output, clusterClassificationThreshold, emitMostLikely);
}