/**
 * Writes a {@link CanopyClusteringPolicy} to the canopies directory and then hands the points
 * over to {@code ClusterClassificationDriver.run}.
 *
 * @param conf
 *          the configuration forwarded to the classification driver
 * @param points
 *          the path forwarded to the driver as its input
 * @param canopies
 *          the path the clustering policy is written to
 * @param output
 *          the output path; the driver also receives the
 *          {@code PathDirectory.CLUSTERED_POINTS_DIRECTORY} child of this path
 * @param clusterClassificationThreshold
 *          forwarded unchanged to the classification driver
 * @param runSequential
 *          forwarded unchanged to the classification driver
 */
private static void clusterData(Configuration conf,
                                Path points,
                                Path canopies,
                                Path output,
                                double clusterClassificationThreshold,
                                boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  CanopyClusteringPolicy canopyPolicy = new CanopyClusteringPolicy();
  ClusterClassifier.writePolicy(canopyPolicy, canopies);

  Path clusteredPoints = new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY);
  // NOTE(review): the literal 'true' is presumably the driver's "emit most likely" flag -- confirm
  // against ClusterClassificationDriver.run.
  ClusterClassificationDriver.run(conf, points, output, clusteredPoints,
      clusterClassificationThreshold, true, runSequential);
}
/**
 * Writes a {@link CanopyClusteringPolicy} to the canopies directory and then hands the points
 * over to {@code ClusterClassificationDriver.run}.
 *
 * @param conf
 *          the configuration forwarded to the classification driver
 * @param points
 *          the path forwarded to the driver as its input
 * @param canopies
 *          the path the clustering policy is written to
 * @param output
 *          the output path; the driver also receives the
 *          {@code PathDirectory.CLUSTERED_POINTS_DIRECTORY} child of this path
 * @param clusterClassificationThreshold
 *          forwarded unchanged to the classification driver
 * @param runSequential
 *          forwarded unchanged to the classification driver
 */
private static void clusterData(Configuration conf,
                                Path points,
                                Path canopies,
                                Path output,
                                double clusterClassificationThreshold,
                                boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  CanopyClusteringPolicy canopyPolicy = new CanopyClusteringPolicy();
  ClusterClassifier.writePolicy(canopyPolicy, canopies);

  Path clusteredPoints = new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY);
  // NOTE(review): the literal 'true' is presumably the driver's "emit most likely" flag -- confirm
  // against ClusterClassificationDriver.run.
  ClusterClassificationDriver.run(conf, points, output, clusteredPoints,
      clusterClassificationThreshold, true, runSequential);
}
/**
 * Writes a {@link CanopyClusteringPolicy} to the canopies directory and then hands the points
 * over to {@code ClusterClassificationDriver.run}.
 *
 * @param conf
 *          the configuration forwarded to the classification driver
 * @param points
 *          the path forwarded to the driver as its input
 * @param canopies
 *          the path the clustering policy is written to
 * @param output
 *          the output path; the driver also receives the
 *          {@code PathDirectory.CLUSTERED_POINTS_DIRECTORY} child of this path
 * @param clusterClassificationThreshold
 *          forwarded unchanged to the classification driver
 * @param runSequential
 *          forwarded unchanged to the classification driver
 */
private static void clusterData(Configuration conf,
                                Path points,
                                Path canopies,
                                Path output,
                                double clusterClassificationThreshold,
                                boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  CanopyClusteringPolicy canopyPolicy = new CanopyClusteringPolicy();
  ClusterClassifier.writePolicy(canopyPolicy, canopies);

  Path clusteredPoints = new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY);
  // NOTE(review): the literal 'true' is presumably the driver's "emit most likely" flag -- confirm
  // against ClusterClassificationDriver.run.
  ClusterClassificationDriver.run(conf, points, output, clusteredPoints,
      clusterClassificationThreshold, true, runSequential);
}
/**
 * Writes the policy and then every model to {@code path}, one sequence file per model.
 *
 * <p>Each model at index {@code i} goes into its own file named {@code part-} followed by the
 * zero-padded, five-digit index, keyed by an {@link IntWritable} holding {@code i}.
 *
 * @param path
 *          the directory the policy and the part files are written under
 * @throws IOException
 *           if the policy or any part file cannot be written
 */
public void writeToSeqFiles(Path path) throws IOException {
  writePolicy(policy, path);

  Configuration config = new Configuration();
  FileSystem fileSystem = FileSystem.get(path.toUri(), config);
  // A single writable wrapper is reused for every model.
  ClusterWritable valueHolder = new ClusterWritable();

  for (int modelIndex = 0; modelIndex < models.size(); modelIndex++) {
    String partName = "part-" + String.format(Locale.ENGLISH, "%05d", modelIndex);
    Path partFile = new Path(path, partName);
    try (SequenceFile.Writer out =
        new SequenceFile.Writer(fileSystem, config, partFile, IntWritable.class, ClusterWritable.class)) {
      Cluster model = models.get(modelIndex);
      valueHolder.setValue(model);
      Writable key = new IntWritable(modelIndex);
      out.append(key, valueHolder);
    }
  }
}
/**
 * Assigns the input points to the supplied clusters using a {@link KMeansClusteringPolicy}.
 *
 * <p>The method logs the paths involved (only when info logging is enabled), writes the policy
 * to {@code clustersIn} and then delegates to {@code ClusterClassificationDriver.run}, which
 * also receives the {@code PathDirectory.CLUSTERED_POINTS_DIRECTORY} child of {@code output}.
 *
 * @param conf
 *          the configuration forwarded to the classification driver
 * @param input
 *          the directory pathname for input points
 * @param clustersIn
 *          the directory pathname for input clusters; the clustering policy is written here
 * @param output
 *          the directory pathname for output points
 * @param clusterClassificationThreshold
 *          a clustering strictness / outlier removal parameter. Its value should be between 0 and 1.
 *          Vectors having pdf below this value will not be clustered.
 * @param runSequential
 *          if true execute sequential algorithm
 */
public static void clusterData(Configuration conf,
                               Path input,
                               Path clustersIn,
                               Path output,
                               double clusterClassificationThreshold,
                               boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (log.isInfoEnabled()) {
    log.info("Running Clustering");
    log.info("Input: {} Clusters In: {} Out: {}", input, clustersIn, output);
  }

  KMeansClusteringPolicy kMeansPolicy = new KMeansClusteringPolicy();
  ClusterClassifier.writePolicy(kMeansPolicy, clustersIn);

  Path clusteredPoints = new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY);
  ClusterClassificationDriver.run(conf, input, output, clusteredPoints,
      clusterClassificationThreshold, true, runSequential);
}
/**
 * Assigns the input points to the supplied clusters using a {@link KMeansClusteringPolicy}.
 *
 * <p>The method logs the paths involved (only when info logging is enabled), writes the policy
 * to {@code clustersIn} and then delegates to {@code ClusterClassificationDriver.run}, which
 * also receives the {@code PathDirectory.CLUSTERED_POINTS_DIRECTORY} child of {@code output}.
 *
 * @param conf
 *          the configuration forwarded to the classification driver
 * @param input
 *          the directory pathname for input points
 * @param clustersIn
 *          the directory pathname for input clusters; the clustering policy is written here
 * @param output
 *          the directory pathname for output points
 * @param clusterClassificationThreshold
 *          a clustering strictness / outlier removal parameter. Its value should be between 0 and 1.
 *          Vectors having pdf below this value will not be clustered.
 * @param runSequential
 *          if true execute sequential algorithm
 */
public static void clusterData(Configuration conf,
                               Path input,
                               Path clustersIn,
                               Path output,
                               double clusterClassificationThreshold,
                               boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (log.isInfoEnabled()) {
    log.info("Running Clustering");
    log.info("Input: {} Clusters In: {} Out: {}", input, clustersIn, output);
  }

  KMeansClusteringPolicy kMeansPolicy = new KMeansClusteringPolicy();
  ClusterClassifier.writePolicy(kMeansPolicy, clustersIn);

  Path clusteredPoints = new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY);
  ClusterClassificationDriver.run(conf, input, output, clusteredPoints,
      clusterClassificationThreshold, true, runSequential);
}
/**
 * Writes the policy and then every model to {@code path}, one sequence file per model.
 *
 * <p>Each model at index {@code i} goes into its own file named {@code part-} followed by the
 * zero-padded, five-digit index, keyed by an {@link IntWritable} holding {@code i}.
 *
 * @param path
 *          the directory the policy and the part files are written under
 * @throws IOException
 *           if the policy or any part file cannot be written or closed
 */
public void writeToSeqFiles(Path path) throws IOException {
  writePolicy(policy, path);
  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(path.toUri(), config);
  // A single writable wrapper is reused for every model.
  ClusterWritable cw = new ClusterWritable();
  for (int i = 0; i < models.size(); i++) {
    Cluster cluster = models.get(i);
    cw.setValue(cluster);
    Path partFile = new Path(path, "part-" + String.format(Locale.ENGLISH, "%05d", i));
    // The writer is scoped to this iteration so a failure before it is opened can never
    // re-close the previous iteration's writer, and a failure while closing is attached as a
    // suppressed exception instead of replacing the original one.
    try (SequenceFile.Writer writer =
        new SequenceFile.Writer(fs, config, partFile, IntWritable.class, ClusterWritable.class)) {
      Writable key = new IntWritable(i);
      writer.append(key, cw);
    }
  }
}
/**
 * Writes the policy and then every model to {@code path}, one sequence file per model.
 *
 * <p>Each model at index {@code i} goes into its own file named {@code part-} followed by the
 * zero-padded, five-digit index, keyed by an {@link IntWritable} holding {@code i}.
 *
 * @param path
 *          the directory the policy and the part files are written under
 * @throws IOException
 *           if the policy or any part file cannot be written or closed
 */
public void writeToSeqFiles(Path path) throws IOException {
  writePolicy(policy, path);
  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(path.toUri(), config);
  // A single writable wrapper is reused for every model.
  ClusterWritable cw = new ClusterWritable();
  for (int i = 0; i < models.size(); i++) {
    Cluster cluster = models.get(i);
    cw.setValue(cluster);
    Path partFile = new Path(path, "part-" + String.format(Locale.ENGLISH, "%05d", i));
    // The writer is scoped to this iteration so a failure before it is opened can never
    // re-close the previous iteration's writer, and a failure while closing is attached as a
    // suppressed exception instead of replacing the original one.
    try (SequenceFile.Writer writer =
        new SequenceFile.Writer(fs, config, partFile, IntWritable.class, ClusterWritable.class)) {
      Writable key = new IntWritable(i);
      writer.append(key, cw);
    }
  }
}
/**
 * Assigns the input points to the supplied clusters using a {@link KMeansClusteringPolicy}.
 *
 * <p>The method logs the paths involved (only when info logging is enabled), writes the policy
 * to {@code clustersIn} and then delegates to {@code ClusterClassificationDriver.run}, which
 * also receives the {@code PathDirectory.CLUSTERED_POINTS_DIRECTORY} child of {@code output}.
 *
 * @param conf
 *          the configuration forwarded to the classification driver
 * @param input
 *          the directory pathname for input points
 * @param clustersIn
 *          the directory pathname for input clusters; the clustering policy is written here
 * @param output
 *          the directory pathname for output points
 * @param clusterClassificationThreshold
 *          a clustering strictness / outlier removal parameter. Its value should be between 0 and 1.
 *          Vectors having pdf below this value will not be clustered.
 * @param runSequential
 *          if true execute sequential algorithm
 */
public static void clusterData(Configuration conf,
                               Path input,
                               Path clustersIn,
                               Path output,
                               double clusterClassificationThreshold,
                               boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (log.isInfoEnabled()) {
    log.info("Running Clustering");
    log.info("Input: {} Clusters In: {} Out: {}", input, clustersIn, output);
  }

  KMeansClusteringPolicy kMeansPolicy = new KMeansClusteringPolicy();
  ClusterClassifier.writePolicy(kMeansPolicy, clustersIn);

  Path clusteredPoints = new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY);
  ClusterClassificationDriver.run(conf, input, output, clusteredPoints,
      clusterClassificationThreshold, true, runSequential);
}
throw new InterruptedException("Cluster Iteration " + iteration + " failed processing " + priorPath); ClusterClassifier.writePolicy(policy, clustersOut); FileSystem fs = FileSystem.get(outPath.toUri(), conf); iteration++;
throw new InterruptedException("Cluster Iteration " + iteration + " failed processing " + priorPath); ClusterClassifier.writePolicy(policy, clustersOut); FileSystem fs = FileSystem.get(outPath.toUri(), conf); iteration++;
throw new InterruptedException("Cluster Iteration " + iteration + " failed processing " + priorPath); ClusterClassifier.writePolicy(policy, clustersOut); FileSystem fs = FileSystem.get(outPath.toUri(), conf); iteration++;
throws IOException, ClassNotFoundException, InterruptedException { ClusterClassifier.writePolicy(new FuzzyKMeansClusteringPolicy(m, convergenceDelta), clustersIn); ClusterClassificationDriver.run(conf, input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY), threshold, emitMostLikely, runSequential);
throws IOException, ClassNotFoundException, InterruptedException { ClusterClassifier.writePolicy(new FuzzyKMeansClusteringPolicy(m, convergenceDelta), clustersIn); ClusterClassificationDriver.run(conf, input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY), threshold, emitMostLikely, runSequential);
throws IOException, ClassNotFoundException, InterruptedException { ClusterClassifier.writePolicy(new FuzzyKMeansClusteringPolicy(m, convergenceDelta), clustersIn); ClusterClassificationDriver.run(conf, input, output, new Path(output, PathDirectory.CLUSTERED_POINTS_DIRECTORY), threshold, emitMostLikely, runSequential);
/**
 * Runs the canopy driver over the given points and then writes a
 * {@link CanopyClusteringPolicy} into the {@code clusters-0-final} directory beneath
 * {@code clusteringOutputPath}.
 *
 * @param pointsPath
 *          the path handed to {@code CanopyDriver.run} as its input
 * @param conf
 *          the configuration handed to {@code CanopyDriver.run}
 * @param runSequential
 *          forwarded to {@code CanopyDriver.run} as its last argument
 */
private void runClustering(Path pointsPath, Configuration conf, Boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  ManhattanDistanceMeasure distanceMeasure = new ManhattanDistanceMeasure();
  // NOTE(review): 3.1 and 2.1 are presumably the canopy T1/T2 thresholds, 'false' a
  // "run clustering" switch and 0.0 a classification threshold -- confirm against
  // CanopyDriver.run.
  CanopyDriver.run(conf, pointsPath, clusteringOutputPath, distanceMeasure,
      3.1, 2.1, false, 0.0, runSequential);

  Path finalClustersPath = new Path(clusteringOutputPath, "clusters-0-final");
  CanopyClusteringPolicy canopyPolicy = new CanopyClusteringPolicy();
  ClusterClassifier.writePolicy(canopyPolicy, finalClustersPath);
}
prior.writeToSeqFiles(path); ClusteringPolicy policy = new KMeansClusteringPolicy(); ClusterClassifier.writePolicy(policy, path); assertEquals(3, prior.getModels().size()); System.out.println("Prior");