// Warn the user up front: the MapReduce path is known not to work in Hadoop
// local mode, so they should fall back to --sequential there.
// NOTE(review): consider routing this through a logger instead of stdout.
System.out.println("WARNING: If you are running in Hadoop local mode, please use the --sequential option, "
    + "as the MapReduce option will not work properly");
// Number of clusters produced by the upstream clustering run; presumably used
// further down in this method (e.g. to size the reducers) — not visible here.
int numberOfClusters = ClusterCountReader.getNumberOfClusters(input, conf);
// Set this BEFORE creating the Job: the Job takes a snapshot of the
// Configuration, so later conf.set(...) calls would not reach the job.
conf.set("clusterOutputPath", input.toString());
// Job.getInstance(...) replaces the deprecated new Job(Configuration, String) constructor.
Job job = Job.getInstance(conf, "ClusterOutputPostProcessor Driver running over input: " + input);
// Warn the user up front: the MapReduce path is known not to work in Hadoop
// local mode, so they should fall back to --sequential there.
// NOTE(review): consider routing this through a logger instead of stdout.
System.out.println("WARNING: If you are running in Hadoop local mode, please use the --sequential option, "
    + "as the MapReduce option will not work properly");
// Number of clusters produced by the upstream clustering run; presumably used
// further down in this method (e.g. to size the reducers) — not visible here.
int numberOfClusters = ClusterCountReader.getNumberOfClusters(input, conf);
// Set this BEFORE creating the Job: the Job takes a snapshot of the
// Configuration, so later conf.set(...) calls would not reach the job.
conf.set("clusterOutputPath", input.toString());
// Job.getInstance(...) replaces the deprecated new Job(Configuration, String) constructor.
Job job = Job.getInstance(conf, "ClusterOutputPostProcessor Driver running over input: " + input);
// Warn the user up front: the MapReduce path is known not to work in Hadoop
// local mode, so they should fall back to --sequential there.
// NOTE(review): consider routing this through a logger instead of stdout.
System.out.println("WARNING: If you are running in Hadoop local mode, please use the --sequential option, "
    + "as the MapReduce option will not work properly");
// Number of clusters produced by the upstream clustering run; presumably used
// further down in this method (e.g. to size the reducers) — not visible here.
int numberOfClusters = ClusterCountReader.getNumberOfClusters(input, conf);
// Set this BEFORE creating the Job: the Job takes a snapshot of the
// Configuration, so later conf.set(...) calls would not reach the job.
conf.set("clusterOutputPath", input.toString());
// Job.getInstance(...) replaces the deprecated new Job(Configuration, String) constructor.
Job job = Job.getInstance(conf, "ClusterOutputPostProcessor Driver running over input: " + input);
/**
 * Story: User wants to use cluster post processor after canopy clustering and then run clustering on the
 * output clusters
 */
@Test
public void testGetNumberOfClusters() throws Exception {
    // Write the same reference points into two files so the input directory
    // spans multiple part files.
    List<VectorWritable> points = getPointsWritable(REFERENCE);
    Path pointsPath = getTestTempDirPath("points");
    Configuration conf = getConfiguration();
    ClusteringTestUtils.writePointsToFile(points, new Path(pointsPath, "file1"), fs, conf);
    ClusteringTestUtils.writePointsToFile(points, new Path(pointsPath, "file2"), fs, conf);
    outputPathForCanopy = getTestTempDirPath("canopy");
    outputPathForKMeans = getTestTempDirPath("kmeans");
    // Runs the clustering pipeline over the points; per the story above this is
    // presumably canopy followed by k-means — helper defined elsewhere in this class.
    topLevelClustering(pointsPath, conf);
    // The reference data set is expected to yield exactly 2 clusters.
    int numberOfClusters = ClusterCountReader.getNumberOfClusters(outputPathForKMeans, conf);
    Assert.assertEquals(2, numberOfClusters);
    // Cross-check the count against the clustered-points output directory.
    verifyThatNumberOfClustersIsCorrect(conf, new Path(outputPathForKMeans, new Path("clusteredPoints")));
}