@Override public void setup(Context context) throws IOException { Configuration conf = context.getConfiguration(); Path clusterOutputPath = new Path(conf.get("clusterOutputPath")); //we want to the key to be the index, the value to be the cluster id reverseClusterMappings = ClusterCountReader.getClusterIDs(clusterOutputPath, conf, false); }
// Warn on stdout that the MapReduce code path is not expected to work in Hadoop local mode.
System.out.println("WARNING: If you are running in Hadoop local mode, please use the --sequential option, "
    + "as the MapReduce option will not work properly");
// Number of clusters found under the input path.
int numberOfClusters = ClusterCountReader.getNumberOfClusters(input, conf);
// Publish the input path under "clusterOutputPath"; setup(Context) reads this key back
// through context.getConfiguration(). It is set before the Job is constructed from conf.
conf.set("clusterOutputPath", input.toString());
// The job name embeds the input path.
Job job = new Job(conf, "ClusterOutputPostProcessor Driver running over input: " + input);
// Warn on stdout that the MapReduce code path is not expected to work in Hadoop local mode.
System.out.println("WARNING: If you are running in Hadoop local mode, please use the --sequential option, "
    + "as the MapReduce option will not work properly");
// Number of clusters found under the input path.
int numberOfClusters = ClusterCountReader.getNumberOfClusters(input, conf);
// Publish the input path under "clusterOutputPath"; setup(Context) reads this key back
// through context.getConfiguration(). It is set before the Job is constructed from conf.
conf.set("clusterOutputPath", input.toString());
// The job name embeds the input path.
Job job = new Job(conf, "ClusterOutputPostProcessor Driver running over input: " + input);
// Warn on stdout that the MapReduce code path is not expected to work in Hadoop local mode.
System.out.println("WARNING: If you are running in Hadoop local mode, please use the --sequential option, "
    + "as the MapReduce option will not work properly");
// Number of clusters found under the input path.
int numberOfClusters = ClusterCountReader.getNumberOfClusters(input, conf);
// Publish the input path under "clusterOutputPath"; setup(Context) reads this key back
// through context.getConfiguration(). It is set before the Job is constructed from conf.
conf.set("clusterOutputPath", input.toString());
// The job name embeds the input path.
Job job = new Job(conf, "ClusterOutputPostProcessor Driver running over input: " + input);
@Override public void setup(Context context) throws IOException { Configuration conf = context.getConfiguration(); //this give the clusters-x-final directory where the cluster ids can be read Path clusterOutputPath = new Path(conf.get("clusterOutputPath")); //we want the key to be the cluster id, the value to be the index newClusterMappings = ClusterCountReader.getClusterIDs(clusterOutputPath, conf, true); outputVector = new VectorWritable(); }
/**
 * Story: the user wants to run the cluster post processor after canopy clustering and
 * then run clustering on the output clusters.
 *
 * <p>The reference points are written to two files in the same input directory, the
 * top-level clustering is run, and the cluster count read back from the k-means output
 * is expected to be 2. The "clusteredPoints" sub-directory is then verified as well.
 */
@Test
public void testGetNumberOfClusters() throws Exception {
  List<VectorWritable> referencePoints = getPointsWritable(REFERENCE);
  Path inputDir = getTestTempDirPath("points");
  Configuration conf = getConfiguration();

  // The same point set is written twice, once per input file.
  for (String fileName : new String[] {"file1", "file2"}) {
    ClusteringTestUtils.writePointsToFile(referencePoints, new Path(inputDir, fileName), fs, conf);
  }

  outputPathForCanopy = getTestTempDirPath("canopy");
  outputPathForKMeans = getTestTempDirPath("kmeans");

  topLevelClustering(inputDir, conf);

  Assert.assertEquals(2, ClusterCountReader.getNumberOfClusters(outputPathForKMeans, conf));

  Path clusteredPointsDir = new Path(outputPathForKMeans, new Path("clusteredPoints"));
  verifyThatNumberOfClustersIsCorrect(conf, clusteredPointsDir);
}
@Override public void setup(Context context) throws IOException { Configuration conf = context.getConfiguration(); Path clusterOutputPath = new Path(conf.get("clusterOutputPath")); //we want to the key to be the index, the value to be the cluster id reverseClusterMappings = ClusterCountReader.getClusterIDs(clusterOutputPath, conf, false); }
@Override public void setup(Context context) throws IOException { Configuration conf = context.getConfiguration(); //this give the clusters-x-final directory where the cluster ids can be read Path clusterOutputPath = new Path(conf.get("clusterOutputPath")); //we want the key to be the cluster id, the value to be the index newClusterMappings = ClusterCountReader.getClusterIDs(clusterOutputPath, conf, true); outputVector = new VectorWritable(); }
@Override public void setup(Context context) throws IOException { Configuration conf = context.getConfiguration(); Path clusterOutputPath = new Path(conf.get("clusterOutputPath")); //we want to the key to be the index, the value to be the cluster id reverseClusterMappings = ClusterCountReader.getClusterIDs(clusterOutputPath, conf, false); }
@Override public void setup(Context context) throws IOException { Configuration conf = context.getConfiguration(); //this give the clusters-x-final directory where the cluster ids can be read Path clusterOutputPath = new Path(conf.get("clusterOutputPath")); //we want the key to be the cluster id, the value to be the index newClusterMappings = ClusterCountReader.getClusterIDs(clusterOutputPath, conf, true); outputVector = new VectorWritable(); }