/** * Story: User can specify a clustering limit that prevents output of small * clusters */ @Test public void testCanopyMapperClusterFilter() throws Exception { CanopyMapper mapper = new CanopyMapper(); Configuration conf = getConfiguration(); conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, manhattanDistanceMeasure .getClass().getName()); conf.set(CanopyConfigKeys.T1_KEY, String.valueOf(3.1)); conf.set(CanopyConfigKeys.T2_KEY, String.valueOf(2.1)); conf.set(CanopyConfigKeys.CF_KEY, "3"); DummyRecordWriter<Text, VectorWritable> writer = new DummyRecordWriter<Text, VectorWritable>(); Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable>.Context context = DummyRecordWriter .build(mapper, conf, writer); mapper.setup(context); List<VectorWritable> points = getPointsWritable(); // map the data for (VectorWritable point : points) { mapper.map(new Text(), point, context); } mapper.cleanup(context); assertEquals("Number of map results", 1, writer.getData().size()); // now verify the output List<VectorWritable> data = writer.getValue(new Text("centroid")); assertEquals("Number of centroids", 2, data.size()); }
// NOTE(review): this line is a truncated/garbled fragment, not compilable Java —
// the conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, manhattanDistanceMeasure ...)
// call is cut off mid-argument and no enclosing method signature survives.
// Presumably the remains of a sibling Manhattan-distance CanopyMapper test
// (same setup/map/cleanup/assert shape) — TODO recover the full method from VCS.
CanopyMapper mapper = new CanopyMapper(); Configuration conf = getConfiguration(); conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, manhattanDistanceMeasure Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable>.Context context = DummyRecordWriter .build(mapper, conf, writer); mapper.setup(context); mapper.map(new Text(), point, context); mapper.cleanup(context); assertEquals("Number of map results", 1, writer.getData().size());
// NOTE(review): truncated/garbled fragment, not compilable Java — the
// conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, euclideanDistanceMeasure ...)
// call is cut off mid-argument and no enclosing method signature survives.
// Presumably the remains of a Euclidean-distance CanopyMapper test mirroring
// the Manhattan variant — TODO recover the full method from VCS.
CanopyMapper mapper = new CanopyMapper(); Configuration conf = getConfiguration(); conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, euclideanDistanceMeasure Mapper<WritableComparable<?>, VectorWritable, Text, VectorWritable>.Context context = DummyRecordWriter .build(mapper, conf, writer); mapper.setup(context); mapper.map(new Text(), point, context); mapper.cleanup(context); assertEquals("Number of map results", 1, writer.getData().size());