/**
 * Runs the per-cluster assertions for the first, second and third cluster,
 * in that order, for the classification run without outlier removal.
 */
private void assertVectorsWithoutOutlierRemoval() {
  assertFirstClusterWithoutOutlierRemoval();
  assertSecondClusterWithoutOutlierRemoval();
  assertThirdClusterWithoutOutlierRemoval();
}
/**
 * Writes the reference points to a temp directory, clusters them, classifies
 * them with outlier removal (runSequential = true) and then checks the
 * collected clusters.
 *
 * @throws Exception if any of the setup, clustering or classification steps fails
 */
@Test
public void testVectorClassificationWithOutlierRemoval() throws Exception {
  List<VectorWritable> referencePoints = getPointsWritable(REFERENCE);

  // Fresh working directories and configuration for this test.
  pointsPath = getTestTempDirPath("points");
  clusteringOutputPath = getTestTempDirPath("output");
  classifiedOutputPath = getTestTempDirPath("classify");
  conf = getConfiguration();

  Path pointsFile = new Path(pointsPath, "file1");
  ClusteringTestUtils.writePointsToFile(referencePoints, pointsFile, fs, conf);

  // NOTE(review): the third argument is presumably the "run sequential" flag; confirm against runClustering.
  runClustering(pointsPath, conf, true);
  runClassificationWithOutlierRemoval(true);

  collectVectorsForAssertion();
  assertVectorsWithOutlierRemoval();
}
/**
 * Writes the reference points to a temp directory, clusters them, classifies
 * them without outlier removal and then checks each of the three collected
 * clusters.
 *
 * @throws Exception if any of the setup, clustering or classification steps fails
 */
@Test
public void testVectorClassificationWithoutOutlierRemoval() throws Exception {
  List<VectorWritable> referencePoints = getPointsWritable(REFERENCE);

  // Fresh working directories and configuration for this test.
  pointsPath = getTestTempDirPath("points");
  clusteringOutputPath = getTestTempDirPath("output");
  classifiedOutputPath = getTestTempDirPath("classify");
  conf = getConfiguration();

  Path pointsFile = new Path(pointsPath, "file1");
  ClusteringTestUtils.writePointsToFile(referencePoints, pointsFile, fs, conf);

  // NOTE(review): the third argument is presumably the "run sequential" flag; confirm against runClustering.
  runClustering(pointsPath, conf, true);
  runClassificationWithoutOutlierRemoval();

  collectVectorsForAssertion();
  assertVectorsWithoutOutlierRemoval();
}
/**
 * Runs ClusterClassificationDriver over pointsPath using the clusters in
 * clusteringOutputPath, writing to classifiedOutputPath.
 *
 * @param runSequential forwarded as the last argument of ClusterClassificationDriver.run
 * @throws IOException            propagated from the driver
 * @throws InterruptedException   propagated from the driver
 * @throws ClassNotFoundException propagated from the driver
 */
private void runClassificationWithOutlierRemoval(boolean runSequential)
    throws IOException, InterruptedException, ClassNotFoundException {
  // NOTE(review): presumably the classification threshold below which points are
  // treated as outliers; confirm against ClusterClassificationDriver.run.
  final double threshold = 0.73;
  ClusterClassificationDriver.run(
      getConfiguration(),
      pointsPath,
      clusteringOutputPath,
      classifiedOutputPath,
      threshold,
      true,
      runSequential);
}
/**
 * Assertion entry point for the tests that classify with outlier removal;
 * delegates to checkClustersWithOutlierRemoval().
 */
private void assertVectorsWithOutlierRemoval() {
  checkClustersWithOutlierRemoval();
}
/**
 * Reads every part file found under classifiedOutputPath and passes each
 * record's key (as a string) and vector to collectVector.
 *
 * @throws IOException if listing the output or reading a part file fails
 */
private void collectVectorsForAssertion() throws IOException {
  Path[] partFilePaths = FileUtil.stat2Paths(fs.globStatus(classifiedOutputPath));
  FileStatus[] listStatus = fs.listStatus(partFilePaths, PathFilters.partFilter());
  for (FileStatus partFile : listStatus) {
    SequenceFile.Reader classifiedVectors = new SequenceFile.Reader(fs, partFile.getPath(), conf);
    try {
      Writable clusterIdAsKey = new IntWritable();
      WeightedPropertyVectorWritable point = new WeightedPropertyVectorWritable();
      while (classifiedVectors.next(clusterIdAsKey, point)) {
        collectVector(clusterIdAsKey.toString(), point.getVector());
      }
    } finally {
      // Release the underlying stream even when reading or an assertion helper throws;
      // previously the reader was never closed.
      classifiedVectors.close();
    }
  }
}
/**
 * Checks the three collected clusters after classification with outlier removal.
 *
 * Expectations enforced here: exactly one cluster is empty, exactly two are
 * non-empty, every non-empty cluster holds a single vector, and those vectors
 * (for NamedVector, its delegate) format to the two reference strings, each
 * matched once.
 */
private void checkClustersWithOutlierRemoval() {
  Set<String> reference = Sets.newHashSet("{0:9.0,1:9.0}", "{0:1.0,1:1.0}");

  List<List<Vector>> clusters = Lists.newArrayList();
  clusters.add(firstCluster);
  clusters.add(secondCluster);
  clusters.add(thirdCluster);

  int singletonCnt = 0;
  int emptyCnt = 0;
  for (List<Vector> members : clusters) {
    if (members.isEmpty()) {
      emptyCnt++;
      continue;
    }

    singletonCnt++;
    assertEquals("expecting only singleton clusters; got size=" + members.size(), 1, members.size());

    Vector only = members.get(0);
    Class<?> vectorType = only.getClass();
    String formatted;
    if (vectorType.equals(NamedVector.class)) {
      // Compare on the wrapped vector so the name does not take part in the match.
      formatted = ((NamedVector) only).getDelegate().asFormatString();
    } else if (vectorType.equals(RandomAccessSparseVector.class)) {
      formatted = only.asFormatString();
    } else {
      // Other vector types are not matched against the reference set here; the
      // final "matched all" assertion fails if a reference entry stays unmatched.
      continue;
    }

    Assert.assertTrue("not expecting cluster:" + formatted, reference.contains(formatted));
    // Remove the match so the same reference vector cannot satisfy two clusters.
    reference.remove(formatted);
  }

  Assert.assertEquals("Different number of empty clusters than expected!", 1, emptyCnt);
  Assert.assertEquals("Different number of singletons than expected!", 2, singletonCnt);
  Assert.assertEquals("Didn't match all reference clusters!", 0, reference.size());
}
/**
 * Runs ClusterClassificationDriver over pointsPath using the clusters in
 * clusteringOutputPath, writing to classifiedOutputPath, with a threshold
 * argument of 0.0 and both trailing boolean flags set to true.
 *
 * @throws IOException            propagated from the driver
 * @throws InterruptedException   propagated from the driver
 * @throws ClassNotFoundException propagated from the driver
 */
private void runClassificationWithoutOutlierRemoval()
    throws IOException, InterruptedException, ClassNotFoundException {
  // NOTE(review): 0.0 presumably means no point is rejected as an outlier; confirm
  // against ClusterClassificationDriver.run.
  final double threshold = 0.0;
  ClusterClassificationDriver.run(
      getConfiguration(),
      pointsPath,
      clusteringOutputPath,
      classifiedOutputPath,
      threshold,
      true,
      true);
}
/**
 * Variant of the outlier-removal test that passes false for the sequential
 * flags: writes the reference points, clusters them, classifies them with
 * outlier removal (runSequential = false) and checks the collected clusters.
 *
 * @throws Exception if any of the setup, clustering or classification steps fails
 */
@Test
public void testVectorClassificationWithOutlierRemovalMR() throws Exception {
  List<VectorWritable> points = getPointsWritable(REFERENCE);
  pointsPath = getTestTempDirPath("points");
  clusteringOutputPath = getTestTempDirPath("output");
  classifiedOutputPath = getTestTempDirPath("classifiedClusters");

  // Assign the configuration before its first use; previously the conf field was
  // handed to HadoopUtil.delete before this test had set it.
  conf = getConfiguration();
  HadoopUtil.delete(conf, classifiedOutputPath);

  ClusteringTestUtils.writePointsToFile(points, true, new Path(pointsPath, "file1"), fs, conf);
  // NOTE(review): the third argument is presumably the "run sequential" flag; confirm against runClustering.
  runClustering(pointsPath, conf, false);
  runClassificationWithOutlierRemoval(false);
  collectVectorsForAssertion();
  assertVectorsWithOutlierRemoval();
}
/**
 * Per-test fixture: runs the superclass setup, obtains the FileSystem for the
 * test configuration and resets the three cluster lists to empty lists.
 *
 * @throws Exception if the superclass setup or FileSystem lookup fails
 */
@Override
@Before
public void setUp() throws Exception {
  super.setUp();
  // The configuration is only needed here to look up the file system.
  fs = FileSystem.get(getConfiguration());
  firstCluster = Lists.newArrayList();
  secondCluster = Lists.newArrayList();
  thirdCluster = Lists.newArrayList();
}