/**
 * Reducer step: folds all partial clusters emitted for one cluster id into a
 * single combined cluster and writes it out under the same key.
 *
 * <p>The first value is used as the accumulator and every subsequent partial
 * cluster is merged into it via {@code observe}. The combined cluster is then
 * wrapped in a single-model {@code ClusterClassifier} which is closed
 * (finalizing its parameters) before the result is written.
 *
 * @param key     the cluster id
 * @param values  partial {@code ClusterWritable}s for this id; the reducer is
 *                only invoked with at least one value, hence the unguarded
 *                first {@code next()}
 * @param context MapReduce context the combined cluster is written to
 * @throws IOException          on write failure
 * @throws InterruptedException if the task is interrupted
 */
@Override protected void reduce(IntWritable key, Iterable<ClusterWritable> values, Context context) throws IOException, InterruptedException { Iterator<ClusterWritable> iter = values.iterator(); Cluster first = iter.next().getValue(); // there must always be at least one while (iter.hasNext()) { Cluster cluster = iter.next().getValue(); first.observe(cluster); } List<Cluster> models = Lists.newArrayList(); models.add(first); classifier = new ClusterClassifier(models, policy); classifier.close(); context.write(key, new ClusterWritable(first)); }
@Override protected void reduce(IntWritable key, Iterable<ClusterWritable> values, Context context) throws IOException, InterruptedException { Iterator<ClusterWritable> iter = values.iterator(); Cluster first = iter.next().getValue(); // there must always be at least one while (iter.hasNext()) { Cluster cluster = iter.next().getValue(); first.observe(cluster); } List<Cluster> models = Lists.newArrayList(); models.add(first); classifier = new ClusterClassifier(models, policy); classifier.close(); context.write(key, new ClusterWritable(first)); }
@Override protected void reduce(IntWritable key, Iterable<ClusterWritable> values, Context context) throws IOException, InterruptedException { Iterator<ClusterWritable> iter = values.iterator(); Cluster first = iter.next().getValue(); // there must always be at least one while (iter.hasNext()) { Cluster cluster = iter.next().getValue(); first.observe(cluster); } List<Cluster> models = new ArrayList<>(); models.add(first); classifier = new ClusterClassifier(models, policy); classifier.close(); context.write(key, new ClusterWritable(first)); }
/**
 * Loads this classifier's cluster models and classification policy from the
 * sequence files under {@code path}.
 *
 * @param conf the Hadoop configuration used to configure each loaded cluster
 * @param path directory containing the cluster sequence files and the policy
 * @throws IOException if no clusters are found under {@code path} or the
 *                     files cannot be read
 */
public void readFromSeqFiles(Configuration conf, Path path) throws IOException {
  // NOTE(review): the sequence files are read with a fresh default
  // Configuration rather than the caller-supplied conf (which is only used to
  // configure the clusters) — presumably deliberate; verify on non-default
  // filesystems.
  Configuration config = new Configuration();
  List<Cluster> clusters = Lists.newArrayList();
  for (ClusterWritable cw : new SequenceFileDirValueIterable<ClusterWritable>(path, PathType.LIST, PathFilters.logsCRCFilter(), config)) {
    Cluster cluster = cw.getValue();
    cluster.configure(conf);
    clusters.add(cluster);
  }
  // Fail with a clear message instead of the IndexOutOfBoundsException the
  // unguarded models.get(0) below used to throw on an empty directory.
  if (clusters.isEmpty()) {
    throw new IOException("No clusters found in " + path);
  }
  this.models = clusters;
  modelClass = models.get(0).getClass().getName();
  this.policy = readPolicy(path);
}
/**
 * Loads this classifier's cluster models and classification policy from the
 * sequence files under {@code path}.
 *
 * <p>Note: the files are read with a fresh default {@code Configuration};
 * the caller-supplied {@code conf} is only used to configure each cluster —
 * presumably deliberate, but verify on non-default filesystems. Also note the
 * unguarded {@code models.get(0)} throws {@code IndexOutOfBoundsException}
 * if the directory contains no clusters.
 *
 * @param conf the Hadoop configuration used to configure each loaded cluster
 * @param path directory containing the cluster sequence files and the policy
 * @throws IOException if the files cannot be read
 */
public void readFromSeqFiles(Configuration conf, Path path) throws IOException { Configuration config = new Configuration(); List<Cluster> clusters = Lists.newArrayList(); for (ClusterWritable cw : new SequenceFileDirValueIterable<ClusterWritable>(path, PathType.LIST, PathFilters.logsCRCFilter(), config)) { Cluster cluster = cw.getValue(); cluster.configure(conf); clusters.add(cluster); } this.models = clusters; modelClass = models.get(0).getClass().getName(); this.policy = readPolicy(path); }
/**
 * Reads the cluster models and the classification policy for this classifier
 * from the sequence files found under {@code path}.
 */
public void readFromSeqFiles(Configuration conf, Path path) throws IOException {
  // The sequence files themselves are read with a default Configuration; the
  // caller-supplied conf is used only to configure each loaded cluster.
  Configuration config = new Configuration();
  List<Cluster> loaded = new ArrayList<>();
  SequenceFileDirValueIterable<ClusterWritable> iterable =
      new SequenceFileDirValueIterable<ClusterWritable>(path, PathType.LIST, PathFilters.logsCRCFilter(), config);
  for (ClusterWritable cw : iterable) {
    Cluster model = cw.getValue();
    model.configure(conf);
    loaded.add(model);
  }
  this.models = loaded;
  this.modelClass = models.get(0).getClass().getName();
  this.policy = readPolicy(path);
}
/**
 * Reads every cluster model from the first "final" clusters directory found
 * under {@code clusterOutputPath}.
 *
 * @param clusterOutputPath the clustering output path to scan
 * @param conf              the Hadoop configuration, also used to configure
 *                          each loaded cluster
 * @return the clusters read from the final-clusters directory
 * @throws IOException if no final-clusters directory exists under
 *                     {@code clusterOutputPath} or the files cannot be read
 */
public static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
  List<Cluster> clusters = new ArrayList<>();
  FileSystem fileSystem = clusterOutputPath.getFileSystem(conf);
  FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter());
  // Fail with a clear message instead of the ArrayIndexOutOfBoundsException
  // the unguarded clusterFiles[0] used to throw when no final dir exists.
  if (clusterFiles.length == 0) {
    throw new IOException("No final clusters directory found under " + clusterOutputPath);
  }
  Iterator<?> it = new SequenceFileDirValueIterator<>(
      clusterFiles[0].getPath(), PathType.LIST, PathFilters.partFilter(), null, false, conf);
  while (it.hasNext()) {
    ClusterWritable next = (ClusterWritable) it.next();
    Cluster cluster = next.getValue();
    cluster.configure(conf);
    clusters.add(cluster);
  }
  return clusters;
}
/**
 * Reads every cluster model from the first "final" clusters directory found
 * under {@code clusterOutputPath}.
 *
 * @param clusterOutputPath the clustering output path to scan
 * @param conf              the Hadoop configuration, also used to configure
 *                          each loaded cluster
 * @return the clusters read from the final-clusters directory
 * @throws IOException if no final-clusters directory exists under
 *                     {@code clusterOutputPath} or the files cannot be read
 */
public static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
  List<Cluster> clusters = Lists.newArrayList();
  FileSystem fileSystem = clusterOutputPath.getFileSystem(conf);
  FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter());
  // Fail with a clear message instead of the ArrayIndexOutOfBoundsException
  // the unguarded clusterFiles[0] used to throw when no final dir exists.
  if (clusterFiles.length == 0) {
    throw new IOException("No final clusters directory found under " + clusterOutputPath);
  }
  Iterator<?> it = new SequenceFileDirValueIterator<Writable>(
      clusterFiles[0].getPath(), PathType.LIST, PathFilters.partFilter(), null, false, conf);
  while (it.hasNext()) {
    ClusterWritable next = (ClusterWritable) it.next();
    Cluster cluster = next.getValue();
    cluster.configure(conf);
    clusters.add(cluster);
  }
  return clusters;
}
/**
 * Reads every cluster model from the first "final" clusters directory found
 * under {@code clusterOutputPath}.
 *
 * <p>Note: the unguarded {@code clusterFiles[0]} throws
 * {@code ArrayIndexOutOfBoundsException} when no final-clusters directory
 * exists under the output path.
 *
 * @param clusterOutputPath the clustering output path to scan
 * @param conf              the Hadoop configuration, also used to configure
 *                          each loaded cluster
 * @return the clusters read from the final-clusters directory
 * @throws IOException if the cluster files cannot be read
 */
public static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException { List<Cluster> clusters = Lists.newArrayList(); FileSystem fileSystem = clusterOutputPath.getFileSystem(conf); FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter()); Iterator<?> it = new SequenceFileDirValueIterator<Writable>( clusterFiles[0].getPath(), PathType.LIST, PathFilters.partFilter(), null, false, conf); while (it.hasNext()) { ClusterWritable next = (ClusterWritable) it.next(); Cluster cluster = next.getValue(); cluster.configure(conf); clusters.add(cluster); } return clusters; }
/**
 * Populates a list with the clusters present in the clusters-*-final
 * directory of a clustering run.
 *
 * <p>Resolves the final-clusters path via {@code finalClustersPath}, then
 * drains a {@code SequenceFileDirValueIterator} over its part files,
 * configuring each cluster with {@code conf} before collecting it.
 *
 * @param clusterOutputPath the output path of the clustering
 * @param conf              the Hadoop configuration, also used to configure
 *                          each loaded cluster
 * @return the list of clusters found by the clustering
 * @throws IOException if the cluster files cannot be read
 */
private static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException { List<Cluster> clusterModels = Lists.newArrayList(); Path finalClustersPath = finalClustersPath(conf, clusterOutputPath); Iterator<?> it = new SequenceFileDirValueIterator<Writable>(finalClustersPath, PathType.LIST, PathFilters.partFilter(), null, false, conf); while (it.hasNext()) { ClusterWritable next = (ClusterWritable) it.next(); Cluster cluster = next.getValue(); cluster.configure(conf); clusterModels.add(cluster); } return clusterModels; }
/**
 * Populates a list with the clusters present in the clusters-*-final
 * directory of a clustering run.
 *
 * @param clusterOutputPath the output path of the clustering
 * @param conf              the Hadoop configuration, also used to configure
 *                          each loaded cluster
 * @return the list of clusters found by the clustering
 * @throws IOException if the cluster files cannot be read
 */
private static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf)
    throws IOException {
  List<Cluster> result = new ArrayList<>();
  Path finalClusters = finalClustersPath(conf, clusterOutputPath);
  Iterator<?> values = new SequenceFileDirValueIterator<>(finalClusters, PathType.LIST,
      PathFilters.partFilter(), null, false, conf);
  while (values.hasNext()) {
    Cluster cluster = ((ClusterWritable) values.next()).getValue();
    cluster.configure(conf);
    result.add(cluster);
  }
  return result;
}
/**
 * Populates a list with the clusters present in the clusters-*-final
 * directory of a clustering run.
 *
 * @param clusterOutputPath the output path of the clustering
 * @param conf              the Hadoop configuration, also used to configure
 *                          each loaded cluster
 * @return the list of clusters found by the clustering
 * @throws IOException if the cluster files cannot be read
 */
private static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf)
    throws IOException {
  List<Cluster> loaded = Lists.newArrayList();
  Path finalDir = finalClustersPath(conf, clusterOutputPath);
  Iterator<?> it = new SequenceFileDirValueIterator<Writable>(finalDir, PathType.LIST,
      PathFilters.partFilter(), null, false, conf);
  while (it.hasNext()) {
    ClusterWritable cw = (ClusterWritable) it.next();
    Cluster model = cw.getValue();
    model.configure(conf);
    loaded.add(model);
  }
  return loaded;
}
/**
 * Return if all of the Clusters in the parts in the filePath have converged
 * or not.
 *
 * @param filePath the file path to the single file containing the clusters
 * @param conf     the Hadoop configuration
 * @param fs       the filesystem holding the part files
 * @return true if all Clusters are converged
 * @throws IOException if there was an IO error
 */
private static boolean isConverged(Path filePath, Configuration conf, FileSystem fs) throws IOException {
  for (FileStatus part : fs.listStatus(filePath, PathFilters.partFilter())) {
    SequenceFileValueIterator<ClusterWritable> iterator = new SequenceFileValueIterator<ClusterWritable>(
        part.getPath(), true, conf);
    try {
      while (iterator.hasNext()) {
        ClusterWritable value = iterator.next();
        if (!value.getValue().isConverged()) {
          return false;
        }
      }
    } finally {
      // Always release the underlying reader; the original closed it only on
      // the early non-converged exit, leaking it when iteration threw.
      Closeables.close(iterator, true);
    }
  }
  return true;
} }
/**
 * Return if all of the Clusters in the parts in the filePath have converged
 * or not.
 *
 * @param filePath the file path to the single file containing the clusters
 * @param conf     the Hadoop configuration
 * @param fs       the filesystem holding the part files
 * @return true if all Clusters are converged
 * @throws IOException if there was an IO error
 */
private static boolean isConverged(Path filePath, Configuration conf, FileSystem fs) throws IOException {
  for (FileStatus part : fs.listStatus(filePath, PathFilters.partFilter())) {
    SequenceFileValueIterator<ClusterWritable> iterator = new SequenceFileValueIterator<ClusterWritable>(
        part.getPath(), true, conf);
    try {
      while (iterator.hasNext()) {
        ClusterWritable value = iterator.next();
        if (!value.getValue().isConverged()) {
          return false;
        }
      }
    } finally {
      // Always release the underlying reader; the original closed it only on
      // the early non-converged exit, leaking it when iteration threw.
      Closeables.close(iterator, true);
    }
  }
  return true;
} }
/**
 * Return if all of the Clusters in the parts in the filePath have converged
 * or not.
 *
 * @param filePath the file path to the single file containing the clusters
 * @param conf     the Hadoop configuration
 * @param fs       the filesystem holding the part files
 * @return true if all Clusters are converged
 * @throws IOException if there was an IO error
 */
private static boolean isConverged(Path filePath, Configuration conf, FileSystem fs) throws IOException {
  for (FileStatus part : fs.listStatus(filePath, PathFilters.partFilter())) {
    SequenceFileValueIterator<ClusterWritable> iterator = new SequenceFileValueIterator<>(
        part.getPath(), true, conf);
    try {
      while (iterator.hasNext()) {
        ClusterWritable value = iterator.next();
        if (!value.getValue().isConverged()) {
          return false;
        }
      }
    } finally {
      // Always release the underlying reader; the original closed it only on
      // the early non-converged exit, leaking it when iteration threw.
      Closeables.close(iterator, true);
    }
  }
  return true;
} }
/**
 * Be sure that the seeded buildRandom works in the same way as
 * RandomSeedGenerator.buildRandom: with a fixed seed (1L) it must still
 * produce exactly 4 clusters with unique ids whose centers match the raw
 * input points.
 */
@Test public void testRandomSeedGeneratorSeeded() throws Exception { List<VectorWritable> points = getPoints(); Job job = new Job(); Configuration conf = job.getConfiguration(); job.setMapOutputValueClass(VectorWritable.class); Path input = getTestTempFilePath("random-input"); Path output = getTestTempDirPath("random-output"); ClusteringTestUtils.writePointsToFile(points, input, fs, conf); RandomSeedGenerator.buildRandom(conf, input, output, 4, new ManhattanDistanceMeasure(), 1L); int clusterCount = 0; Collection<Integer> set = Sets.newHashSet(); for (ClusterWritable clusterWritable : new SequenceFileValueIterable<ClusterWritable>(new Path(output, "part-randomSeed"), true, conf)) { clusterCount++; Cluster cluster = clusterWritable.getValue(); int id = cluster.getId(); assertTrue(set.add(id)); // validate unique id's Vector v = cluster.getCenter(); assertVectorEquals(RAW[id], v); // validate values match } assertEquals(4, clusterCount); // validate sample count }
/** Story: test random seed generation generates 4 clusters with proper ids and data */ @Test public void testRandomSeedGenerator() throws Exception { List<VectorWritable> points = getPoints(); Job job = new Job(); Configuration conf = job.getConfiguration(); job.setMapOutputValueClass(VectorWritable.class); Path input = getTestTempFilePath("random-input"); Path output = getTestTempDirPath("random-output"); ClusteringTestUtils.writePointsToFile(points, input, fs, conf); RandomSeedGenerator.buildRandom(conf, input, output, 4, new ManhattanDistanceMeasure()); int clusterCount = 0; Collection<Integer> set = Sets.newHashSet(); for (ClusterWritable clusterWritable : new SequenceFileValueIterable<ClusterWritable>(new Path(output, "part-randomSeed"), true, conf)) { clusterCount++; Cluster cluster = clusterWritable.getValue(); int id = cluster.getId(); assertTrue(set.add(id)); // Validate unique id's Vector v = cluster.getCenter(); assertVectorEquals(RAW[id], v); // Validate values match } assertEquals(4, clusterCount); // Validate sample count }
List<ClusterWritable> data = writer.getValue(key); ClusterWritable clusterWritable = data.get(0); Canopy canopy = (Canopy) clusterWritable.getValue(); assertEquals(euclideanCentroids.get(i).asFormatString() + " is not equal to " + canopy.computeCentroid().asFormatString(),
/**
 * Story: user can cluster points using sequential (non-MapReduce) execution.
 *
 * <p>Writes the test points to a sequence file, runs CanopyDriver in
 * sequential mode with the Manhattan distance measure (t1=3.1, t2=2.1,
 * clustering enabled), then verifies that the final cluster centers match
 * the expected Manhattan centroids and that every input point was clustered.
 */
@Test public void testClusteringManhattanSeq() throws Exception { List<VectorWritable> points = getPointsWritable(); Configuration config = getConfiguration(); ClusteringTestUtils.writePointsToFile(points, getTestTempFilePath("testdata/file1"), fs, config); // now run the Canopy Driver in sequential mode Path output = getTestTempDirPath("output"); CanopyDriver.run(config, getTestTempDirPath("testdata"), output, manhattanDistanceMeasure, 3.1, 2.1, true, 0.0, true); // verify output from sequence file Path path = new Path(output, "clusters-0-final/part-r-00000"); int ix = 0; for (ClusterWritable clusterWritable : new SequenceFileValueIterable<ClusterWritable>(path, true, config)) { assertEquals("Center [" + ix + ']', manhattanCentroids.get(ix), clusterWritable.getValue() .getCenter()); ix++; } path = new Path(output, "clusteredPoints/part-m-0"); long count = HadoopUtil.countRecords(path, config); assertEquals("number of points", points.size(), count); }
@Test public void testEigenSeedGenerator() throws Exception { List<VectorWritable> points = getPoints(); Job job = new Job(); Configuration conf = job.getConfiguration(); job.setMapOutputValueClass(VectorWritable.class); Path input = getTestTempFilePath("eigen-input"); Path output = getTestTempDirPath("eigen-output"); ClusteringTestUtils.writePointsToFile(points, input, fs, conf); EigenSeedGenerator.buildFromEigens(conf, input, output, 3, new ManhattanDistanceMeasure()); int clusterCount = 0; Collection<Integer> set = new HashSet<Integer>(); Vector v[] = new Vector[3]; for (ClusterWritable clusterWritable : new SequenceFileValueIterable<ClusterWritable>( new Path(output, "part-eigenSeed"), true, conf)) { Cluster cluster = clusterWritable.getValue(); int id = cluster.getId(); assertTrue(set.add(id)); // validate unique id's v[id] = cluster.getCenter(); clusterCount++; } assertEquals(3, clusterCount); // validate sample count // validate pair-wise orthogonality assertEquals(0, v[0].dot(v[1]), 1E-10); assertEquals(0, v[1].dot(v[2]), 1E-10); assertEquals(0, v[0].dot(v[2]), 1E-10); }