@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  // Emit every model held by the classifier, keyed by its position in the model list.
  // A single ClusterWritable is reused; context.write serializes immediately, so this is safe.
  ClusterWritable writable = new ClusterWritable();
  List<Cluster> models = classifier.getModels();
  int index = 0;
  for (Cluster model : models) {
    writable.setValue(model);
    context.write(new IntWritable(index), writable);
    index++;
  }
  super.cleanup(context);
}
/**
 * Rebuilds this classifier's models and policy from sequence files under the given path.
 *
 * NOTE(review): a fresh Configuration is used to read the files, while the supplied
 * conf is only passed to Cluster.configure() — confirm this split is intentional.
 *
 * @param conf the Hadoop configuration handed to each loaded cluster
 * @param path the directory containing the serialized clusters and policy
 * @throws IOException if reading fails or no clusters are found under path
 */
public void readFromSeqFiles(Configuration conf, Path path) throws IOException {
  Configuration config = new Configuration();
  List<Cluster> clusters = Lists.newArrayList();
  for (ClusterWritable cw : new SequenceFileDirValueIterable<ClusterWritable>(path,
      PathType.LIST, PathFilters.logsCRCFilter(), config)) {
    Cluster cluster = cw.getValue();
    cluster.configure(conf);
    clusters.add(cluster);
  }
  // Fail fast with a descriptive error instead of the obscure
  // IndexOutOfBoundsException models.get(0) would otherwise throw.
  if (clusters.isEmpty()) {
    throw new IOException("No clusters found in " + path);
  }
  this.models = clusters;
  modelClass = models.get(0).getClass().getName();
  this.policy = readPolicy(path);
}
@Override protected void reduce(IntWritable key, Iterable<ClusterWritable> values, Context context) throws IOException, InterruptedException { Iterator<ClusterWritable> iter = values.iterator(); Cluster first = iter.next().getValue(); // there must always be at least one while (iter.hasNext()) { Cluster cluster = iter.next().getValue(); first.observe(cluster); } List<Cluster> models = Lists.newArrayList(); models.add(first); classifier = new ClusterClassifier(models, policy); classifier.close(); context.write(key, new ClusterWritable(first)); }
@Override protected void reduce(IntWritable key, Iterable<ClusterWritable> values, Context context) throws IOException, InterruptedException { Iterator<ClusterWritable> iter = values.iterator(); Cluster first = iter.next().getValue(); // there must always be at least one while (iter.hasNext()) { Cluster cluster = iter.next().getValue(); first.observe(cluster); } List<Cluster> models = Lists.newArrayList(); models.add(first); classifier = new ClusterClassifier(models, policy); classifier.close(); context.write(key, new ClusterWritable(first)); }
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  // Write out the classifier's models, keyed by list position.
  // The ClusterWritable wrapper is reused across writes.
  List<Cluster> models = classifier.getModels();
  ClusterWritable value = new ClusterWritable();
  int i = 0;
  while (i < models.size()) {
    value.setValue(models.get(i));
    context.write(new IntWritable(i), value);
    i++;
  }
  super.cleanup(context);
}
/**
 * Loads this classifier's models and policy back from sequence files under path.
 *
 * NOTE(review): the files are read with a fresh Configuration while the caller's
 * conf is only used to configure each cluster — verify this is intended.
 *
 * @param conf configuration passed to Cluster.configure() for each model
 * @param path directory holding the serialized clusters and policy
 * @throws IOException if reading fails or the directory contains no clusters
 */
public void readFromSeqFiles(Configuration conf, Path path) throws IOException {
  Configuration config = new Configuration();
  List<Cluster> clusters = Lists.newArrayList();
  for (ClusterWritable cw : new SequenceFileDirValueIterable<ClusterWritable>(path,
      PathType.LIST, PathFilters.logsCRCFilter(), config)) {
    Cluster cluster = cw.getValue();
    cluster.configure(conf);
    clusters.add(cluster);
  }
  // Guard against an empty directory: models.get(0) below would otherwise
  // fail with an uninformative IndexOutOfBoundsException.
  if (clusters.isEmpty()) {
    throw new IOException("No clusters found in " + path);
  }
  this.models = clusters;
  modelClass = models.get(0).getClass().getName();
  this.policy = readPolicy(path);
}
@Override protected void reduce(IntWritable key, Iterable<ClusterWritable> values, Context context) throws IOException, InterruptedException { Iterator<ClusterWritable> iter = values.iterator(); Cluster first = iter.next().getValue(); // there must always be at least one while (iter.hasNext()) { Cluster cluster = iter.next().getValue(); first.observe(cluster); } List<Cluster> models = new ArrayList<>(); models.add(first); classifier = new ClusterClassifier(models, policy); classifier.close(); context.write(key, new ClusterWritable(first)); }
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
  // Emit (position, model) pairs for every model the classifier holds,
  // then delegate to the superclass cleanup.
  List<Cluster> clusterModels = classifier.getModels();
  ClusterWritable out = new ClusterWritable();
  for (int position = 0; position < clusterModels.size(); position++) {
    out.setValue(clusterModels.get(position));
    context.write(new IntWritable(position), out);
  }
  super.cleanup(context);
}
/**
 * Loads all clusters from the first clusters-*-final entry under the clustering output path.
 *
 * @param clusterOutputPath the output path of the clustering job
 * @param conf the Hadoop configuration, also passed to each cluster's configure()
 * @return the clusters found in the final clustering output
 * @throws IOException if listing fails or no final cluster output exists yet
 */
public static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
  List<Cluster> clusters = new ArrayList<>();
  FileSystem fileSystem = clusterOutputPath.getFileSystem(conf);
  FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter());
  // Fail fast with a clear message instead of ArrayIndexOutOfBoundsException
  // when the clustering has not produced a final directory.
  if (clusterFiles.length == 0) {
    throw new IOException("No final clusters found under " + clusterOutputPath);
  }
  Iterator<?> it = new SequenceFileDirValueIterator<>(
      clusterFiles[0].getPath(), PathType.LIST, PathFilters.partFilter(), null, false, conf);
  while (it.hasNext()) {
    ClusterWritable next = (ClusterWritable) it.next();
    Cluster cluster = next.getValue();
    cluster.configure(conf);
    clusters.add(cluster);
  }
  return clusters;
}
// Fragment of a test method: verifies the cluster sequence file contains exactly
// two clusters, C-0 and C-1, whose centers match the expected reference centers.
// NOTE(review): the try block is not closed in this view — its catch/finally lies
// outside the visible chunk.
try {
  Writable key = new Text();
  ClusterWritable clusterWritable = new ClusterWritable();
  // First record must exist and carry key "C-0".
  assertTrue("more to come", reader.next(key, clusterWritable));
  assertEquals("1st key", "C-0", key.toString());
  // Expected centers; order-insensitive matching via findAndRemove below.
  refCenters.add(new Pair<Double,Double>(1.5,1.5));
  refCenters.add(new Pair<Double,Double>(4.333333333333334,4.333333333333334));
  Pair<Double,Double> c = new Pair<Double,Double>(clusterWritable.getValue()
      .getCenter().get(0), clusterWritable.getValue().getCenter().get(1));
  assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
  // Second record must exist and carry key "C-1".
  assertTrue("more to come", reader.next(key, clusterWritable));
  assertEquals("2nd key", "C-1", key.toString());
  c = new Pair<Double,Double>(clusterWritable.getValue().getCenter().get(0),
      clusterWritable.getValue().getCenter().get(1));
  assertTrue("center " + c + " not found", findAndRemove(c, refCenters, EPSILON));
  // No third record expected.
  assertFalse("more to come", reader.next(key, clusterWritable));
/**
 * Persists the policy, then writes each model as a single-entry sequence file
 * named part-00000, part-00001, ... under the given path.
 *
 * @param path the output directory for the policy and model part files
 * @throws IOException if writing any file fails
 */
public void writeToSeqFiles(Path path) throws IOException {
  writePolicy(policy, path);
  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(path.toUri(), config);
  ClusterWritable value = new ClusterWritable();
  int numModels = models.size();
  for (int i = 0; i < numModels; i++) {
    Path partPath = new Path(path, "part-" + String.format(Locale.ENGLISH, "%05d", i));
    // try-with-resources guarantees each writer is closed even on failure.
    try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, config, partPath,
        IntWritable.class, ClusterWritable.class)) {
      value.setValue(models.get(i));
      writer.append(new IntWritable(i), value);
    }
  }
}
/**
 * Restores this classifier's models and policy from sequence files under path.
 *
 * NOTE(review): files are read with a fresh Configuration while the passed conf
 * only configures each cluster — confirm this is the intended split.
 *
 * @param conf configuration handed to Cluster.configure() for each model
 * @param path directory containing the serialized clusters and policy
 * @throws IOException if reading fails or no clusters are present
 */
public void readFromSeqFiles(Configuration conf, Path path) throws IOException {
  Configuration config = new Configuration();
  List<Cluster> clusters = new ArrayList<>();
  for (ClusterWritable cw : new SequenceFileDirValueIterable<ClusterWritable>(path,
      PathType.LIST, PathFilters.logsCRCFilter(), config)) {
    Cluster cluster = cw.getValue();
    cluster.configure(conf);
    clusters.add(cluster);
  }
  // Explicit guard: models.get(0) below would otherwise throw an
  // uninformative IndexOutOfBoundsException on an empty directory.
  if (clusters.isEmpty()) {
    throw new IOException("No clusters found in " + path);
  }
  this.models = clusters;
  modelClass = models.get(0).getClass().getName();
  this.policy = readPolicy(path);
}
// Fragment of a test method: asserts the cluster output holds exactly two
// clusters, C-0 and C-1, with centers matching the expected references.
// NOTE(review): the try block's catch/finally is outside this visible chunk.
try {
  Writable key = new Text();
  ClusterWritable clusterWritable = new ClusterWritable();
  // First record: key "C-0".
  assertTrue("more to come", reader.next(key, clusterWritable));
  assertEquals("1st key", "C-0", key.toString());
  // Expected centers; matched order-insensitively via findAndRemove.
  refCenters.add(new Pair<Double,Double>(1.8,1.8));
  refCenters.add(new Pair<Double,Double>(4.433333333333334, 4.433333333333334));
  Pair<Double,Double> c = new Pair<Double,Double>(clusterWritable.getValue().getCenter().get(0),
      clusterWritable.getValue().getCenter().get(1));
  assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
  // Second record: key "C-1".
  assertTrue("more to come", reader.next(key, clusterWritable));
  assertEquals("2nd key", "C-1", key.toString());
  c = new Pair<Double,Double>(clusterWritable.getValue().getCenter().get(0),
      clusterWritable.getValue().getCenter().get(1));
  assertTrue("center "+c+" not found", findAndRemove(c, refCenters, EPSILON));
  // No further records expected.
  assertFalse("more to come", reader.next(key, clusterWritable));
/**
 * Persists the classification policy, then writes each model as a single-entry
 * sequence file named part-00000, part-00001, ... under the given path.
 *
 * @param path the output directory for the policy and model part files
 * @throws IOException if writing any file fails
 */
public void writeToSeqFiles(Path path) throws IOException {
  writePolicy(policy, path);
  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(path.toUri(), config);
  ClusterWritable cw = new ClusterWritable();
  for (int i = 0; i < models.size(); i++) {
    // try-with-resources replaces the manual writer + Closeables.close pattern:
    // previously, if the Writer constructor threw, the finally block closed the
    // PREVIOUS iteration's (already closed) writer instead of the failed one.
    try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, config,
        new Path(path, "part-" + String.format(Locale.ENGLISH, "%05d", i)),
        IntWritable.class, ClusterWritable.class)) {
      cw.setValue(models.get(i));
      writer.append(new IntWritable(i), cw);
    }
  }
}
/**
 * Loads all clusters from the first clusters-*-final entry under the clustering output path.
 *
 * @param clusterOutputPath the output path of the clustering job
 * @param conf the Hadoop configuration, also passed to each cluster's configure()
 * @return the clusters found in the final clustering output
 * @throws IOException if listing fails or no final cluster output exists
 */
public static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
  List<Cluster> clusters = Lists.newArrayList();
  FileSystem fileSystem = clusterOutputPath.getFileSystem(conf);
  FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter());
  // Guard against a missing final directory: clusterFiles[0] would otherwise
  // throw an uninformative ArrayIndexOutOfBoundsException.
  if (clusterFiles.length == 0) {
    throw new IOException("No final clusters found under " + clusterOutputPath);
  }
  Iterator<?> it = new SequenceFileDirValueIterator<Writable>(
      clusterFiles[0].getPath(), PathType.LIST, PathFilters.partFilter(), null, false, conf);
  while (it.hasNext()) {
    ClusterWritable next = (ClusterWritable) it.next();
    Cluster cluster = next.getValue();
    cluster.configure(conf);
    clusters.add(cluster);
  }
  return clusters;
}
/**
 * Writes the policy and then each model to its own part-NNNNN sequence file
 * under the supplied path.
 *
 * @param path the output directory for the policy and model part files
 * @throws IOException if writing any file fails
 */
public void writeToSeqFiles(Path path) throws IOException {
  writePolicy(policy, path);
  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(path.toUri(), config);
  ClusterWritable cw = new ClusterWritable();
  for (int i = 0; i < models.size(); i++) {
    // try-with-resources fixes the original lifecycle: with the writer declared
    // outside the loop, a failing Writer constructor made the finally block
    // close the previous iteration's already-closed writer rather than this one.
    try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, config,
        new Path(path, "part-" + String.format(Locale.ENGLISH, "%05d", i)),
        IntWritable.class, ClusterWritable.class)) {
      cw.setValue(models.get(i));
      writer.append(new IntWritable(i), cw);
    }
  }
}
/**
 * Reads the clusters stored in the first clusters-*-final entry of the output path.
 *
 * @param clusterOutputPath the output path of the clustering job
 * @param conf the Hadoop configuration, also used to configure each cluster
 * @return the list of final clusters
 * @throws IOException if listing fails or the final output is absent
 */
public static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
  List<Cluster> clusters = Lists.newArrayList();
  FileSystem fileSystem = clusterOutputPath.getFileSystem(conf);
  FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter());
  // Explicit check avoids an obscure ArrayIndexOutOfBoundsException when the
  // clustering job has not written a final directory yet.
  if (clusterFiles.length == 0) {
    throw new IOException("No final clusters found under " + clusterOutputPath);
  }
  Iterator<?> it = new SequenceFileDirValueIterator<Writable>(
      clusterFiles[0].getPath(), PathType.LIST, PathFilters.partFilter(), null, false, conf);
  while (it.hasNext()) {
    ClusterWritable next = (ClusterWritable) it.next();
    Cluster cluster = next.getValue();
    cluster.configure(conf);
    clusters.add(cluster);
  }
  return clusters;
}
@Override
protected void reduce(Text arg0, Iterable<VectorWritable> values, Context context)
    throws IOException, InterruptedException {
  // Assign every incoming point to the canopies, then emit only the canopies
  // that collected more observations than the configured filter threshold.
  for (VectorWritable value : values) {
    canopyClusterer.addPointToCanopies(value.get(), canopies);
  }
  for (Canopy canopy : canopies) {
    canopy.computeParameters();
    if (canopy.getNumObservations() <= clusterFilter) {
      continue; // below threshold: drop this canopy
    }
    ClusterWritable out = new ClusterWritable();
    out.setValue(canopy);
    context.write(new Text(canopy.getIdentifier()), out);
  }
}
/**
 * Populates a list with the clusters present in the clusters-*-final directory.
 *
 * @param clusterOutputPath the output path of the clustering
 * @param conf the Hadoop Configuration
 * @return the list of clusters found by the clustering
 * @throws IOException if the final clusters cannot be read
 */
private static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
  List<Cluster> clusterModels = new ArrayList<>();
  Path finalClustersPath = finalClustersPath(conf, clusterOutputPath);
  Iterator<?> it = new SequenceFileDirValueIterator<>(finalClustersPath, PathType.LIST,
      PathFilters.partFilter(), null, false, conf);
  while (it.hasNext()) {
    Cluster cluster = ((ClusterWritable) it.next()).getValue();
    cluster.configure(conf);
    clusterModels.add(cluster);
  }
  return clusterModels;
}
@Override
protected void reduce(Text arg0, Iterable<VectorWritable> values, Context context)
    throws IOException, InterruptedException {
  // First pass: cluster every incoming point into the canopies.
  for (VectorWritable vectorWritable : values) {
    Vector point = vectorWritable.get();
    canopyClusterer.addPointToCanopies(point, canopies);
  }
  // Second pass: finalize each canopy and emit those exceeding the filter threshold.
  for (Canopy canopy : canopies) {
    canopy.computeParameters();
    boolean aboveThreshold = canopy.getNumObservations() > clusterFilter;
    if (aboveThreshold) {
      ClusterWritable clusterWritable = new ClusterWritable();
      clusterWritable.setValue(canopy);
      context.write(new Text(canopy.getIdentifier()), clusterWritable);
    }
  }
}