/**
 * Sequential post processing. Hands the work to a {@code ClusterOutputPostProcessor}, which reads the
 * vectors one by one and puts each into the directory named after its clusterId.
 *
 * @param input
 *          The output path that was provided to the clustering algorithm and is now to be post processed.
 *          Hint : the path of the directory containing clusters-*-final and clusteredPoints.
 * @param output
 *          The path at which the post processed data is stored.
 * @throws IOException
 *           propagated from {@code ClusterOutputPostProcessor.process()}.
 */
private static void postProcessSeq(Path input, Path output) throws IOException {
  // A fresh default Configuration is used for the sequential run.
  new ClusterOutputPostProcessor(input, output, new Configuration()).process();
}
/**
 * Routes one clustered point to its cluster: obtains the writer for {@code clusterId}, records the
 * cluster's output directory in {@code postProcessedClusterDirectories}, and then writes the point.
 *
 * @param clusterId
 *          id of the cluster the point belongs to.
 * @param point
 *          the vector to write into that cluster.
 * @throws IOException
 *           propagated from the writer lookup or from the write itself.
 */
private void putVectorInRespectiveCluster(String clusterId, WeightedVectorWritable point) throws IOException {
  // The writer is resolved first so that a failure here leaves the directory map untouched.
  final Writer clusterWriter = findWriterForVector(clusterId);

  postProcessedClusterDirectories.put(
      clusterId,
      PathDirectory.getClusterPathForClusterId(clusterPostProcessorOutput, clusterId));

  writeVectorToCluster(clusterWriter, point);
}
/**
 * Runs a {@code ClusterOutputPostProcessor} that uses {@code outputPath} as both its input and its
 * output location, then returns the directories it produced.
 *
 * @param conf
 *          configuration handed to the post processor.
 * @return the post processor's map of post processed cluster directories.
 * @throws IOException
 *           propagated from {@code ClusterOutputPostProcessor.process()}.
 */
private Map<String,Path> ouputPostProcessing(Configuration conf) throws IOException {
  ClusterOutputPostProcessor postProcessor =
      new ClusterOutputPostProcessor(outputPath, outputPath, conf);
  postProcessor.process();
  return postProcessor.getPostProcessedClusterDirectories();
}
/**
 * This method takes the clustered points output by the clustering algorithms as input and writes them into
 * their respective clusters.
 * <p>
 * Every record found under {@code clusteredPoints} (glob path, part files only) is handled in turn: the
 * trimmed string form of its key is used as the cluster id and its value is passed to
 * {@link #putVectorInRespectiveCluster(String, WeightedVectorWritable)}.
 * <p>
 * The writers held in {@code writersForClusters} are closed and the map is cleared when this method
 * exits, whether it completes normally or fails part-way through, so a failure does not leak open writers.
 *
 * @throws IOException
 *           if creating the post process directory, reading a record, writing a vector or closing a
 *           writer fails. If closing fails after an earlier failure, the close failure is the one thrown.
 */
public void process() throws IOException {
  createPostProcessDirectory();
  try {
    for (Pair<?, WeightedVectorWritable> record
        : new SequenceFileDirIterable<Writable, WeightedVectorWritable>(clusteredPoints, PathType.GLOB,
            PathFilters.partFilter(), null, false, conf)) {
      String clusterId = record.getFirst().toString().trim();
      putVectorInRespectiveCluster(clusterId, record.getSecond());
    }
  } finally {
    // Close in finally: an exception in the loop must not leave writers open.
    try {
      IOUtils.close(writersForClusters.values());
    } finally {
      // Always drop the references so no closed writer is reused by a later call.
      writersForClusters.clear();
    }
  }
}
/**
 * This method takes the clustered points output by the clustering algorithms as input and writes them into
 * their respective clusters.
 * <p>
 * Every record found under {@code clusteredPoints} (glob path, part files only) is handled in turn: the
 * trimmed string form of its key is used as the cluster id and its value is passed to
 * {@link #putVectorInRespectiveCluster(String, WeightedVectorWritable)}.
 * <p>
 * The writers held in {@code writersForClusters} are closed and the map is cleared when this method
 * exits, whether it completes normally or fails part-way through, so a failure does not leak open writers.
 *
 * @throws IOException
 *           if creating the post process directory, reading a record, writing a vector or closing a
 *           writer fails. If closing fails after an earlier failure, the close failure is the one thrown.
 */
public void process() throws IOException {
  createPostProcessDirectory();
  try {
    for (Pair<?, WeightedVectorWritable> record
        : new SequenceFileDirIterable<Writable, WeightedVectorWritable>(clusteredPoints, PathType.GLOB,
            PathFilters.partFilter(), null, false, conf)) {
      String clusterId = record.getFirst().toString().trim();
      putVectorInRespectiveCluster(clusterId, record.getSecond());
    }
  } finally {
    // Close in finally: an exception in the loop must not leave writers open.
    try {
      IOUtils.close(writersForClusters.values());
    } finally {
      // Always drop the references so no closed writer is reused by a later call.
      writersForClusters.clear();
    }
  }
}
/**
 * Sequential post processing. Hands the work to a {@code ClusterOutputPostProcessor}, which reads the
 * vectors one by one and puts each into the directory named after its clusterId.
 *
 * @param input
 *          The output path that was provided to the clustering algorithm and is now to be post processed.
 *          Hint : the path of the directory containing clusters-*-final and clusteredPoints.
 * @param output
 *          The path at which the post processed data is stored.
 * @throws IOException
 *           propagated from {@code ClusterOutputPostProcessor.process()}.
 */
private static void postProcessSeq(Path input, Path output) throws IOException {
  // A fresh default Configuration is used for the sequential run.
  new ClusterOutputPostProcessor(input, output, new Configuration()).process();
}
/**
 * Routes one clustered point to its cluster: obtains the writer for {@code clusterId}, records the
 * cluster's output directory in {@code postProcessedClusterDirectories}, and then writes the point.
 *
 * @param clusterId
 *          id of the cluster the point belongs to.
 * @param point
 *          the vector to write into that cluster.
 * @throws IOException
 *           propagated from the writer lookup or from the write itself.
 */
private void putVectorInRespectiveCluster(String clusterId, WeightedVectorWritable point) throws IOException {
  // The writer is resolved first so that a failure here leaves the directory map untouched.
  final Writer clusterWriter = findWriterForVector(clusterId);

  postProcessedClusterDirectories.put(
      clusterId,
      PathDirectory.getClusterPathForClusterId(clusterPostProcessorOutput, clusterId));

  writeVectorToCluster(clusterWriter, point);
}
/**
 * This method takes the clustered points output by the clustering algorithms as input and writes them into
 * their respective clusters.
 * <p>
 * Every record found under {@code clusteredPoints} (glob path, part files only) is handled in turn: the
 * trimmed string form of its key is used as the cluster id and its value is passed to
 * {@link #putVectorInRespectiveCluster(String, WeightedVectorWritable)}.
 * <p>
 * The writers held in {@code writersForClusters} are closed and the map is cleared when this method
 * exits, whether it completes normally or fails part-way through, so a failure does not leak open writers.
 *
 * @throws IOException
 *           if creating the post process directory, reading a record, writing a vector or closing a
 *           writer fails. If closing fails after an earlier failure, the close failure is the one thrown.
 */
public void process() throws IOException {
  createPostProcessDirectory();
  try {
    for (Pair<?, WeightedVectorWritable> record
        : new SequenceFileDirIterable<Writable, WeightedVectorWritable>(clusteredPoints, PathType.GLOB,
            PathFilters.partFilter(), null, false, conf)) {
      String clusterId = record.getFirst().toString().trim();
      putVectorInRespectiveCluster(clusterId, record.getSecond());
    }
  } finally {
    // Close in finally: an exception in the loop must not leave writers open.
    try {
      IOUtils.close(writersForClusters.values());
    } finally {
      // Always drop the references so no closed writer is reused by a later call.
      writersForClusters.clear();
    }
  }
}
/**
 * Sequential post processing. Hands the work to a {@code ClusterOutputPostProcessor}, which reads the
 * vectors one by one and puts each into the directory named after its clusterId.
 *
 * @param input
 *          The output path that was provided to the clustering algorithm and is now to be post processed.
 *          Hint : the path of the directory containing clusters-*-final and clusteredPoints.
 * @param output
 *          The path at which the post processed data is stored.
 * @throws IOException
 *           propagated from {@code ClusterOutputPostProcessor.process()}.
 */
private static void postProcessSeq(Path input, Path output) throws IOException {
  // A fresh default Configuration is used for the sequential run.
  new ClusterOutputPostProcessor(input, output, new Configuration()).process();
}
/**
 * Routes one clustered point to its cluster: obtains the writer for {@code clusterId}, records the
 * cluster's output directory in {@code postProcessedClusterDirectories}, and then writes the point.
 *
 * @param clusterId
 *          id of the cluster the point belongs to.
 * @param point
 *          the vector to write into that cluster.
 * @throws IOException
 *           propagated from the writer lookup or from the write itself.
 */
private void putVectorInRespectiveCluster(String clusterId, WeightedVectorWritable point) throws IOException {
  // The writer is resolved first so that a failure here leaves the directory map untouched.
  final Writer clusterWriter = findWriterForVector(clusterId);

  postProcessedClusterDirectories.put(
      clusterId,
      PathDirectory.getClusterPathForClusterId(clusterPostProcessorOutput, clusterId));

  writeVectorToCluster(clusterWriter, point);
}