/**
 * Picks centroids by drawing rows at random from {@code data}. When K is -1 the number of
 * centroids drawn equals the number of rows in {@code data}. Rows are drawn independently,
 * so the same row may be picked more than once. Each centroid is a copy of the picked row
 * rather than a reference to it.
 *
 * @param data the rows to draw centroids from
 * @return a result whose centroids are copies of the drawn rows
 */
@Override
public ByteCentroidsResult cluster(byte[][] data) {
    final int wanted = (this.K == -1) ? data.length : this.K;

    final ByteCentroidsResult result = new ByteCentroidsResult();
    final byte[][] picked = new byte[wanted][];
    for (int slot = 0; slot < wanted; slot++) {
        final byte[] source = data[this.random.nextInt(data.length)];
        // clone() gives an independent copy of the whole row
        picked[slot] = source.clone();
    }
    result.centroids = picked;
    return result;
}
// Create a new ByteCentroidsResult; nothing is assigned to its fields within this fragment.
ByteCentroidsResult result = new ByteCentroidsResult();
private Node readNode(HierarchicalByteKMeansResult hBytekm, Scanner reader) throws IOException { String line; while ((line = reader.nextLine()).length()==0) {/*do nothing*/} char type = line.charAt(0); //read result data Node node = new Node(); node.result = new ByteCentroidsResult(); node.result.readASCII(reader); if (type == 'I') { node.children = new Node[node.result.numClusters()]; for (int i=0; i<node.result.numClusters(); i++) { node.children[i] = readNode(hBytekm,reader); } } else { node.children = null; } return node ; }
/**
 * Picks centroids by drawing items at random from the given {@link DataSource}. When K is
 * -1 the number of centroids drawn equals the size of the data source. Items are drawn
 * independently, so the same item may be picked more than once.
 *
 * @param data the data source to draw centroids from
 * @return a result holding the drawn centroids
 */
@Override
public ByteCentroidsResult cluster(DataSource<byte[]> data) {
    final int wanted = (this.K == -1) ? data.size() : this.K;

    final ByteCentroidsResult result = new ByteCentroidsResult();
    result.centroids = new byte[wanted][M];

    // One-row window handed to getData; slot 0 is re-pointed at the centroid row being
    // filled (presumably getData writes the requested item into it - verify against DataSource).
    final byte[][] window = new byte[1][];
    for (final byte[] centroid : result.centroids) {
        final int pick = this.random.nextInt(data.size());
        window[0] = centroid;
        data.getData(pick, pick + 1, window);
    }
    return result;
}
}
private Node readNodeB(HierarchicalByteKMeansResult hBytekm, DataInput dis) throws IOException { Node node = new Node(); char type = (char) dis.readByte(); //read result data node.result = new ByteCentroidsResult(); node.result.readBinary(dis); if (type == 'I') { node.children = new Node[node.result.numClusters()]; for (int i=0; i<node.result.numClusters(); i++) { node.children[i] = readNodeB(hBytekm, dis); } } else { node.children = null; } return node ; }
/**
 * Loads some initially selected centroids from {@link FeatureSelect} output and wraps them
 * in a {@link ByteCentroidsResult}.
 *
 * @param initialCentroids handed to {@link IntBytesSequenceMemoryUtility} as the source to
 *            read (presumably the path or URI of the sequence file - confirm with callers)
 * @param k passed to the {@link SelectTopKDump} that collects the centroids
 * @return a {@link ByteCentroidsResult} whose centroids are those collected by the dump
 * @throws IOException if an I/O error is reported while reading the centroids
 */
public static ByteCentroidsResult sequenceFileToCluster(String initialCentroids, int k) throws IOException {
    final SelectTopKDump collector = new SelectTopKDump(k);
    new IntBytesSequenceMemoryUtility(initialCentroids, true)
            .exportData(NamingStrategy.KEY, new ExtractionState(), 0, collector);

    final ByteCentroidsResult loaded = new ByteCentroidsResult();
    loaded.centroids = collector.centroids;
    return loaded;
}
}
/**
 * Picks centroids from the given {@link DataSource} by asking it for random rows. When K is
 * -1 the number of centroids equals the size of the data source. The same entry is not
 * selected more than once, although two separate entries that happen to be identical may
 * both be selected.
 *
 * @param data the data source to draw centroids from
 * @return a result holding the selected centroids
 */
@Override
public ByteCentroidsResult cluster(DataSource<byte[]> data) {
    final ByteCentroidsResult result = new ByteCentroidsResult();

    final int wanted = (K == -1) ? data.size() : K;
    final byte[][] picked = new byte[wanted][data.numDimensions()];
    data.getRandomRows(picked);

    result.centroids = picked;
    return result;
}
}
// Wrap the centroid array held in newcentroids in a newly created result object.
final ByteCentroidsResult newFastKMeansCluster = new ByteCentroidsResult(); newFastKMeansCluster.centroids = newcentroids;