private Node readNodeB(HierarchicalByteKMeansResult hBytekm, DataInput dis) throws IOException { Node node = new Node(); char type = (char) dis.readByte(); //read result data node.result = new ByteCentroidsResult(); node.result.readBinary(dis); if (type == 'I') { node.children = new Node[node.result.numClusters()]; for (int i=0; i<node.result.numClusters(); i++) { node.children[i] = readNodeB(hBytekm, dis); } } else { node.children = null; } return node ; }
/**
 * Given a path, get the cluster centroid associated with the cluster index of the path.
 *
 * @param path the path through the tree
 * @return the centroid of a given path
 */
public byte [] getClusterCentroid(int [] path) {
	// Descend the tree following all but the final path index...
	Node current = root;
	final int depth = path.length - 1;
	for (int level = 0; level < depth; level++)
		current = current.children[path[level]];

	// ...the final index selects the centroid within the reached node.
	return current.result.getCentroids()[path[depth]];
}
private Node readNode(HierarchicalByteKMeansResult hBytekm, Scanner reader) throws IOException { String line; while ((line = reader.nextLine()).length()==0) {/*do nothing*/} char type = line.charAt(0); //read result data Node node = new Node(); node.result = new ByteCentroidsResult(); node.result.readASCII(reader); if (type == 'I') { node.children = new Node[node.result.numClusters()]; for (int i=0; i<node.result.numClusters(); i++) { node.children[i] = readNode(hBytekm,reader); } } else { node.children = null; } return node ; }
/**
 * Recursively write a node (and its children) in binary form: a single
 * type byte ('L' for leaf, 'I' for internal), the node's centroid data,
 * then each child node in cluster order.
 *
 * @param dos the output to write to
 * @param node the node to serialise
 * @throws IOException if an error occurs during writing
 */
private void writeNodeB(DataOutput dos, Node node) throws IOException {
	//write node type
	char type;
	if (node.children == null)
		type='L'; //leaf
	else
		type='I'; //internal (intermediate) node

	dos.writeByte(type);

	//write result data
	node.result.writeBinary(dos);

	//write children (internal nodes have exactly one child per cluster)
	if (node.children != null) {
		for (int i=0; i<node.result.numClusters(); i++) {
			writeNodeB(dos, node.children[i]);
		}
	}
}
/**
 * Build a visual-word vocabulary: extract dense SIFT features from every
 * image in the sample, cluster them into 300 visual words with approximate
 * (KD-tree ensemble) K-Means, and return the resulting hard assigner.
 *
 * @param sample the images to learn the vocabulary from
 * @param pdsift the dense SIFT extractor to use
 * @return a hard assigner mapping features to visual words
 */
private static HardAssigner<byte[], float[], IntFloatPair> trainQuantiser(
		GroupedDataset<String, ListDataset<Record<FImage>>, Record<FImage>> sample,
		PyramidDenseSIFT<FImage> pdsift)
{
	// Extract dense SIFT keypoints from each image in the sample.
	List<LocalFeatureList<ByteDSIFTKeypoint>> features =
			new ArrayList<LocalFeatureList<ByteDSIFTKeypoint>>();

	for (final Record<FImage> record : sample) {
		pdsift.analyseImage(record.getImage());
		features.add(pdsift.getByteKeypoints(0.005f));
	}

	// Cap the number of feature lists used for clustering.
	if (features.size() > 10000)
		features = features.subList(0, 10000);

	// Learn a 300-term vocabulary using approximate K-Means.
	final ByteKMeans km = ByteKMeans.createKDTreeEnsemble(300);
	final DataSource<byte[]> datasource =
			new LocalFeatureListDataSource<ByteDSIFTKeypoint, byte[]>(features);
	final ByteCentroidsResult result = km.cluster(datasource);

	return result.defaultHardAssigner();
}
}
/***
 * Selects K elements from the provided data as the centroids of the clusters. If K is -1 all provided
 * data points will be selected. It is not guaranteed that the same data point will not be selected
 * many times.
 *
 * @param data source of centroids
 * @return the selected centroids
 */
@Override
public ByteCentroidsResult cluster(byte[][] data) {
	int nc = this.K;

	if (nc == -1) {
		nc = data.length; // K == -1 means "one centroid per data point"
	}

	ByteCentroidsResult result = new ByteCentroidsResult();
	result.centroids = new byte[nc][];

	for (int i = 0; i < nc; i++) {
		// Sampling WITH replacement: the same point may be picked more than once.
		int dIndex = this.random.nextInt(data.length);

		// Defensive copy so the result does not alias the input data.
		result.centroids[i] = Arrays.copyOf(data[dIndex], data[dIndex].length);
	}

	return result;
}
/**
 * Recursively write a node (and its children) in ASCII form: a type line
 * ("L" for leaf, "I" for internal), the node's centroid data, then each
 * child node in cluster order.
 *
 * @param writer the writer to write to
 * @param node the node to serialise
 * @throws IOException if an error occurs during writing
 */
private void writeNodeASCII(PrintWriter writer, final Node node) throws IOException {
	//write node type
	if (node.children == null)
		writer.write("L\n"); //leaf
	else
		writer.write("I\n"); //internal (intermediate) node

	//write result data
	node.result.writeASCII(writer);
	writer.flush();

	//write children (internal nodes have exactly one child per cluster)
	if (node.children != null) {
		for (int i=0; i<node.result.numClusters(); i++) {
			writeNodeASCII(writer, node.children[i]);
		}
	}
}
/**
 * Count the leaf clusters in the subtree rooted at the given node: a leaf
 * contributes one per cluster; an internal node contributes the sum over
 * all of its children.
 *
 * @param node the subtree root
 * @return the number of leaf clusters below (and including) the node
 */
private int countLeaves(Node node) {
	// Base case: a leaf's clusters are the leaves themselves.
	if (node.children == null)
		return node.result.numClusters();

	// Recursive case: accumulate across every child subtree.
	int total = 0;
	final int nChildren = node.result.numClusters();
	for (int c = 0; c < nChildren; c++)
		total += countLeaves(node.children[c]);

	return total;
}
/**
 * Cluster the data, hard-assign every input point to its nearest centroid,
 * and repackage the assignments as groups of data indices.
 */
@Override
public int[][] performClustering(byte[][] data) {
	final ByteCentroidsResult centroids = this.cluster(data);
	final int[] assignments = centroids.defaultHardAssigner().assign(data);
	return new IndexClusters(assignments).clusters();
}
// NOTE(review): isolated statement — presumably initialises the holder that
// centroids are written into later; enclosing context not visible here.
ByteCentroidsResult result = new ByteCentroidsResult();
/**
 * Compute a bag-of-visual-words histogram for the image: detect keypoints
 * on a half-size intensity copy, draw them onto the input image (side
 * effect), and count codebook assignments into the reusable feature vector.
 */
@Override
public DoubleFV createFeature(final MBFImage image) {
	// Lazily initialise on first use by loading a pre-trained 100-term
	// codebook from the classpath.
	if (this.rabc == null) {
		try {
			final ByteCentroidsResult clusterer = IOUtils.read(Mode.class
					.getResourceAsStream("/org/openimaj/demos/codebooks/random-100-highfield-codebook.voc"),
					ByteCentroidsResult.class);

			this.rabc = new ExactByteAssigner(clusterer);
			this.fv = new DoubleFV(clusterer.numClusters());
			this.engine.getOptions().setDoubleInitialImage(false);
		} catch (final IOException e) {
			// NOTE(review): failure is only printed; the code below will NPE
			// if the codebook could not be loaded — consider rethrowing.
			e.printStackTrace();
		}
	}

	// Detect keypoints on a half-size intensity image (hence the *2 when
	// mapping coordinates back onto the full-size input).
	FImage img = Transforms.calculateIntensity(image);
	img = ResizeProcessor.halfSize(img);
	final List<Keypoint> keys = this.engine.findFeatures(img);

	// Side effect: mark each detected keypoint on the input image.
	for (final Keypoint keypoint : keys) {
		image.drawPoint(new Point2dImpl(keypoint.x * 2f, keypoint.y * 2f), RGBColour.RED, 3);
	}

	// Histogram the keypoints over the codebook (fv is reused across calls).
	Arrays.fill(this.fv.values, 0);
	for (final Keypoint k : keys) {
		this.fv.values[this.rabc.assign(k.ivec)]++;
	}

	return this.fv;
}
/**
 * Cluster the data, then convert the hard assignment of each datapoint
 * into groups of data indices.
 */
@Override
public int[][] performClustering(byte[][] data) {
	final ByteCentroidsResult result = this.cluster(data);
	final int[] ids = result.defaultHardAssigner().assign(data);
	return new IndexClusters(ids).clusters();
}
/**
 * Selects K elements from the provided {@link DataSource} as the centroids of the clusters.
 * If K is -1 all provided data points will be selected. It is not guaranteed that the same data
 * point will not be selected many times.
 *
 * @param data a data source object
 * @return the selected centroids
 */
@Override
public ByteCentroidsResult cluster(DataSource<byte[]> data) {
	int nc = this.K;

	if (nc == -1) {
		nc = data.size(); // K == -1 means "one centroid per data point"
	}

	ByteCentroidsResult result = new ByteCentroidsResult();
	result.centroids = new byte[nc][M];

	byte[][] dataRow = new byte[1][];
	for (int i = 0; i < nc; i++) {
		// Sampling WITH replacement: the same index may be drawn repeatedly.
		int dIndex = this.random.nextInt(data.size());

		// Point the one-row buffer at the pre-allocated centroid so the
		// selected datapoint is copied directly into it.
		// NOTE(review): assumes getData fills the supplied row arrays in
		// place rather than replacing them — verify against DataSource.
		dataRow[0] = result.centroids[i];
		data.getData(dIndex, dIndex+1, dataRow);
	}

	return result;
}
}
/**
 * Compute a bag-of-visual-words histogram for the image: detect keypoints
 * on a half-size intensity copy, draw them onto the input image (side
 * effect), and count codebook assignments into the reusable feature vector.
 */
@Override
public DoubleFV createFeature(final MBFImage image) {
	// Lazily initialise on first use by loading a pre-trained 100-term
	// codebook from the classpath.
	if (this.rabc == null) {
		try {
			final ByteCentroidsResult clusterer = IOUtils.read(Mode.class
					.getResourceAsStream("/org/openimaj/demos/codebooks/random-100-highfield-codebook.voc"),
					ByteCentroidsResult.class);

			this.rabc = new ExactByteAssigner(clusterer);
			this.fv = new DoubleFV(clusterer.numClusters());
			this.engine.getOptions().setDoubleInitialImage(false);
		} catch (final IOException e) {
			// NOTE(review): failure is only printed; the code below will NPE
			// if the codebook could not be loaded — consider rethrowing.
			e.printStackTrace();
		}
	}

	// Detect keypoints on a half-size intensity image (hence the *2 when
	// mapping coordinates back onto the full-size input).
	FImage img = Transforms.calculateIntensity(image);
	img = ResizeProcessor.halfSize(img);
	final List<Keypoint> keys = this.engine.findFeatures(img);

	// Side effect: mark each detected keypoint on the input image.
	for (final Keypoint keypoint : keys) {
		image.drawPoint(new Point2dImpl(keypoint.x * 2f, keypoint.y * 2f), RGBColour.RED, 3);
	}

	// Histogram the keypoints over the codebook (fv is reused across calls).
	Arrays.fill(this.fv.values, 0);
	for (final Keypoint k : keys) {
		this.fv.values[this.rabc.assign(k.ivec)]++;
	}

	return this.fv;
}
/**
 * Recursively print the tree of cluster centroids to {@link System#out}.
 *
 * @param node
 *            the node to start from
 * @param indent
 *            the amount to indent the current line
 */
static void printNode(Node node, int indent) {
	final byte[][] centroids = node.result.getCentroids();
	final Node[] children = node.children;

	if (children == null) {
		// Leaf: one line per centroid at the current depth.
		for (final byte[] centroid : centroids) {
			for (int t = 0; t < indent; t++)
				System.out.print("\t");
			System.out.println(Arrays.toString(centroid));
		}
	} else {
		// Internal: print each centroid then recurse into its subtree.
		for (int c = 0; c < children.length; c++) {
			for (int t = 0; t < indent; t++)
				System.out.print("\t");
			System.out.println(Arrays.toString(centroids[c]));
			printNode(children[c], indent + 1);
		}
	}
}
}
// NOTE(review): isolated statement — obtains the default nearest-centroid
// hard assigner for the clustering result; enclosing context not visible here.
final HardAssigner<byte[], ?, ?> assigner = result.defaultHardAssigner();
/**
 * load some initially selected centroids from {@link FeatureSelect} as a
 * {@link ByteKMeans} instance
 *
 * @param initialCentroids path of the sequence file holding the centroids
 * @param k number of centroids to keep
 * @return a {@link ByteKMeans}
 * @throws IOException
 */
public static ByteCentroidsResult sequenceFileToCluster(String initialCentroids, int k) throws IOException {
	// Collect the top-k entries from the sequence file keys.
	final SelectTopKDump neededdump = new SelectTopKDump(k);
	final IntBytesSequenceMemoryUtility utility = new IntBytesSequenceMemoryUtility(initialCentroids, true);

	utility.exportData(NamingStrategy.KEY, new ExtractionState(), 0, neededdump);

	// Wrap the dumped centroids as a clustering result.
	final ByteCentroidsResult newFastKMeansCluster = new ByteCentroidsResult();
	newFastKMeansCluster.centroids = neededdump.centroids;

	return newFastKMeansCluster;
}
}
// NOTE(review): isolated statement — obtains the default nearest-centroid
// hard assigner for the clustering result; enclosing context not visible here.
final HardAssigner<byte[], ?, ?> assigner = result.defaultHardAssigner();
/**
 * Selects K elements from the provided {@link DataSource} as the centroids of the clusters.
 * If K is -1 all provided data points will be selected. It is guaranteed that the same data
 * point will not be selected many times, though this is not the case if two separate entries
 * provided are identical.
 *
 * @param data a data source object
 * @return the selected centroids
 */
@Override
public ByteCentroidsResult cluster(DataSource<byte[]> data) {
	ByteCentroidsResult result = new ByteCentroidsResult();

	if(K == -1) {
		// K == -1 means "one centroid per data point".
		final int nc = data.size();
		result.centroids = new byte[nc][data.numDimensions()];
	} else {
		result.centroids = new byte[K][data.numDimensions()];
	}

	// Sampling WITHOUT replacement: fills the rows with distinct random entries.
	data.getRandomRows(result.centroids);

	return result;
}
}
/**
 * Compute HierarchicalByteKMeans clustering.
 *
 * @param data Data to cluster.
 * @param K Number of clusters for this node.
 * @param height Tree height.
 *
 * @return a new HierarchicalByteKMeans node representing a sub-clustering.
 **/
private Node trainLevel(final byte[][] data, int K, int height) {
	final Node node = new Node();
	// Only non-terminal levels get a child slot per cluster.
	node.children = (height == 1) ? null : new Node[K];

	// Cluster this level's data into K groups.
	final ByteKMeans kmeans = newByteKMeans(K);
	node.result = kmeans.cluster(data);

	final HardAssigner<byte[], float[], IntFloatPair> assigner = node.result.defaultHardAssigner();

	// Unless the bottom has been reached, partition the data by cluster
	// assignment and recursively cluster each partition one level down.
	if (height > 1) {
		final int[] ids = assigner.assign(data);

		for (int k = 0; k < K; k++) {
			final byte[][] partition = extractSubset(data, ids, k);

			// A partition may hold fewer points than K; shrink K accordingly.
			final int partitionK = Math.min(K, partition.length);

			node.children[k] = trainLevel(partition, partitionK, height - 1);
		}
	}

	return node;
}