/**
 * Tells whether a key is present in the tree.
 *
 * @param key the key to look up
 * @return true when find(key) returns a non-negative result
 */
public boolean knows(String key) {
    int found = find(key);
    return found >= 0;
}
/**
 * Creates a tree and delegates all of its set-up to init().
 */
TernaryTree() {
    init();
}
/**
 * Inserts a zero-terminated key that is stored inside a character array.
 *
 * @param key   array holding the key; the key runs from <code>start</code>
 *              up to (not including) the terminating 0 character
 * @param start index in <code>key</code> of the first character of the key
 * @param val   value to associate with the key
 */
public void insert(char[] key, int start, char val) {
    // Measure the key from 'start', not from index 0. Callers such as
    // compact() pass an offset into a shared array whose first element is 0,
    // so measuring from index 0 would report a length of 0 and the node
    // arrays might not be grown before the insertion needs the room.
    int len = strlen(key, start) + 1; // maximum number of nodes that may be generated
    if (freenode + len > eq.length) {
        redimNodeArrays(eq.length + BLOCK_SIZE);
    }
    root = insert(root, key, start, val);
}
/**
 * Inserts a slice of key/value pairs so that the resulting tree is
 * balanced: the middle element of the slice goes in first, followed
 * recursively by the lower half and then the upper half.
 * The keys are assumed to be sorted in ascending order.
 *
 * @param k      the keys
 * @param v      the values, parallel to <code>k</code>
 * @param offset index of the first element of the slice
 * @param n      number of elements in the slice
 */
protected void insertBalanced(String[] k, char[] v, int offset, int n) {
    if (n >= 1) {
        int half = n >> 1;
        int median = offset + half;
        insert(k[median], v[median]);
        insertBalanced(k, v, offset, half);
        insertBalanced(k, v, median + 1, n - half - 1);
    }
}
/** * Branches are initially compressed, needing * one node per key plus the size of the string * key. They are decompressed as needed when * another key with same prefix * is inserted. This saves a lot of space, * specially for long keys. */ public void insert(String key, char val) { // make sure we have enough room in the arrays int len = key.length() + 1; // maximum number of nodes that may be generated if (freenode + len > eq.length) { redimNodeArrays(eq.length + BLOCK_SIZE); } char strkey[] = new char[len--]; key.getChars(0, len, strkey, 0); strkey[len] = 0; root = insert(root, strkey, 0, val); }
/**
 * Walks the subtree rooted at <code>p</code> and copies every key
 * string still referenced from the kv array into <code>kx</code>,
 * storing each distinct string only once.
 *
 * @param kx  destination vector for the retained key strings
 * @param map maps strings already copied to their offset in <code>kx</code>
 * @param p   node to process; 0 means "no node"
 */
private void compact(CharVector kx, TernaryTree map, char p) {
    if (p == 0) {
        return;
    }

    if (sc[p] != 0xFFFF) {
        // ordinary node: visit the children
        compact(kx, map, lo[p]);
        if (sc[p] != 0) {
            compact(kx, map, eq[p]);
        }
        compact(kx, map, hi[p]);
        return;
    }

    // splitchar 0xFFFF: lo[p] is an offset into kv rather than a child node
    int offset = map.find(kv.getArray(), lo[p]);
    if (offset < 0) {
        // first time this string is seen: copy it and remember where it went
        int size = strlen(kv.getArray(), lo[p]) + 1;
        offset = kx.alloc(size);
        strcpy(kx.getArray(), offset, kv.getArray(), lo[p]);
        map.insert(kx.getArray(), offset, (char) offset);
    }
    lo[p] = (char) offset;
}
/** * Each node stores a character (splitchar) which is part of * some key(s). In a compressed branch (one that only contain * a single string key) the trailer of the key which is not * already in nodes is stored externally in the kv array. * As items are inserted, key substrings decrease. * Some substrings may completely disappear when the whole * branch is totally decompressed. * The tree is traversed to find the key substrings actually * used. In addition, duplicate substrings are removed using * a map (implemented with a TernaryTree!). * */ public void trimToSize() { // first balance the tree for best performance balance(); // redimension the node arrays redimNodeArrays(freenode); // ok, compact kv array CharVector kx = new CharVector(); kx.alloc(1); TernaryTree map = new TernaryTree(); compact(kx, map, root); kv = kx; kv.trimToSize(); }
int len = strlen(key, start); if (p == 0) { lo[p] = (char)kv.alloc(len + 1); // use 'lo' to hold pointer to key strcpy(kv.getArray(), lo[p], key, start); } else { sc[p] = 0; lo[p] = insert(lo[p], key, start, val); } else if (s == sc[p]) { if (s != 0) { eq[p] = insert(eq[p], key, start + 1, val); } else { hi[p] = insert(hi[p], key, start, val);
public void loadSimplePatterns(InputStream stream) { SimplePatternParser pp = new SimplePatternParser(); ivalues = new TernaryTree(); pp.parse(stream, this); // patterns/values should be now in the tree // let's optimize a bit trimToSize(); vspace.trimToSize(); classmap.trimToSize(); // get rid of the auxiliary map ivalues = null; }
/** * Balance the tree for best search performance */ public void balance() { // System.out.print("Before root splitchar = "); System.out.println(sc[root]); int i = 0, n = length; String[] k = new String[n]; char[] v = new char[n]; Iterator iter = new Iterator(); while (iter.hasMoreElements()) { v[i] = iter.getValue(); k[i++] = (String)iter.nextElement(); } init(); insertBalanced(k, v, 0, n); // With uniform letter distribution sc[root] should be around 'm' // System.out.print("After root splitchar = "); System.out.println(sc[root]); }
/**
 * Adds a pattern to the tree. Mainly meant to be used by the
 * {@link SimplePatternParser SimplePatternParser} class as the callback
 * that adds a pattern to the tree.
 *
 * @param pattern the hyphenation pattern
 * @param ivalue  interletter weight values indicating the desirability
 *                and priority of hyphenating at a given point within
 *                the pattern. It should contain only digit characters
 *                (i.e. '0' to '9').
 */
public void addPattern(String pattern, String ivalue) {
    int valueIndex = ivalues.find(ivalue);
    if (valueIndex <= 0) {
        // no usable entry yet: pack the values and record where they went
        valueIndex = packValues(ivalue);
        ivalues.insert(ivalue, (char) valueIndex);
    }
    insert(pattern, (char) valueIndex);
}
/**
 * Creates an empty hyphenation tree with a fresh value space,
 * character class map and stop list.
 */
public HyphenationTree() {
    vspace = new ByteVector();
    vspace.alloc(1); // this reserves index 0, which we don't use
    classmap = new TernaryTree();
    stoplist = new HashMap(23); // usually a small table
}
/**
 * Prints the size of the value space to standard output and then
 * the statistics reported by the superclass.
 */
public void printStats() {
    int valueSpaceSize = vspace.length();
    System.out.println("Value space size = " + Integer.toString(valueSpaceSize));
    super.printStats();
}
}
/**
 * Adds a character class to the tree. Used by
 * {@link SimplePatternParser SimplePatternParser} as the callback that
 * adds character classes. Character classes define the valid word
 * characters for hyphenation: a word containing a character that is not
 * defined in any class is not hyphenated. A class also defines how
 * characters are normalized before being compared with the stored
 * patterns. Pattern files usually use lower case characters only, in
 * which case the class for the letter 'a', for example, should be
 * defined as "aA", the first character being the normalization char.
 *
 * @param chargroup the characters of the class, normalization char first
 */
public void addClass(String chargroup) {
    int groupSize = chargroup.length();
    if (groupSize <= 0) {
        return;
    }

    char equivChar = chargroup.charAt(0);

    // one-character, zero-terminated key buffer reused for every member
    char[] key = new char[2];
    key[1] = 0;
    for (int i = 0; i < groupSize; i++) {
        key[0] = chargroup.charAt(i);
        classmap.insert(key, 0, equivChar);
    }
}
/**
 * Looks up a zero-terminated key stored in a character array.
 *
 * @param key   array holding the key
 * @param start index in <code>key</code> of the first character of the key
 * @return the value stored for the key, or -1 when the key is not found
 */
public int find(char[] key, int start) {
    char node = root;
    int pos = start;

    while (node != 0) {
        if (sc[node] == 0xFFFF) {
            // splitchar 0xFFFF: the rest of the stored key sits in kv at lo[node]
            return (strcmp(key, pos, kv.getArray(), lo[node]) == 0) ? eq[node] : -1;
        }

        char current = key[pos];
        int diff = current - sc[node];
        if (diff < 0) {
            node = lo[node];
            continue;
        }
        if (diff > 0) {
            node = hi[node];
            continue;
        }

        // characters match
        if (current == 0) {
            // matched the terminator: eq holds the value
            return eq[node];
        }
        pos++;
        node = eq[node];
    }
    return -1;
}
/**
 * Recursively moves the key strings that are still referenced by the
 * subtree at <code>p</code> from the kv array into <code>kx</code>.
 * A string that was already moved is not copied again; the node is
 * simply pointed at the existing copy.
 *
 * @param kx  vector receiving the key strings that are kept
 * @param map lookup from a copied string to its offset in <code>kx</code>
 * @param p   current node, or 0 when there is none
 */
private void compact(CharVector kx, TernaryTree map, char p) {
    if (p != 0) {
        boolean pointsIntoKv = (sc[p] == 0xFFFF);
        if (pointsIntoKv) {
            // lo[p] is the offset of this node's string inside kv
            int pos = map.find(kv.getArray(), lo[p]);
            if (pos < 0) {
                pos = kx.alloc(strlen(kv.getArray(), lo[p]) + 1);
                strcpy(kx.getArray(), pos, kv.getArray(), lo[p]);
                map.insert(kx.getArray(), pos, (char) pos);
            }
            lo[p] = (char) pos;
        } else {
            compact(kx, map, lo[p]);
            if (sc[p] != 0) {
                compact(kx, map, eq[p]);
            }
            compact(kx, map, hi[p]);
        }
    }
}
/** * Each node stores a character (splitchar) which is part of * some key(s). In a compressed branch (one that only contain * a single string key) the trailer of the key which is not * already in nodes is stored externally in the kv array. * As items are inserted, key substrings decrease. * Some substrings may completely disappear when the whole * branch is totally decompressed. * The tree is traversed to find the key substrings actually * used. In addition, duplicate substrings are removed using * a map (implemented with a TernaryTree!). * */ public void trimToSize() { // first balance the tree for best performance balance(); // redimension the node arrays redimNodeArrays(freenode); // ok, compact kv array CharVector kx = new CharVector(); kx.alloc(1); TernaryTree map = new TernaryTree(); compact(kx, map, root); kv = kx; kv.trimToSize(); }
int len = strlen(key, start); if (p == 0) { lo[p] = (char)kv.alloc(len + 1); // use 'lo' to hold pointer to key strcpy(kv.getArray(), lo[p], key, start); } else { sc[p] = 0; lo[p] = insert(lo[p], key, start, val); } else if (s == sc[p]) { if (s != 0) { eq[p] = insert(eq[p], key, start + 1, val); } else { hi[p] = insert(hi[p], key, start, val);
public void loadSimplePatterns(InputStream stream) { SimplePatternParser pp = new SimplePatternParser(); ivalues = new TernaryTree(); pp.parse(stream, this); // patterns/values should be now in the tree // let's optimize a bit trimToSize(); vspace.trimToSize(); classmap.trimToSize(); // get rid of the auxiliary map ivalues = null; }
/**
 * Recursively inserts the median of a slice first, then the medians of
 * its lower and upper halves, and so on, in order to obtain a balanced
 * tree. The array of keys is assumed to be sorted in ascending order.
 *
 * @param k      keys to insert
 * @param v      values belonging to the keys, index for index
 * @param offset start of the slice within both arrays
 * @param n      length of the slice
 */
protected void insertBalanced(String[] k, char[] v, int offset, int n) {
    if (n < 1) {
        return;
    }

    final int lowerCount = n >> 1;
    final int upperCount = n - lowerCount - 1;
    final int pivot = offset + lowerCount;

    insert(k[pivot], v[pivot]);
    insertBalanced(k, v, offset, lowerCount);
    insertBalanced(k, v, pivot + 1, upperCount);
}