/**
 * Gets the offset of the context (the (n-1)-gram prefix) for the n-gram stored at
 * the given offset.
 *
 * @param offset offset of the n-gram within its order's map
 * @param ngramOrder order of the n-gram (0 = unigram)
 * @return the context offset, or -1 for unigrams (which have no context)
 */
public long getPrefixOffset(final long offset, final int ngramOrder) {
    return ngramOrder == 0 ? -1 : contextOffsetOf(getKey(offset, ngramOrder));
}
/**
 * Compacts a full key into a smaller encoding: the word id is placed in the high
 * bits (shifted left by {@code numSuffixBits}) and the context offset in the low bits.
 *
 * @param key full key as produced by {@code ngramMap}
 * @return compacted key (word above, context offset below)
 */
private final long shrinkKey(final long key) {
    final long suffix = ngramMap.contextOffsetOf(key);
    final long word = ngramMap.wordOf(key);
    return (word << numSuffixBits) | suffix;
}
/**
 * Returns the context offset encoded in the key stored at the given offset.
 *
 * @param offset offset of the n-gram within its order's map
 * @param ngramOrder order of the n-gram
 * @return the context offset for the stored key
 */
public long getNextContextOffset(final long offset, final int ngramOrder) {
    final long key = getKey(offset, ngramOrder);
    return contextOffsetOf(key);
}
/**
 * Stores the (possibly compacted) key at the given slot index.
 *
 * @param index slot in the {@code keys} array
 * @param putKey full key to store
 */
private void setKey(final long index, final long putKey) {
    final long storedOffset;
    if (wordRanges == null) {
        // no per-word ranges: use the compacted (word | offset) encoding
        storedOffset = shrinkKey(putKey);
    } else {
        storedOffset = ngramMap.contextOffsetOf(putKey);
    }
    assert storedOffset >= 0;
    // stored shifted by +1 -- presumably so that 0 can mark an empty slot; confirm against EMPTY_KEY
    keys.set(index, storedOffset + 1);
}
/**
 * Reconstructs the words of an n-gram from its context encoding, writing them into
 * {@code scratch}. Walks the context chain: at each step the key stored for the
 * current context offset yields both a word and the offset of the next (shorter)
 * context.
 *
 * @param contextOffset offset of the n-gram's context (the n-gram minus its last word)
 * @param contextOrder order of the context; negative means the n-gram is a unigram
 * @param word the final word of the n-gram
 * @param scratch output array; filled front-to-back when {@code reversed}, back-to-front
 *            otherwise
 */
private void getNgramFromContextEncodingHelp(final long contextOffset, final int contextOrder, final int word, final int[] scratch) {
    if (contextOrder < 0) {
        // unigram: no context chain to walk, just the word itself
        scratch[0] = word;
    } else {
        long contextOffset_ = contextOffset;
        int word_ = word;
        // place the n-gram's last word at one end of the array
        scratch[reversed ? 0 : (scratch.length - 1)] = word_;
        // walk down the context chain, one order at a time, filling toward the other end
        for (int i = 0; i <= contextOrder; ++i) {
            final int ngramOrder = contextOrder - i;
            final long key = getKey(contextOffset_, ngramOrder);
            contextOffset_ = contextOffsetOf(key);
            word_ = wordOf(key);
            scratch[reversed ? (i + 1) : (scratch.length - i - 2)] = word_;
        }
    }
}
/**
 * Fills {@code ret} with the word ids of the n-gram stored at the given offset by
 * following the chain of context offsets down through each lower-order map.
 *
 * @param offset offset of the n-gram within the map of order {@code ngramOrder}
 * @param ngramOrder order of the n-gram
 * @param ret output array to fill; written front-to-back when {@code reversed},
 *            back-to-front otherwise
 * @return {@code ret}, for convenience
 */
public int[] getNgramForOffset(final long offset, final int ngramOrder, final int[] ret) {
    long curOffset = offset;
    int i = 0;
    while (i <= ngramOrder) {
        final long key = getMap(ngramOrder - i).getKey(curOffset);
        curOffset = contextOffsetOf(key);
        ret[reversed ? i : (ngramOrder - i)] = wordOf(key);
        i++;
    }
    return ret;
}
/**
 * Returns the first word of the n-gram stored at the given offset by following
 * context offsets down to the unigram map.
 *
 * @param offset offset of the n-gram within the map of order {@code ngramOrder}
 * @param ngramOrder order of the n-gram
 * @return the word id stored at the bottom (order 0) of the context chain
 */
public int getFirstWordForOffset(final long offset, final int ngramOrder) {
    // iterative form of the descent: each order's key points at the next context below
    long curOffset = offset;
    for (int order = ngramOrder; order > 0; --order) {
        curOffset = contextOffsetOf(getMap(order).getKey(curOffset));
    }
    return wordOf(getMap(0).getKey(curOffset));
}
/**
 * Inserts an n-gram into the given map with an explicitly-provided suffix offset,
 * and records its value in the value store.
 *
 * @param map the hash map for this n-gram's order
 * @param ngram word ids containing the n-gram
 * @param startPos start index (inclusive) of the n-gram within {@code ngram}
 * @param endPos end index (exclusive) of the n-gram within {@code ngram}
 * @param key packed key (word + context offset) for the n-gram
 * @param val value to associate with the n-gram
 * @param forcedNew if true, treat the entry as new even when the map did not grow
 * @param suffixIndex offset of the n-gram's suffix, passed through to the value store
 * @return the index at which the n-gram was stored, or -1 if the value store
 *         rejected the add
 */
private long putHelpWithSuffixIndex(final HashMap map, final int[] ngram, final int startPos, final int endPos, final long key, final T val, final boolean forcedNew, final long suffixIndex) {
    final int ngramOrder = endPos - startPos - 1;
    // size is read before and after put: growth means the key was genuinely new
    final long oldSize = map.size();
    final long index = map.put(key);
    final boolean addWorked = values.add(ngram, startPos, endPos, ngramOrder, index, contextOffsetOf(key), wordOf(key), val, suffixIndex, map.size() > oldSize || forcedNew);
    if (!addWorked) return -1;
    return index;
}
/**
 * Linearly probes the hash range belonging to the key's word for the key.
 *
 * @param key packed key (word + context offset) to look up
 * @param returnFirstEmptyIndex if true, a miss returns the index of the first empty
 *            slot encountered (used for insertion)
 * @return the slot index found, or -1 if the word is out of range or owns an empty
 *         hash range
 */
private long linearSearch(final long key, final boolean returnFirstEmptyIndex) {
    final int word = ngramMap.wordOf(key);
    // words beyond the vocabulary cannot be present
    if (word >= numWords) return -1;
    // each word owns a contiguous slice [rangeStart, rangeEnd) of the keys array
    final long rangeStart = wordRangeStart(word);
    final long rangeEnd = wordRangeEnd(word);
    final long numHashPositions = rangeEnd - rangeStart;
    if (numHashPositions == 0) return -1L;
    final long startIndex = hash(key, numHashPositions, rangeStart);
    // must use the same encoding as setKey: compacted when there are no word ranges
    final long contextOffsetOf = wordRanges == null ? shrinkKey(key) : ngramMap.contextOffsetOf(key);
    assert contextOffsetOf >= 0;
    assert word >= 0;
    assert startIndex >= rangeStart;
    assert startIndex < rangeEnd;
    // +1 shift matches setKey's storage; EMPTY_KEY marks unoccupied slots
    final long index = keys.linearSearch(contextOffsetOf + 1, rangeStart, rangeEnd, startIndex, EMPTY_KEY, returnFirstEmptyIndex);
    return index;
}
getNgramFromContextEncodingHelp(contextOffsetOf(key), ngramOrder - 1, wordOf(key), scratchArray); final long newKey = newMap.getKey(scratchArray, 0, scratchArray.length); assert newKey >= 0 : "Failure for old n-gram " + Arrays.toString(scratchArray); final boolean addWorked = newMap.values.add(scratchArray, 0, scratchArray.length, ngramOrder, index, contextOffsetOf(newKey), wordOf(newKey), val, suffixIndex, true); assert addWorked;
/**
 * Inserts an n-gram and its value, lazily creating (and growing, when the load
 * factor is exceeded) the map for its order.
 *
 * @param ngram word ids containing the n-gram
 * @param startPos start index (inclusive) of the n-gram within {@code ngram}
 * @param endPos end index (exclusive) of the n-gram within {@code ngram}
 * @param val value to associate with the n-gram
 * @return the index at which the n-gram was stored, or -1 if no key could be formed
 */
@Override
public long put(final int[] ngram, int startPos, int endPos, final T val) {
    final int ngramOrder = endPos - startPos - 1;
    HashMap map = maps[ngramOrder];
    if (map == null) {
        // lazily create the map for this order
        initMap(initCapacities[ngramOrder], ngramOrder);
        map = maps[ngramOrder];
    }
    if (map instanceof ExplicitWordHashMap && map.getLoadFactor() >= maxLoadFactor) {
        // grow by 1.5x before probing performance degrades
        rehash(ngramOrder, map.getCapacity() * 3 / 2);
        map = maps[ngramOrder];
    }
    final long key = getKey(ngram, startPos, endPos);
    if (key < 0) return -1L;
    final long oldSize = map.size();
    final long index = map.put(key);
    final long suffixIndex = getSuffixOffset(ngram, startPos, endPos);
    final boolean addWorked = values.add(ngram, startPos, endPos, ngramOrder, index, contextOffsetOf(key), wordOf(key), val, suffixIndex, map.size() > oldSize);
    if (!addWorked) {
        // FIX: the add result was previously ignored, silently dropping the value when
        // the value store was full (putHelpWithSuffixIndex checks the same flag).
        // Grow and retry, mirroring the load-factor rehash above; assumes rehash also
        // rebuilds the value store -- confirm against rehash's implementation.
        rehash(ngramOrder, map.getCapacity() * 3 / 2);
        return put(ngram, startPos, endPos, val);
    }
    return index;
}