/**
 * Builds the key for a context offset at this level: the offset, narrowed to {@code int},
 * is passed to {@code AbstractNgramMap.combineToKey} as the word together with a context
 * offset of zero.
 *
 * @param contextOffset offset to encode; it is cast to {@code int} before being combined
 * @return the value produced by {@code AbstractNgramMap.combineToKey}
 */
@Override
public long getKey(long contextOffset) {
    final int word = (int) contextOffset;
    return AbstractNgramMap.combineToKey(word, 0L);
}
/**
 * Reconstructs the word ids of the n-gram stored at {@code offset} in the map of order
 * {@code ngramOrder}.
 *
 * <p>Starting at the given order, each step reads the key at the current offset, records the
 * key's word, and follows the key's context offset into the map of the next lower order, until
 * order 0 has been read.
 *
 * @param offset     offset of the n-gram within {@code maps[ngramOrder]}
 * @param ngramOrder index into {@code maps}; the result holds {@code ngramOrder + 1} words
 * @return the words; when {@code reversed} is set the first word read goes to index 0,
 *         otherwise it goes to the last index
 */
public int[] getNgramForOffset(long offset, int ngramOrder) {
    final int length = ngramOrder + 1;
    final int[] ngram = new int[length];
    long currentOffset = offset;
    int step = 0;
    for (int order = ngramOrder; order >= 0; --order) {
        final long key = maps[order].getKey(currentOffset);
        currentOffset = AbstractNgramMap.contextOffsetOf(key);
        final int word = AbstractNgramMap.wordOf(key);
        final int slot = reversed ? step : (length - step - 1);
        ngram[slot] = word;
        ++step;
    }
    return ngram;
}
/**
 * Searches for an n-gram, either directly (order 0) or inside the compressed array.
 *
 * <p>For {@code ngramOrder == 0} no compressed data is consulted: the word is taken from
 * {@code searchKey} when that is non-negative, otherwise from {@code searchOffset}. For
 * higher orders the containing block is located with {@code binarySearchBlocks} and then
 * scanned with {@code decompressLinearSearch}.
 *
 * @param compressed   compressed array to search (unused for order 0)
 * @param searchKey    key to look for; a negative value means the lookup is driven by
 *                     {@code searchOffset} instead
 * @param ngramOrder   order being searched
 * @param outputVal    if non-null in the order-0 case, filled via {@code values.getFromOffset};
 *                     for higher orders it is forwarded to {@code decompressLinearSearch}
 * @param searchOffset offset to look for when {@code searchKey} is negative
 * @return {@code -1} if the word is out of range or no block matches; for order 0 the word
 *         itself when searching by key, or the combined key when searching by offset;
 *         otherwise the result of {@code decompressLinearSearch}
 */
private long decompressSearch(final LongArray compressed, final long searchKey, final int ngramOrder, final T outputVal,
        final long searchOffset) {
    if (ngramOrder != 0) {
        final long lastBlock = (compressed.size() / compressedBlockSize) - 1;
        final long block = binarySearchBlocks(compressed, compressed.size(), searchKey, 0L, lastBlock, searchOffset);
        if (block < 0) {
            return -1;
        }
        return decompressLinearSearch(compressed, block, searchKey, ngramOrder, outputVal, searchOffset);
    }

    // Order 0: the word doubles as the offset, so it can be resolved without searching.
    final boolean searchingByKey = searchKey >= 0;
    final int word;
    if (searchingByKey) {
        word = AbstractNgramMap.wordOf(searchKey);
    } else {
        word = (int) searchOffset;
    }
    if (word < 0 || word >= maps[0].size()) {
        return -1;
    }
    if (outputVal != null) {
        values.getFromOffset(word, 0, outputVal);
    }
    if (searchingByKey) {
        return word;
    }
    return AbstractNgramMap.combineToKey(word, 0);
}
/**
 * Returns the offset for a key, which at this level is simply the key's word.
 *
 * @param key encoded key
 * @return the word extracted from {@code key}, or {@code EMPTY_KEY} when that word is negative
 *         or not below {@code numWords}
 */
public final long getOffset(final long key) {
    final long word = AbstractNgramMap.wordOf(key);
    final boolean inRange = word >= 0 && word < numWords;
    if (!inRange) {
        return EMPTY_KEY;
    }
    return word;
}
/**
 * Stores a key at the given slot. Only the context-offset part of {@code putKey} is written
 * to {@code keys}.
 *
 * @param index  slot in {@code keys} to write
 * @param putKey encoded key whose context offset is stored; the offset is asserted to be
 *               non-negative
 */
private void setKey(final long index, final long putKey) {
    final long storedContext = AbstractNgramMap.contextOffsetOf(putKey);
    assert storedContext >= 0;
    keys.set(index, storedContext);
}
/**
 * Returns the offset for a key, which at this level is the word that {@code ngramMap}
 * extracts from it.
 *
 * @param key encoded key
 * @return the word extracted from {@code key}, or {@code EMPTY_KEY} when that word is negative
 *         or not below {@code numWords}
 */
@Override
public final long getOffset(final long key) {
    final long word = ngramMap.wordOf(key);
    final boolean inRange = word >= 0 && word < numWords;
    if (!inRange) {
        return EMPTY_KEY;
    }
    return word;
}
/**
 * Gets the offset of the context for an n-gram (represented by offset).
 *
 * @param offset     offset of the n-gram within the map of order {@code ngramOrder}
 * @param ngramOrder order of the n-gram
 * @return the context offset stored in the n-gram's key, or {@code -1} when
 *         {@code ngramOrder} is 0
 */
public long getPrefixOffset(long offset, int ngramOrder) {
    if (ngramOrder != 0) {
        final long key = getKey(offset, ngramOrder);
        return AbstractNgramMap.contextOffsetOf(key);
    }
    return -1;
}
/**
 * Expands a (context offset, context order, word) triple into the full array of word ids.
 *
 * <p>The given {@code word} is placed at one end of the result. The context is then unwound
 * by reading the key at the current offset for each order from {@code contextOrder} down to
 * 0, recording that key's word and moving on to that key's context offset.
 *
 * @param contextOffset offset of the context within the map of order {@code contextOrder}
 * @param contextOrder  order of the context; a negative value means there is no context
 * @param word          the word attached to the context
 * @return a single-element array holding {@code word} when {@code contextOrder} is negative;
 *         otherwise {@code contextOrder + 2} words, with {@code word} at index 0 when
 *         {@code reversed} is set and at the last index otherwise
 */
@Override
public int[] getNgramFromContextEncoding(long contextOffset, int contextOrder, int word) {
    if (contextOrder < 0) {
        return new int[] { word };
    }
    final int length = contextOrder + 2;
    final int[] ngram = new int[length];
    ngram[reversed ? 0 : (length - 1)] = word;

    long currentOffset = contextOffset;
    int filled = 1; // number of slots already written, counting the word above
    for (int order = contextOrder; order >= 0; --order) {
        final long key = getKey(currentOffset, order);
        currentOffset = AbstractNgramMap.contextOffsetOf(key);
        final int contextWord = AbstractNgramMap.wordOf(key);
        final int slot = reversed ? filled : (length - filled - 1);
        ngram[slot] = contextWord;
        ++filled;
    }
    return ngram;
}
/**
 * "Inserts" a key at this level. Nothing is written here: the method only returns the word
 * that {@code ngramMap} extracts from the key.
 *
 * @param key encoded key
 * @return the word of {@code key}
 */
@Override
public long put(final long key) {
    final long word = ngramMap.wordOf(key);
    return word;
}
/**
 * Builds the key for a context offset at this level: the offset, narrowed to {@code int},
 * is passed to {@code ngramMap.combineToKey} together with a second argument of zero.
 *
 * @param contextOffset offset to encode; it is cast to {@code int} before being combined
 * @return the value produced by {@code ngramMap.combineToKey}
 */
@Override
public long getKey(final long contextOffset) {
    final int word = (int) contextOffset;
    return ngramMap.combineToKey(word, 0L);
}
/**
 * Returns the context offset stored in the key of the n-gram at {@code offset}.
 *
 * @param offset     offset of the n-gram within the map of order {@code ngramOrder}
 * @param ngramOrder order of the n-gram
 * @return the context-offset part of that n-gram's key
 */
public long getNextContextOffset(long offset, final int ngramOrder) {
    final long key = getKey(offset, ngramOrder);
    return AbstractNgramMap.contextOffsetOf(key);
}
/**
 * Searches for a key inside the slot range that {@code wordRanges} assigns to the key's word.
 *
 * <p>The range starts at {@code wordRanges[word]} and ends at the next word's start, or at
 * {@code getCapacity()} for the last word. The probe starts at the index returned by
 * {@code hash} and is delegated to {@code keys.linearSearch}, which is given the key's context
 * offset as the value to look for.
 *
 * @param key                   encoded key to look up
 * @param returnFirstEmptyIndex forwarded unchanged to {@code keys.linearSearch}
 * @return {@code -1} when the word has no entry in {@code wordRanges} or {@code hash} yields a
 *         negative start index; otherwise the result of {@code keys.linearSearch}
 */
private long linearSearch(final long key, boolean returnFirstEmptyIndex) {
    final int word = AbstractNgramMap.wordOf(key);
    // NOTE(review): only the upper bound is checked; presumably wordOf never yields a negative
    // word here -- confirm, since a negative value would index wordRanges out of bounds.
    if (word >= wordRanges.length) {
        return -1;
    }

    final long rangeStart = wordRanges[word];
    final long rangeEnd;
    if (word == wordRanges.length - 1) {
        rangeEnd = getCapacity();
    } else {
        rangeEnd = wordRanges[word + 1];
    }

    final long probeStart = hash(key, rangeStart, rangeEnd);
    if (probeStart < 0) {
        return -1L;
    }
    assert probeStart >= rangeStart;
    assert probeStart < rangeEnd;

    final long contextOffset = AbstractNgramMap.contextOffsetOf(key);
    return keys.linearSearch(contextOffset, rangeStart, rangeEnd, probeStart, EMPTY_KEY, returnFirstEmptyIndex);
}
/**
 * "Inserts" a key at this level. Nothing is written here: the method only returns the word
 * extracted from the key by {@code AbstractNgramMap.wordOf}.
 *
 * @param key encoded key
 * @return the word of {@code key}
 */
@Override
public long put(final long key) {
    final long word = AbstractNgramMap.wordOf(key);
    return word;
}
/**
 * Reassembles the key for a context offset by combining the word returned by
 * {@code getWordForContext} with the offset returned by {@code getNextOffset}.
 *
 * @param contextOffset offset to build the key for
 * @return the value produced by {@code AbstractNgramMap.combineToKey} for that pair
 */
@Override
public long getKey(long contextOffset) {
    final int word = getWordForContext(contextOffset);
    final long nextOffset = getNextOffset(contextOffset);
    return AbstractNgramMap.combineToKey(word, nextOffset);
}
/**
 * Rebuilds every per-order map into a fresh {@code HashNgramMap}, giving the map of order
 * {@code changedNgramOrder} the capacity {@code newCapacity} and keeping the current capacity
 * of all others.
 *
 * <p>Every non-empty key of every existing map is decoded back into its word array and put
 * into the new map together with its value. Afterwards the new per-order maps are copied into
 * {@code maps}, and {@code values} takes over the freshly built values and is pointed back at
 * this map.
 *
 * @param changedNgramOrder order whose map receives the new capacity
 * @param newCapacity       capacity for that order's map
 */
private void rehash(final int changedNgramOrder, final long newCapacity) {
    final ValueContainer<T> freshValues = values.createFreshValues();

    // Capacities default to -1 for orders that have no map yet.
    // NOTE(review): this loop stops at the first null map, while the re-insertion loop below
    // merely skips nulls -- presumably nulls only ever form a trailing run; confirm.
    final long[] capacities = new long[maps.length];
    Arrays.fill(capacities, -1L);
    for (int order = 0; order < maps.length && maps[order] != null; ++order) {
        if (order == changedNgramOrder) {
            capacities[order] = newCapacity;
        } else {
            capacities[order] = maps[order].getCapacity();
        }
    }

    final HashNgramMap<T> rebuilt = new HashNgramMap<T>(freshValues, opts, capacities, reversed);

    for (int order = 0; order < maps.length; ++order) {
        final HashMap oldMap = maps[order];
        if (oldMap != null) {
            for (long slot = 0; slot < oldMap.getCapacity(); ++slot) {
                final long key = oldMap.getKey(slot);
                if (!oldMap.isEmptyKey(key)) {
                    final int[] words = getNgramFromContextEncoding(AbstractNgramMap.contextOffsetOf(key), order - 1,
                            AbstractNgramMap.wordOf(key));
                    final T value = values.getScratchValue();
                    values.getFromOffset(slot, order, value);
                    rebuilt.put(words, 0, words.length, value);
                }
            }
        }
    }

    System.arraycopy(rebuilt.maps, 0, maps, 0, rebuilt.maps.length);
    values.setFromOtherValues(freshValues);
    values.setMap(this);
}
/**
 * Returns the word stored in the key of the n-gram at {@code offset}.
 *
 * @param offset     offset of the n-gram within the map of order {@code ngramOrder}
 * @param ngramOrder order of the n-gram
 * @return the word part of that n-gram's key
 */
public int getNextWord(long offset, final int ngramOrder) {
    final long key = getKey(offset, ngramOrder);
    return AbstractNgramMap.wordOf(key);
}