/**
 * Reports the total token count as the number of entries in the map
 * returned by {@code map.getMapForOrder(1)}.
 *
 * NOTE(review): presumably order 1 denotes the unigram map, so this is the
 * number of distinct unigrams rather than a sum of counts; confirm.
 *
 * @return the size of the order-1 map
 */
@Override
public long getTotalTokenCount() {
    final long orderOneSize = map.getMapForOrder(1).size();
    return orderOneSize;
}
@Override public long getCount(List<String> tokens) { LongRef count = map.get(tokens); long result; if (count == null) { result = 0; } else { result = count.asLong(); } //System.out.println(tokens + " -> " + result); return result; }
/**
 * Sums the sizes of the per-order maps, starting at order 0.
 *
 * Summation stops at the first order whose map is {@code null}, or once
 * {@code getMaxNgramOrder()} orders have been visited.
 *
 * @return the combined size of all maps visited
 */
public long getTotalSize() {
    long total = 0L;
    int order = 0;
    while (order < getMaxNgramOrder()) {
        final HashMap orderMap = getMap(order);
        if (orderMap == null) {
            // Stop at the first missing map; later orders are not inspected.
            return total;
        }
        total += orderMap.size();
        ++order;
    }
    return total;
}
/**
 * Follows context offsets from the given n-gram down to order 0 and returns
 * the word stored in the key reached there.
 *
 * @param offset offset of the n-gram within the map for {@code ngramOrder}
 * @param ngramOrder order of the n-gram that {@code offset} refers to
 * @return the word decoded from the order-0 key at the end of the context chain
 */
public int getFirstWordForOffset(final long offset, final int ngramOrder) {
    final long key = getMap(ngramOrder).getKey(offset);
    if (ngramOrder != 0) {
        // Not at the bottom yet: step to the context one order lower.
        final long parentOffset = contextOffsetOf(key);
        return getFirstWordForOffset(parentOffset, ngramOrder - 1);
    }
    return wordOf(key);
}
/**
 * Reconstructs the key stored at the given offset.
 *
 * When {@code wordRanges} is {@code null} the key is obtained by expanding
 * the next-offset value; otherwise it is built by combining the word for
 * this context with the next-offset value.
 *
 * @param contextOffset offset to reconstruct the key for
 * @return the reconstructed key
 */
@Override
public long getKey(final long contextOffset) {
    if (wordRanges == null) {
        return expandKey(getNextOffset(contextOffset));
    }
    return ngramMap.combineToKey(getWordForContext(contextOffset), getNextOffset(contextOffset));
}
/**
 * Reconstructs the key stored at the given offset by combining the word
 * for that context with its next-offset value.
 *
 * @param contextOffset offset to reconstruct the key for
 * @return the combined key
 */
@Override
public long getKey(long contextOffset) {
    // Word is resolved first, then the offset, matching argument evaluation order.
    final int word = getWordForContext(contextOffset);
    final long nextOffset = getNextOffset(contextOffset);
    return AbstractNgramMap.combineToKey(word, nextOffset);
}
/**
 * Computes the key for the n-gram held in {@code ngram[startPos, endPos)}.
 *
 * All words except one are folded into a context offset, one word at a
 * time, via {@code getOffsetForContextEncoding}. The remaining word, as
 * chosen by {@code headWord}, is then combined with that offset.
 *
 * @param ngram array holding the n-gram's words
 * @param startPos index of the first word of the n-gram (inclusive)
 * @param endPos index one past the last word of the n-gram (exclusive)
 * @return the combined key, or -1 if any intermediate context lookup returns -1
 */
private long getKey(final int[] ngram, final int startPos, final int endPos) {
    final int numContextWords = endPos - startPos - 1;
    long offset = 0;
    int order = 0;
    while (order < numContextWords) {
        final int pos;
        if (reversed) {
            // Reversed storage: consume context words from the end backwards.
            pos = endPos - order - 1;
        } else {
            pos = startPos + order;
        }
        offset = getOffsetForContextEncoding(offset, order - 1, ngram[pos], null);
        if (offset == -1L) {
            // Context not found; no key can be built.
            return -1L;
        }
        ++order;
    }
    return combineToKey(headWord(ngram, startPos, endPos), offset);
}
/**
 * Reconstructs the words of the n-gram stored at {@code offset} in the map
 * for {@code ngramOrder}.
 *
 * Starting at the given order, each key is split into a word and a context
 * offset; the context offset is then looked up one order lower, down to
 * order 0. The {@code reversed} flag decides from which end the result
 * array is filled.
 *
 * @param offset offset of the n-gram in the map for {@code ngramOrder}
 * @param ngramOrder order of the n-gram
 * @return a new array of {@code ngramOrder + 1} words
 */
public int[] getNgramForOffset(long offset, int ngramOrder) {
    final int[] words = new int[ngramOrder + 1];
    long currOffset = offset;
    for (int order = ngramOrder; order >= 0; --order) {
        final long key = maps[order].getKey(currOffset);
        currOffset = AbstractNgramMap.contextOffsetOf(key);
        final int word = AbstractNgramMap.wordOf(key);
        // Words come out highest order first; reversed fills from the front, otherwise from the back.
        final int slot = reversed ? (ngramOrder - order) : order;
        words[slot] = word;
    }
    return words;
}
/**
 * Rehashes the first explicit map whose load factor, as reported by
 * {@code getLoadFactor(num)}, has reached {@code maxLoadFactor}.
 *
 * Does nothing when there are no explicit maps. At most one rehash is
 * triggered per call; orders with a {@code null} map are skipped.
 *
 * @param num value passed to the load-factor check and used to size the
 *            rehashed map (presumably the number of pending additions; confirm)
 */
public void rehashIfNecessary(int num) {
    if (explicitMaps == null) {
        return;
    }
    for (int order = 0; order < explicitMaps.length; ++order) {
        final boolean overloaded = explicitMaps[order] != null
                && explicitMaps[order].getLoadFactor(num) >= maxLoadFactor;
        if (overloaded) {
            // New capacity is 1.5x the current capacity plus num.
            rehash(order, (explicitMaps[order].getCapacity() + num) * 3 / 2, num);
            return;
        }
    }
}
/**
 * Gets the offset of the context for an n-gram, where the n-gram is
 * identified by its offset and its order.
 *
 * @param offset offset of the n-gram in the map for {@code ngramOrder}
 * @param ngramOrder order of the n-gram
 * @return -1 when {@code ngramOrder} is 0, otherwise the context offset
 *         decoded from the n-gram's key
 */
public long getPrefixOffset(long offset, int ngramOrder) {
    if (ngramOrder != 0) {
        final long key = getKey(offset, ngramOrder);
        return AbstractNgramMap.contextOffsetOf(key);
    }
    // Order 0 is reported as having no context.
    return -1;
}
public ContextEncodedProbBackoffLm(final int lmOrder, final WordIndexer<W> wordIndexer, final ContextEncodedNgramMap<ProbBackoffPair> map, final ConfigOptions opts) { super(lmOrder, wordIndexer, (float) opts.unknownWordLogProb); this.map = (HashNgramMap<ProbBackoffPair>) map; this.values = (ProbBackoffValueContainer) map.getValues(); numWords = map.getNumNgrams(0); }
/**
 * Looks up the offset of {@code key} in the map responsible for
 * {@code ngramOrder}.
 *
 * In explicit mode the lookup goes to {@code explicitMaps[ngramOrder]}. In
 * implicit mode order 0 goes to {@code implicitUnigramMap} and every other
 * order goes to {@code implicitMaps[ngramOrder - 1]}.
 *
 * @param ngramOrder order of the n-gram the key belongs to
 * @param key key to look up
 * @return the offset reported by the selected map, or -1 in explicit mode
 *         when no map exists for {@code ngramOrder}
 */
private long getOffsetHelpFromMap(int ngramOrder, long key) {
    if (!isExplicit) {
        if (ngramOrder == 0) {
            return implicitUnigramMap.getOffset(key);
        }
        // Implicit maps are stored shifted by one because order 0 has its own map.
        return implicitMaps[ngramOrder - 1].getOffset(key);
    }
    if (ngramOrder >= explicitMaps.length || explicitMaps[ngramOrder] == null) {
        return -1;
    }
    return explicitMaps[ngramOrder].getOffset(key);
}
/**
 * Tells whether the given word has contexts in the map consulted for
 * bigrams.
 *
 * @param word the word to check
 * @return {@code false} when {@code getMaxNgramOrder()} is below 2;
 *         otherwise the result of {@code hasContexts(word)} on
 *         {@code implicitMaps[0]} if there are no explicit maps, or on
 *         {@code explicitMaps[1]} if there are
 */
@Override
public boolean wordHasBigrams(final int word) {
    if (getMaxNgramOrder() < 2) {
        return false;
    }
    if (explicitMaps == null) {
        return implicitMaps[0].hasContexts(word);
    }
    return explicitMaps[1].hasContexts(word);
}
/**
 * Returns the value produced by {@code nextIndex()}, boxed as a {@code Long}.
 *
 * @return the next index
 */
@Override
public Long next() {
    return Long.valueOf(nextIndex());
}
/**
 * Reconstructs the words of the n-gram stored at {@code offset} in the map
 * for {@code ngramOrder}, writing them into a caller-supplied array.
 *
 * Starting at the given order, each key is split into a word and a context
 * offset; the context offset is then looked up one order lower, down to
 * order 0. The {@code reversed} flag decides from which end positions
 * {@code 0..ngramOrder} of {@code ret} are filled.
 *
 * @param offset offset of the n-gram in the map for {@code ngramOrder}
 * @param ngramOrder order of the n-gram
 * @param ret destination array; indices {@code 0..ngramOrder} are written
 * @return {@code ret}, for convenience
 */
public int[] getNgramForOffset(final long offset, final int ngramOrder, final int[] ret) {
    long currOffset = offset;
    for (int order = ngramOrder; order >= 0; --order) {
        final long key = getMap(order).getKey(currOffset);
        currOffset = contextOffsetOf(key);
        final int word = wordOf(key);
        // Words come out highest order first; reversed fills from index 0, otherwise from index ngramOrder down.
        final int slot = reversed ? (ngramOrder - order) : order;
        ret[slot] = word;
    }
    return ret;
}
/**
 * Returns the context offset encoded in the key of the n-gram at the given
 * offset and order.
 *
 * @param offset offset of the n-gram in the map for {@code ngramOrder}
 * @param ngramOrder order of the n-gram
 * @return the context offset decoded from the n-gram's key
 */
public long getNextContextOffset(long offset, final int ngramOrder) {
    final long key = getKey(offset, ngramOrder);
    return AbstractNgramMap.contextOffsetOf(key);
}