/**
 * Creates a fresh scratch holder.
 *
 * @return a newly allocated {@code LongRef} constructed with {@code -1}
 */
@Override
public LongRef getScratchValue() {
    final LongRef scratch = new LongRef(-1);
    return scratch;
}
/**
 * Supplies the default value for this container.
 *
 * @return a newly allocated {@code LongRef} constructed with {@code -1L}
 */
@Override
protected LongRef getDefaultVal() {
    final LongRef defaultVal = new LongRef(-1L);
    return defaultVal;
}
/**
 * Builds the default value: a new {@code LongRef} constructed with {@code -1L}.
 * A new instance is allocated on every call.
 *
 * @return the freshly created default holder
 */
@Override
protected LongRef getDefaultVal() {
    final LongRef fresh = new LongRef(-1L);
    return fresh;
}
/**
 * Builds a scratch value: a new {@code LongRef} constructed with {@code -1}.
 * A new instance is allocated on every call.
 *
 * @return the freshly created scratch holder
 */
@Override
public LongRef getScratchValue() {
    final LongRef fresh = new LongRef(-1);
    return fresh;
}
/**
 * Gets the raw count of an n-gram.
 *
 * <p>The words are looked up one at a time from {@code ngram[endPos - 1]} down to
 * {@code ngram[startPos]}; each lookup is given the offset returned by the previous
 * one and an order that starts at {@code -1} and grows by one per word.
 *
 * @param ngram
 *            array holding the word indices of the n-gram
 * @param startPos
 *            index of the first word of the n-gram (inclusive)
 * @param endPos
 *            index one past the last word of the n-gram (exclusive)
 * @return the value left in the scratch holder by the final lookup, or -1 if any
 *         lookup returns a negative offset (or if the range is empty)
 */
public long getRawCount(final int[] ngram, final int startPos, final int endPos) {
    final NgramMap<LongRef> ngramMap = map;
    final LongRef holder = new LongRef(-1L);
    long offset = 0L;
    int order = -1;
    // Walk the n-gram from its last word back to its first.
    for (int pos = endPos - 1; pos >= startPos; --pos) {
        assert (offset >= 0);
        offset = ngramMap.getValueAndOffset(offset, order, ngram[pos], holder);
        if (offset < 0) {
            return -1;
        }
        ++order;
    }
    return holder.value;
}
/**
 * Parses one input line of the form {@code word1 word2 ... wordN<TAB>count} and hands
 * the result to the callback.
 *
 * <p>Every space-separated word before the tab is converted to an index with
 * {@code wordIndexer.getOrAddIndexFromString}; the text after the tab is parsed as a
 * {@code long}. The callback receives an index array of length {@code ngramOrder + 1},
 * the range {@code 0 .. ngram.length}, a new {@code LongRef} holding the count, and the
 * word field as a string.
 *
 * @param line
 *            the raw line; must contain a tab separating the words from the count
 * @param ngramOrder
 *            determines the size of the index array ({@code ngramOrder + 1}); the
 *            line must not contain more words than that
 * @param callback
 *            receiver of the parsed n-gram and its count
 * @throws StringIndexOutOfBoundsException
 *             if the line contains no tab
 * @throws ArrayIndexOutOfBoundsException
 *             if the line contains more than {@code ngramOrder + 1} words
 * @throws NumberFormatException
 *             if the text after the tab is not a valid {@code long}
 */
private void parseLine(final String line, final int ngramOrder, final NgramOrderedLmReaderCallback<LongRef> callback) {
    final int tabIndex = line.indexOf('\t');
    int spaceIndex = 0;
    final int[] ngram = new int[ngramOrder + 1];
    final String words = line.substring(0, tabIndex);
    for (int i = 0;; ++i) {
        // Search the word field only: a space inside the count field must not be taken
        // for a word boundary (it used to make the substring below run past the end of
        // `words`); a malformed count is reported by Long.parseLong instead.
        int nextIndex = words.indexOf(' ', spaceIndex);
        if (nextIndex < 0) {
            nextIndex = words.length();
        }
        final String word = words.substring(spaceIndex, nextIndex);
        ngram[i] = wordIndexer.getOrAddIndexFromString(word);
        if (nextIndex == words.length()) {
            break;
        }
        spaceIndex = nextIndex + 1;
    }
    final long count = Long.parseLong(line.substring(tabIndex + 1));
    callback.call(ngram, 0, ngram.length, new LongRef(count), words);
}
/**
 * Reads a single {@code words<TAB>count} line and reports it through the callback.
 *
 * <p>The part before the tab is split on single spaces and each piece is mapped to an
 * index via {@code wordIndexer.getOrAddIndexFromString}. The part after the tab is
 * parsed with {@link Long#parseLong(String)}. The callback is then invoked with the
 * index array (length {@code ngramOrder + 1}), start {@code 0}, end
 * {@code ngram.length}, a new {@code LongRef} wrapping the count, and the untouched
 * word field.
 *
 * @param line
 *            input line containing the words, a tab, and the count
 * @param ngramOrder
 *            the index array is allocated with {@code ngramOrder + 1} slots, so the
 *            line may hold at most that many words
 * @param callback
 *            called once with the parsed n-gram
 * @throws StringIndexOutOfBoundsException
 *             if {@code line} has no tab
 * @throws ArrayIndexOutOfBoundsException
 *             if there are more words than slots in the index array
 * @throws NumberFormatException
 *             if the count field cannot be parsed as a {@code long}
 */
private void parseLine(final String line, final int ngramOrder, final NgramOrderedLmReaderCallback<LongRef> callback) {
    final int tabIndex = line.indexOf('\t');
    int wordStart = 0;
    final int[] ngram = new int[ngramOrder + 1];
    final String words = line.substring(0, tabIndex);
    final int wordsLength = words.length();
    for (int i = 0;; ++i) {
        // Look for the separator inside `words`, not the whole line. Searching the
        // whole line let a space in the count field produce an end index beyond
        // `words`, failing in substring rather than in the number parse below.
        final int found = words.indexOf(' ', wordStart);
        final int wordEnd = (found < 0) ? wordsLength : found;
        ngram[i] = wordIndexer.getOrAddIndexFromString(words.substring(wordStart, wordEnd));
        if (wordEnd == wordsLength) {
            break;
        }
        wordStart = wordEnd + 1;
    }
    final long count = Long.parseLong(line.substring(tabIndex + 1));
    callback.call(ngram, 0, ngram.length, new LongRef(count), words);
}
/**
 * Scores the n-gram {@code ngram[startPos_ .. endPos_)} from stored values.
 *
 * <p>Words are looked up from {@code ngram[endPos_ - 1]} down to
 * {@code ngram[startPos_]}. After each successful lookup the score becomes the log of
 * (value just read / value of the previous step) times
 * {@code pow(alpha, i - startPos_)}; for the first step the divisor is the unigram sum
 * of the value container. The first lookup that returns a negative offset ends the
 * walk and the most recent score is returned.
 *
 * @param ngram
 *            array holding the word indices
 * @param startPos_
 *            index of the first word (inclusive)
 * @param endPos_
 *            index one past the last word (exclusive)
 * @return the last score computed, or {@code oovWordLogProb} if not even the first
 *         lookup succeeded
 */
@Override
public float getLogProb(final int[] ngram, final int startPos_, final int endPos_) {
    final NgramMap<LongRef> ngramMap = map;
    float score = oovWordLogProb;
    long offset = 0L;
    int order = -1;
    // Divisor for the first step; later steps divide by the value read one step earlier.
    long divisor = ((RankedCountValueContainer) map.getValues()).getUnigramSum();
    final LongRef holder = new LongRef(-1L);
    for (int i = endPos_ - 1; i >= startPos_; --i) {
        assert (offset >= 0);
        offset = ngramMap.getValueAndOffset(offset, order, ngram[i], holder);
        if (offset < 0) {
            // Lookup failed: keep the score of the longest suffix that succeeded.
            return score;
        }
        score = (float) Math.log(holder.value / ((float) divisor) * pow(alpha, i - startPos_));
        divisor = holder.value;
        order++;
    }
    return score;
}
/**
 * Scores the n-gram {@code ngram[startPos .. endPos)} from stored values.
 *
 * <p>Two lookup chains are advanced while walking from {@code ngram[endPos - 1]} down
 * to {@code ngram[startPos]}:
 * <ul>
 * <li>the numerator chain, which includes every word from {@code endPos - 1} on;</li>
 * <li>the denominator chain, which is skipped on the first step and therefore starts
 * at {@code ngram[endPos - 2]}. On the first step the denominator is the unigram sum
 * of the value container instead.</li>
 * </ul>
 * After each successful numerator lookup the score becomes the log of
 * (numerator / denominator) times {@code pow(alpha, i - startPos)}. The first numerator
 * lookup that returns a negative offset ends the walk.
 *
 * @param ngram
 *            array holding the word indices
 * @param startPos
 *            index of the first word (inclusive)
 * @param endPos
 *            index one past the last word (exclusive)
 * @return the last score computed, or {@code oovWordLogProb} if not even the first
 *         lookup succeeded
 */
@Override
public float getLogProb(final int[] ngram, final int startPos, final int endPos) {
    final NgramMap<LongRef> ngramMap = map;
    float score = oovWordLogProb;
    long numerOffset = 0L;
    int numerOrder = -1;
    long denomOffset = 0L;
    int denomOrder = -1;
    final LongRef holder = new LongRef(-1L);
    for (int i = endPos - 1; i >= startPos; --i) {
        assert (numerOffset >= 0);
        numerOffset = ngramMap.getValueAndOffset(numerOffset, numerOrder, ngram[i], holder);
        if (numerOffset < 0) {
            return score;
        }
        // Save the numerator now: the holder is reused by the denominator lookup below.
        final long numerator = holder.value;
        final long denominator;
        if (i == endPos - 1) {
            denominator = ((CountValueContainer) map.getValues()).getUnigramSum();
        } else {
            denomOffset = ngramMap.getValueAndOffset(denomOffset, denomOrder, ngram[i], holder);
            denomOrder++;
            denominator = holder.value;
        }
        score = (float) Math.log(numerator / ((float) denominator) * pow(alpha, i - startPos));
        numerOrder++;
    }
    return score;
}
// Fragment of a method whose header is not visible here.
// First statement: stores the index that wordIndexer returns for words[i] into slot i + 1 of sent.
// Second statement: passes the whole sent array (range 0 .. sent.length), a new LongRef holding 1,
// and the original line to the callback.
// NOTE(review): the first statement presumably sits inside a loop over words and the second after it - confirm against the full method.
sent[i + 1] = wordIndexer.getOrAddIndexFromString(words[i]); callback.call(sent, 0, sent.length, new LongRef(1L), line);