/**
 * Sums the backoff weights for the history part of a word sequence that was
 * not consumed by
 * {@link #getAvailableProb(WordSequence, TrieRange, float) getAvailableProb}.
 * How much of the sequence was already consumed is read from the
 * {@code curDepth} member, which is declared outside this method.
 *
 * @param wordSequence - full word sequence that is scored; the word at
 *        index {@code size() - 2} is read unconditionally
 * @return accumulated backoff
 */
private float getAvailableBackoff(WordSequence wordSequence) {
    final int length = wordSequence.size();

    // The history starts at the word right before the last word of the sequence.
    final int historyWordId = unigramIDMap.get(wordSequence.getWord(length - 2));
    final TrieRange searchRange =
            new TrieRange(unigrams[historyWordId].next, unigrams[historyWordId + 1].next);

    float total = 0.0f;
    // The unigram backoff is added only when curDepth is exactly 1.
    if (curDepth == 1) {
        total += unigrams[historyWordId].backoff;
    }

    // Walk the remaining history backwards while the order index counts up from zero.
    int order = 0;
    int position = length - 3;
    while (position >= 0) {
        int currentWordId = unigramIDMap.get(wordSequence.getWord(position));
        float weight = trie.readNgramBackoff(currentWordId, order, searchRange, quant);
        if (!searchRange.getFound()) {
            // Lookup failed: the weight just read is not added.
            break;
        }
        total += weight;
        if (!searchRange.isSearchable()) {
            break;
        }
        position--;
        order++;
    }
    return total;
}
/**
 * Finds the highest-order ngram available for the given word sequence and
 * returns its probability. {@code curDepth} is incremented once for every
 * order that is found.
 *
 * @param wordSequence - word sequence to score
 * @param range - range to look bigram in; it is handed to the trie reader on
 *        every step and queried afterwards via {@code getFound()} and
 *        {@code isSearchable()}
 * @param prob - probability of unigram, returned unchanged when no higher
 *        order is found
 * @return probability of the highest order ngram available
 */
private float getAvailableProb(WordSequence wordSequence, TrieRange range, float prob) {
    if (!range.isSearchable()) {
        return prob;
    }

    // Index of the word right before the last one; the history is walked backwards from here.
    final int lastHistoryIdx = wordSequence.size() - 2;

    for (int orderMinusTwo = 0; orderMinusTwo <= lastHistoryIdx; orderMinusTwo++) {
        // Stop once the order limit given by maxDepth is reached.
        if (orderMinusTwo + 1 == maxDepth) {
            break;
        }
        int wordId = unigramIDMap.get(wordSequence.getWord(lastHistoryIdx - orderMinusTwo));
        float candidate = trie.readNgramProb(wordId, orderMinusTwo, range, quant);
        if (!range.getFound()) {
            // Lookup failed: keep the probability of the last order that was found.
            break;
        }
        prob = candidate;
        curDepth++;
        if (!range.isSearchable()) {
            break;
        }
    }
    return prob;
}
/**
 * Collects backoffs for the part of the word sequence left unused by
 * {@link #getAvailableProb(WordSequence, TrieRange, float) getAvailableProb}.
 * The amount of words already used is taken from {@code curDepth}, which is
 * not a local of this method but a member declared elsewhere.
 *
 * @param wordSequence - full word sequence that is scored
 * @return sum of the backoffs that were found
 */
private float getAvailableBackoff(WordSequence wordSequence) {
    float sum = 0.0f;
    int count = wordSequence.size();

    // Second-to-last word is the first word of the history.
    int firstId = unigramIDMap.get(wordSequence.getWord(count - 2));
    TrieRange lookup = new TrieRange(unigrams[firstId].next, unigrams[firstId + 1].next);
    if (curDepth == 1) {
        sum += unigrams[firstId].backoff;
    }

    // Index of the first history word that has to be looked up in the trie.
    int start = count - 3;
    for (int idx = start; idx >= 0; idx--) {
        // Order index grows by one for each step back through the history.
        int orderIdx = start - idx;
        int id = unigramIDMap.get(wordSequence.getWord(idx));
        float found = trie.readNgramBackoff(id, orderIdx, lookup, quant);
        if (!lookup.getFound()) {
            break;
        }
        sum += found;
        if (!lookup.isSearchable()) {
            break;
        }
    }
    return sum;
}
/**
 * Selects the ngram of highest order available for the specified word
 * sequence and extracts its probability. Each order that is found
 * increments {@code curDepth}.
 *
 * @param wordSequence - word sequence to score
 * @param range - range to look bigram in
 * @param prob - probability of unigram; this is the result when the range
 *        is not searchable or no higher-order ngram is found
 * @return probability of the highest order ngram available
 */
private float getAvailableProb(WordSequence wordSequence, TrieRange range, float prob) {
    float best = prob;
    if (range.isSearchable()) {
        final int size = wordSequence.size();
        // Start at the word before the last one and move towards the sequence start.
        int historyIdx = size - 2;
        while (historyIdx >= 0) {
            int order = size - 2 - historyIdx;
            // Do not look beyond the order limit given by maxDepth.
            if (order + 1 == maxDepth) {
                break;
            }
            int id = unigramIDMap.get(wordSequence.getWord(historyIdx));
            float candidate = trie.readNgramProb(id, order, range, quant);
            if (!range.getFound()) {
                break;
            }
            best = candidate;
            curDepth++;
            if (!range.isSearchable()) {
                break;
            }
            historyIdx--;
        }
    }
    return best;
}