edu.cmu.sphinx.linguist.language.ngram.trie.NgramTrieModel$TrieRange java code examples

/**
 * Computes the probability of a word sequence by walking the LM trie
 * directly, without consulting any cache.
 * <p>
 * The unigram entry of the last word supplies the starting probability.
 * For longer sequences,
 * {@link #getAvailableProb(WordSequence, TrieRange, float) getAvailableProb}
 * tries to replace it with a higher order probability, and
 * {@link #getAvailableBackoff(WordSequence) getAvailableBackoff} is added
 * when {@code curDepth} ends up smaller than the sequence length.
 * Resets {@code curDepth} to 1 before searching.
 * @param wordSequence - sequence of words to get probability for
 * @return probability of the specified sequence of words
 */
private float getProbabilityRaw(WordSequence wordSequence) {
  final int length = wordSequence.size();
  final int lastWordId = unigramIDMap.get(wordSequence.getWord(length - 1));
  // successors of the last word's unigram: the bigrams to search next
  TrieRange successors = new TrieRange(unigrams[lastWordId].next, unigrams[lastWordId + 1].next);
  float score = unigrams[lastWordId].prob;
  curDepth = 1;
  if (length != 1) {
    // try to extend the match to ngrams of higher order
    score = getAvailableProb(wordSequence, successors, score);
    if (curDepth < length) {
      // part of the sequence was not matched, compensate with backoff
      score += getAvailableBackoff(wordSequence);
    }
  }
  return score;
}

/**
 * Accumulates backoff weights over the history of a word sequence
 * (every word except the last one).
 * <p>
 * The unigram backoff of the second-to-last word is added only when
 * {@code curDepth} (set outside this method) equals 1. Earlier words are
 * then visited from right to left and their ngram backoffs are summed
 * until a lookup reports the ngram as not found or the range can no
 * longer be searched.
 * @param wordSequence - full word sequence that is scored
 * @return accumulated backoff
 */
private float getAvailableBackoff(WordSequence wordSequence) {
  final int length = wordSequence.size();
  final int historyWordId = unigramIDMap.get(wordSequence.getWord(length - 2));
  TrieRange range = new TrieRange(unigrams[historyWordId].next, unigrams[historyWordId + 1].next);

  float total = 0.0f;
  if (curDepth == 1) {
    total += unigrams[historyWordId].backoff;
  }

  int position = length - 3;
  int order = 0;
  while (position >= 0) {
    int id = unigramIDMap.get(wordSequence.getWord(position));
    float step = trie.readNgramBackoff(id, order, range, quant);
    if (!range.getFound()) {
      // no such ngram: nothing more to add
      break;
    }
    total += step;
    if (!range.isSearchable()) {
      // ngram has no successors to continue with
      break;
    }
    position--;
    order++;
  }
  return total;
}

/**
 * Sums the backoff weights found for the history part of a word sequence,
 * i.e. the words preceding the last one.
 * <p>
 * Starts at the second-to-last word, whose unigram backoff is counted
 * only if {@code curDepth} (maintained outside this method) is 1, and
 * then moves towards the beginning of the sequence. The walk ends when the
 * trie lookup marks the range as not found, or when the range stops
 * being searchable after a backoff was added.
 * @param wordSequence - full word sequence that is scored
 * @return sum of the collected backoffs
 */
private float getAvailableBackoff(WordSequence wordSequence) {
  float sum = 0.0f;
  final int size = wordSequence.size();
  final int startId = unigramIDMap.get(wordSequence.getWord(size - 2));
  TrieRange range = new TrieRange(unigrams[startId].next, unigrams[startId + 1].next);
  if (curDepth == 1) {
    sum += unigrams[startId].backoff;
  }
  final int firstIdx = size - 3;
  for (int idx = firstIdx; idx >= 0; idx--) {
    // order (minus two) grows by one for every step to the left
    int orderMinusTwo = firstIdx - idx;
    int currentId = unigramIDMap.get(wordSequence.getWord(idx));
    float found = trie.readNgramBackoff(currentId, orderMinusTwo, range, quant);
    if (!range.getFound()) {
      break;
    }
    sum += found;
    if (!range.isSearchable()) {
      break;
    }
  }
  return sum;
}

/**
 * Scores a word sequence with a fresh traversal of the LM trie; no cached
 * values are used.
 * <p>
 * A single-word sequence yields the unigram probability of that word.
 * Otherwise the highest order probability available is taken from
 * {@link #getAvailableProb(WordSequence, TrieRange, float) getAvailableProb}
 * and, if {@code curDepth} stayed below the number of words, the result of
 * {@link #getAvailableBackoff(WordSequence) getAvailableBackoff} is added.
 * {@code curDepth} is reset to 1 on every call.
 * @param wordSequence - sequence of words to get probability for
 * @return probability of the specified sequence of words
 */
private float getProbabilityRaw(WordSequence wordSequence) {
  final int total = wordSequence.size();
  final int targetId = unigramIDMap.get(wordSequence.getWord(total - 1));
  TrieRange bigramRange = new TrieRange(unigrams[targetId].next, unigrams[targetId + 1].next);
  final float unigramProb = unigrams[targetId].prob;
  curDepth = 1;
  if (total == 1) {
    return unigramProb;
  }
  // look for ngrams of higher order; this call may advance curDepth
  final float best = getAvailableProb(wordSequence, bigramRange, unigramProb);
  final boolean needsBackoff = curDepth < total;
  // backoff covers the part of the sequence that was not matched
  return needsBackoff ? best + getAvailableBackoff(wordSequence) : best;
}

/**
 * Looks up the ngram that ends with the given word id inside a range of
 * ngram indexes.
 * <p>
 * On success, and only for a {@code MiddleNgramSet}, the range is
 * overwritten with the successors of the found ngram so it can be used
 * for the next search. On failure the range is flagged as not found.
 * @param ngramSet - set of ngrams of certain order to look in
 * @param wordId - word id to look for
 * @param range - range to look in; modified by this method
 * @return ngram index that can be converted into byte offset if ngram was found, -1 otherwise
 */
private int findNgram(NgramSet ngramSet, int wordId, TrieRange range) {
  // uniformFind starts probing at begin + 1, so move begin back by one
  range.begin--;
  final int index = uniformFind(ngramSet, range, wordId);
  if (index < 0) {
    range.setFound(false);
    return -1;
  }
  if (ngramSet instanceof MiddleNgramSet) {
    // load next order ngrams for future searches
    MiddleNgramSet middle = (MiddleNgramSet) ngramSet;
    middle.readNextRange(index, range);
  }
  return index;
}

/**
 * Finds the probability of the highest order ngram that exists for the
 * given word sequence.
 * <p>
 * History words are consumed from right to left, starting with the
 * second-to-last word. Every successful lookup replaces the probability
 * and increments {@code curDepth}. The search stops when
 * {@code maxDepth} is reached, when a lookup reports not found, or when
 * the range is no longer searchable.
 * @param wordSequence - word sequence to score
 * @param range - range to look bigram in
 * @param prob - probability of unigram
 * @return probability of highest order ngram available
 */
private float getAvailableProb(WordSequence wordSequence, TrieRange range, float prob) {
  if (!range.isSearchable()) {
    return prob;
  }
  final int lastHistoryIdx = wordSequence.size() - 2;
  float best = prob;
  for (int order = 0; order <= lastHistoryIdx; order++) {
    if (order + 1 == maxDepth) {
      // model holds no ngrams of higher order
      break;
    }
    int historyId = unigramIDMap.get(wordSequence.getWord(lastHistoryIdx - order));
    float candidate = trie.readNgramProb(historyId, order, range, quant);
    if (!range.getFound()) {
      break;
    }
    best = candidate;
    curDepth++;
    if (!range.isSearchable()) {
      break;
    }
  }
  return best;
}

/**
 * Searches ngram index for given wordId in provided range.
 * <p>
 * The probe position is chosen by {@code calculatePivot} from the
 * position of the word id within the remaining vocabulary bounds; both
 * the index range and the vocabulary bounds are narrowed after each miss.
 * The passed range is modified during the search.
 * @param ngram - ngram set to read word ids from
 * @param range - index range to search in; narrowed in place
 * @param wordId - word id to look for
 * @return index of the matching ngram, -1 if the range was exhausted
 */
private int uniformFind(NgramSet ngram, TrieRange range, int wordId) {
  TrieRange vocab = new TrieRange(0, ngram.maxVocab);
  while (range.getWidth() > 1) {
    int offset = calculatePivot(wordId - vocab.begin, vocab.getWidth(), range.getWidth() - 1);
    int probe = range.begin + 1 + offset;
    int probedWord = ngram.readNgramWord(probe);
    if (probedWord == wordId) {
      return probe;
    }
    if (probedWord < wordId) {
      // target lies after the probe
      range.begin = probe;
      vocab.begin = probedWord;
    } else {
      // target lies before the probe
      range.end = probe;
      vocab.end = probedWord;
    }
  }
  return -1;
}

  /**
   * Tells whether this range can be searched.
   * @return true if {@code getWidth()} is greater than zero
   */
  boolean isSearchable() {
    return 0 < getWidth();
  }
}

/**
 * Finds the index of the ngram identified by the specified word id,
 * searching only within the specified range.
 * <p>
 * If the ngram is found in a {@code MiddleNgramSet}, the range is
 * refilled with that ngram's successors. If it is not found, the range is
 * marked as not found.
 * @param ngramSet - set of ngrams of certain order to look in
 * @param wordId - word id to look for
 * @param range - range to look in; changed by this call
 * @return ngram index that can be converted into byte offset if ngram was found, -1 otherwise
 */
private int findNgram(NgramSet ngramSet, int wordId, TrieRange range) {
  // the search probes from begin + 1 onwards, hence the decrement
  range.begin--;
  int located = uniformFind(ngramSet, range, wordId);
  if (located >= 0) {
    if (ngramSet instanceof MiddleNgramSet) {
      // prepare range of next order ngrams for future searches
      ((MiddleNgramSet) ngramSet).readNextRange(located, range);
    }
    return located;
  }
  range.setFound(false);
  return -1;
}

/**
 * Extracts the probability of the longest ngram available for the
 * specified word sequence.
 * <p>
 * Walks the history from the second-to-last word towards the first one.
 * Each lookup that succeeds overrides the probability and raises
 * {@code curDepth} by one. Walking ends at {@code maxDepth}, on the
 * first lookup that reports not found, or once the range cannot be
 * searched any further.
 * @param wordSequence - word sequence to score
 * @param range - range to look bigram in
 * @param prob - probability of unigram
 * @return probability of highest order ngram available
 */
private float getAvailableProb(WordSequence wordSequence, TrieRange range, float prob) {
  if (!range.isSearchable()) {
    return prob;
  }
  float result = prob;
  int wordIdx = wordSequence.size() - 2;
  int orderMinusTwo = 0;
  while (wordIdx >= 0 && orderMinusTwo + 1 != maxDepth) {
    int id = unigramIDMap.get(wordSequence.getWord(wordIdx));
    float updated = trie.readNgramProb(id, orderMinusTwo, range, quant);
    if (!range.getFound()) {
      break;
    }
    result = updated;
    curDepth++;
    if (!range.isSearchable()) {
      break;
    }
    wordIdx--;
    orderMinusTwo++;
  }
  return result;
}

/**
 * Searches ngram index for given wordId in provided range.
 * <p>
 * Each iteration asks {@code calculatePivot} for a probe offset, reads
 * the word id stored at the probe and either returns the probe or shrinks
 * the index range together with the matching vocabulary bound.
 * The provided range is altered while searching.
 * @param ngram - ngram set whose word ids are inspected
 * @param range - index range to search in; shrunk in place
 * @param wordId - word id to look for
 * @return index of the ngram with the given word id, -1 if none was found
 */
private int uniformFind(NgramSet ngram, TrieRange range, int wordId) {
  TrieRange vocabBounds = new TrieRange(0, ngram.maxVocab);
  while (range.getWidth() > 1) {
    int candidate = range.begin + 1
        + calculatePivot(wordId - vocabBounds.begin, vocabBounds.getWidth(), range.getWidth() - 1);
    int wordAtCandidate = ngram.readNgramWord(candidate);
    if (wordAtCandidate > wordId) {
      // overshoot: drop everything from the candidate on
      range.end = candidate;
      vocabBounds.end = wordAtCandidate;
    } else if (wordAtCandidate < wordId) {
      // undershoot: drop everything up to the candidate
      range.begin = candidate;
      vocabBounds.begin = wordAtCandidate;
    } else {
      return candidate;
    }
  }
  return -1;
}

  /**
   * Checks whether the range has a positive width.
   * @return true when {@code getWidth()} exceeds zero, false otherwise
   */
  boolean isSearchable() {
    final boolean hasPositiveWidth = getWidth() > 0;
    return hasPositiveWidth;
  }
}

Javadoc

Structure to keep ngram indexes range for trie traversal

Most used methods

Popular in Java

Updating database using SQL prepared statement
scheduleAtFixedRate (ScheduledExecutorService)
putExtra (Intent)
findViewById (Activity)
Arrays (java.util)
This class contains various methods for manipulating arrays (such as sorting and searching). This cl
BitSet (java.util)
The BitSet class implements a bit array [http://en.wikipedia.org/wiki/Bit_array]. Each element is eit
Stack (java.util)
Stack is a Last-In/First-Out(LIFO) data structure which represents a stack of objects. It enables u
Vector (java.util)
Vector is an implementation of List, backed by an array and synchronized. All optional operations in
Notification (javax.management)
JList (javax.swing)
Top Sublime Text plugins

How to use NgramTrieModel$TrieRange in edu.cmu.sphinx.linguist.language.ngram.trie

Best Java code snippets using edu.cmu.sphinx.linguist.language.ngram.trie.NgramTrieModel$TrieRange (Showing top 12 results out of 315)

How to use
NgramTrieModel$TrieRange
in
edu.cmu.sphinx.linguist.language.ngram.trie