/**
 * Counts the bigram entries that follow the given word.
 *
 * The count is the difference between the first bigram entry of the next
 * unigram in the table and the first bigram entry of this unigram. The last
 * unigram in the table is reported as having no followers.
 *
 * @param wordID
 *            the ID of the word
 * @return the number of bigram followers
 */
private int getNumberBigramFollowers(int wordID) {
    final int lastIndex = unigrams.length - 1;
    if (wordID != lastIndex) {
        // Read the next unigram first, then this one (same order as before).
        int nextStart = unigrams[wordID + 1].getFirstBigramEntry();
        int thisStart = unigrams[wordID].getFirstBigramEntry();
        return nextStart - thisStart;
    }
    return 0;
}
/**
 * Applies the unigram weight to every unigram.
 *
 * For each unigram except the one at startWordID, the stored log
 * probability is shifted by log(unigramWeight) and combined, through
 * logMath.addAsLinear, with log((1 - unigramWeight) / numberNGrams[0]).
 * When applyLanguageWeightAndWip is set, the resulting log probability is
 * scaled by languageWeight and offset by log(wip), and the log backoff is
 * scaled by languageWeight as well.
 */
private void applyUnigramWeight() {
    float logWeight = logMath.linearToLog(unigramWeight);
    float logComplement = logMath.linearToLog(1.0f - unigramWeight);
    float logUniform = logMath.linearToLog(1.0f / (numberNGrams[0]));
    float logWip = logMath.linearToLog(wip);
    float uniformShare = logUniform + logComplement;

    for (int index = 0; index < numberNGrams[0]; index++) {
        UnigramProbability entry = unigrams[index];
        float logProb = entry.getLogProbability();

        boolean isStartWord = (index == startWordID);
        if (!isStartWord) {
            logProb = logMath.addAsLinear(logProb + logWeight, uniformShare);
        }

        if (applyLanguageWeightAndWip) {
            logProb = logProb * languageWeight + logWip;
            float scaledBackoff = entry.getLogBackoff() * languageWeight;
            entry.setLogBackoff(scaledBackoff);
        }

        entry.setLogProbability(logProb);
    }
}
/**
 * Looks up the ID of a word through its unigram entry.
 *
 * @param word
 *            the word whose ID is wanted
 * @return the ID of the word
 * @throws IllegalArgumentException
 *             if getUnigram returns no entry for the word
 */
public final int getWordID(Word word) {
    UnigramProbability entry = getUnigram(word);
    if (entry != null) {
        return entry.getWordID();
    }
    throw new IllegalArgumentException("No word ID: " + word);
}
// NOTE(review): this line is a run of disconnected statement excerpts rather
// than one compilable sequence: logugprob and logbgprob are each declared
// twice and the final statement is cut off after "logMath". What is visible:
// log-domain probabilities and backoffs are read from the unigram and n-gram
// tables, converted with logMath.logToLinear, and p * logp is accumulated
// into S0. Confirm the surrounding loops against the full file.
float logp = unigram.getLogProbability(); double p = logMath.logToLinear(logp); S0 += p * logp; ugAvgLogProb[i] = 0.0; float logugbackoff = unigrams[i].getLogBackoff(); double ugbackoff = logMath.logToLinear(logugbackoff); NGramProbability bgProb = bigram.getNGramProbability(j); float logugprob = unigrams[wordID].getLogProbability(); float logbgprob = ngramProbTable[1][bgProb.getProbabilityID()]; float logbgprob = getBigramProb(k, m); double bgprob = logMath.logToLinear(logbgprob); float logugprob = unigrams[m].getLogProbability(); double backofftgprob = bgbackoff * bgprob; double logbackofftgprob = logMath
// Overwrite both the log probability and the log backoff of this unigram
// with MIN_PROBABILITY.
unigram.setLogProbability(MIN_PROBABILITY); unigram.setLogBackoff(MIN_PROBABILITY);
/**
 * Returns the stored log probability of the first word of the given
 * sequence.
 *
 * @param wordSequence
 *            the unigram word sequence; only the word at index 0 is read
 * @return the log probability stored for that unigram
 * @throws Error
 *             if getUnigram returns no entry for the word
 */
private float getUnigramProbability(WordSequence wordSequence) {
    Word word = wordSequence.getWord(0);
    UnigramProbability entry = getUnigram(word);
    if (entry != null) {
        return entry.getLogProbability();
    }
    throw new Error("Unigram not in LM: " + word);
}
// Convert the unigram backoff with logMath.log10ToLog (presumably from log
// base 10 into the LogMath base — confirm), then store a new
// UnigramProbability for slot i built from the ID, log probability,
// converted backoff and first bigram entry.
float logBackoff = logMath.log10ToLog(unigramBackoff); unigrams[i] = new UnigramProbability(unigramID, logProbability, logBackoff, firstBigramEntry);
/**
 * Computes the probability of a word sequence, backing off to shorter
 * sequences when the full n-gram is not stored.
 *
 * The steps, in order:
 * - if the loader reports no n-grams of this length, or the first word has
 *   no unigram, recurse on wordSequence.getNewest();
 * - a sequence of fewer than two words is answered from the unigram table;
 * - if findNGram locates the sequence, its entry in ngramProbTable is
 *   returned;
 * - for a two-word sequence, the result is the first word's log backoff
 *   plus the second word's log probability;
 * - otherwise the result is getProbability of wordSequence.getNewest(),
 *   with the backoff of wordSequence.getOldest() added when findNGram
 *   locates that sequence.
 *
 * @param wordSequence
 *            the word sequence to score
 * @return the probability of the sequence, as a boxed Float
 */
private Float getNGramProbability(WordSequence wordSequence) {
    int order = wordSequence.size();
    Word head = wordSequence.getWord(0);

    boolean orderMissing = loader.getNumberNGrams(order) == 0;
    if (orderMissing || !hasUnigram(head)) {
        return getNGramProbability(wordSequence.getNewest());
    }

    if (order < 2) {
        return getUnigramProbability(wordSequence);
    }

    NGramProbability exactMatch = findNGram(wordSequence);
    if (exactMatch != null) {
        return ngramProbTable[order - 1][exactMatch.getProbabilityID()];
    }

    if (order == 2) {
        UnigramProbability firstEntry = getUnigram(head);
        UnigramProbability secondEntry = getUnigram(wordSequence.getWord(1));
        return firstEntry.getLogBackoff() + secondEntry.getLogProbability();
    }

    NGramProbability context = findNGram(wordSequence.getOldest());
    if (context == null) {
        return getProbability(wordSequence.getNewest());
    }
    return ngramBackoffTable[order - 1][context.getBackoffID()]
            + getProbability(wordSequence.getNewest());
}
// Overwrite both the log probability and the log backoff of this unigram
// with MIN_PROBABILITY.
unigram.setLogProbability(MIN_PROBABILITY); unigram.setLogBackoff(MIN_PROBABILITY);
/**
 * Returns the stored log probability of the first word of the given
 * sequence.
 *
 * @param wordSequence
 *            the unigram word sequence; only the word at index 0 is read
 * @return the log probability stored for that unigram
 * @throws Error
 *             if getUnigram returns no entry for the word
 */
private float getUnigramProbability(WordSequence wordSequence) {
    Word word = wordSequence.getWord(0);
    UnigramProbability entry = getUnigram(word);
    if (entry != null) {
        return entry.getLogProbability();
    }
    throw new Error("Unigram not in LM: " + word);
}
// Convert the unigram backoff with logMath.log10ToLog (presumably from log
// base 10 into the LogMath base — confirm), then store a new
// UnigramProbability for slot i built from the ID, log probability,
// converted backoff and first bigram entry.
float logBackoff = logMath.log10ToLog(unigramBackoff); unigrams[i] = new UnigramProbability(unigramID, logProbability, logBackoff, firstBigramEntry);
/**
 * Applies the unigram weight to every unigram.
 *
 * For each unigram except the one at startWordID, the stored log
 * probability is shifted by log(unigramWeight) and combined, through
 * logMath.addAsLinear, with log((1 - unigramWeight) / numberNGrams[0]).
 * When applyLanguageWeightAndWip is set, the resulting log probability is
 * scaled by languageWeight and offset by log(wip), and the log backoff is
 * scaled by languageWeight as well.
 */
private void applyUnigramWeight() {
    float logWeight = logMath.linearToLog(unigramWeight);
    float logComplement = logMath.linearToLog(1.0f - unigramWeight);
    float logUniform = logMath.linearToLog(1.0f / (numberNGrams[0]));
    float logWip = logMath.linearToLog(wip);
    float uniformShare = logUniform + logComplement;

    for (int index = 0; index < numberNGrams[0]; index++) {
        UnigramProbability entry = unigrams[index];
        float logProb = entry.getLogProbability();

        boolean isStartWord = (index == startWordID);
        if (!isStartWord) {
            logProb = logMath.addAsLinear(logProb + logWeight, uniformShare);
        }

        if (applyLanguageWeightAndWip) {
            logProb = logProb * languageWeight + logWip;
            float scaledBackoff = entry.getLogBackoff() * languageWeight;
            entry.setLogBackoff(scaledBackoff);
        }

        entry.setLogProbability(logProb);
    }
}
// NOTE(review): this line is a run of disconnected statement excerpts rather
// than one compilable sequence: logugprob and logbgprob are each declared
// twice and the final statement is cut off after "logMath". What is visible:
// log-domain probabilities and backoffs are read from the unigram and n-gram
// tables, converted with logMath.logToLinear, and p * logp is accumulated
// into S0. Confirm the surrounding loops against the full file.
float logp = unigram.getLogProbability(); double p = logMath.logToLinear(logp); S0 += p * logp; ugAvgLogProb[i] = 0.0; float logugbackoff = unigrams[i].getLogBackoff(); double ugbackoff = logMath.logToLinear(logugbackoff); NGramProbability bgProb = bigram.getNGramProbability(j); float logugprob = unigrams[wordID].getLogProbability(); float logbgprob = ngramProbTable[1][bgProb.getProbabilityID()]; float logbgprob = getBigramProb(k, m); double bgprob = logMath.logToLinear(logbgprob); float logugprob = unigrams[m].getLogProbability(); double backofftgprob = bgbackoff * bgprob; double logbackofftgprob = logMath
// Start with no (n-1)-gram buffer. The first entry for the current n-gram
// level is the first bigram entry recorded for the first word, and the entry
// count is that word's bigram follower count plus one.
// NOTE(review): the reason for the extra entry is not visible here — confirm.
NGramBuffer nMinus1Buffer = null; firstCurrentNGramEntry = unigrams[firstWordID].getFirstBigramEntry(); numberNGrams = getNumberBigramFollowers(firstWordID) + 1;
/**
 * Looks up the ID of a word through its unigram entry.
 *
 * @param word
 *            the word whose ID is wanted
 * @return the ID of the word
 * @throws IllegalArgumentException
 *             if getUnigram returns no entry for the word
 */
public final int getWordID(Word word) {
    UnigramProbability entry = getUnigram(word);
    if (entry != null) {
        return entry.getWordID();
    }
    throw new IllegalArgumentException("No word ID: " + word);
}
/**
 * Computes the probability of a word sequence, backing off to shorter
 * sequences when the full n-gram is not stored.
 *
 * The steps, in order:
 * - if the loader reports no n-grams of this length, or the first word has
 *   no unigram, recurse on wordSequence.getNewest();
 * - a sequence of fewer than two words is answered from the unigram table;
 * - if findNGram locates the sequence, its entry in ngramProbTable is
 *   returned;
 * - for a two-word sequence, the result is the first word's log backoff
 *   plus the second word's log probability;
 * - otherwise the result is getProbability of wordSequence.getNewest(),
 *   with the backoff of wordSequence.getOldest() added when findNGram
 *   locates that sequence.
 *
 * @param wordSequence
 *            the word sequence to score
 * @return the probability of the sequence, as a boxed Float
 */
private Float getNGramProbability(WordSequence wordSequence) {
    int order = wordSequence.size();
    Word head = wordSequence.getWord(0);

    boolean orderMissing = loader.getNumberNGrams(order) == 0;
    if (orderMissing || !hasUnigram(head)) {
        return getNGramProbability(wordSequence.getNewest());
    }

    if (order < 2) {
        return getUnigramProbability(wordSequence);
    }

    NGramProbability exactMatch = findNGram(wordSequence);
    if (exactMatch != null) {
        return ngramProbTable[order - 1][exactMatch.getProbabilityID()];
    }

    if (order == 2) {
        UnigramProbability firstEntry = getUnigram(head);
        UnigramProbability secondEntry = getUnigram(wordSequence.getWord(1));
        return firstEntry.getLogBackoff() + secondEntry.getLogProbability();
    }

    NGramProbability context = findNGram(wordSequence.getOldest());
    if (context == null) {
        return getProbability(wordSequence.getNewest());
    }
    return ngramBackoffTable[order - 1][context.getBackoffID()]
            + getProbability(wordSequence.getNewest());
}
/**
 * Counts the bigram entries that follow the given word.
 *
 * The count is the difference between the first bigram entry of the next
 * unigram in the table and the first bigram entry of this unigram. The last
 * unigram in the table is reported as having no followers.
 *
 * @param wordID
 *            the ID of the word
 * @return the number of bigram followers
 */
private int getNumberBigramFollowers(int wordID) {
    final int lastIndex = unigrams.length - 1;
    if (wordID != lastIndex) {
        // Read the next unigram first, then this one (same order as before).
        int nextStart = unigrams[wordID + 1].getFirstBigramEntry();
        int thisStart = unigrams[wordID].getFirstBigramEntry();
        return nextStart - thisStart;
    }
    return 0;
}
// Start with no (n-1)-gram buffer. The first entry for the current n-gram
// level is the first bigram entry recorded for the first word, and the entry
// count is that word's bigram follower count plus one.
// NOTE(review): the reason for the extra entry is not visible here — confirm.
NGramBuffer nMinus1Buffer = null; firstCurrentNGramEntry = unigrams[firstWordID].getFirstBigramEntry(); numberNGrams = getNumberBigramFollowers(firstWordID) + 1;