/**
 * Folds the evidence of a single N-gram (N=1,2,3) into the running per-language scores.
 *
 * <p>Looks up the probabilities stored for {@code ngram} in {@code ngramFrequencyData} and
 * multiplies every entry of {@code prob} by {@code (weight + probability)}, once per occurrence.
 * The weight starts as {@code alpha / BASE_FREQ}. For grams longer than one character it is
 * scaled by {@code prefixFactor} when that factor is not 1.0 and the gram starts with a space;
 * otherwise by {@code suffixFactor} when that factor is not 1.0 and the gram ends with a space.
 *
 * @param prob  scores to update in place; indexed in parallel with the looked-up probabilities
 * @param ngram the gram to look up
 * @param count 1-n: how often the gram occurred; the multiplication is repeated this many times
 * @param alpha numerator of the additive weight
 * @return {@code false} if no probabilities are available for {@code ngram} (in which case
 *         {@code prob} is left untouched), {@code true} otherwise
 */
private boolean updateLangProb(@NotNull double[] prob, @NotNull String ngram, int count, double alpha) {
    final double[] ngramProbabilities = ngramFrequencyData.getProbabilities(ngram);
    if (ngramProbabilities == null) {
        // Unknown gram: nothing to contribute.
        return false;
    }

    if (logger.isTraceEnabled()) {
        logger.trace(ngram + "(" + Util.unicodeEncode(ngram) + "):" + Util.wordProbToString(ngramProbabilities, ngramFrequencyData.getLanguageList()));
    }

    double smoothing = alpha / BASE_FREQ;
    final int lastIndex = ngram.length() - 1;
    if (lastIndex >= 1) {
        // The prefix rule takes precedence; the suffix rule is only consulted when it did not apply.
        final boolean applyPrefix = prefixFactor != 1.0 && ngram.charAt(0) == ' ';
        if (applyPrefix) {
            smoothing *= prefixFactor;
        } else if (suffixFactor != 1.0 && ngram.charAt(lastIndex) == ' ') {
            smoothing *= suffixFactor;
        }
    }

    // Repeated multiplication (rather than a power) keeps the floating-point result
    // identical to applying the gram 'count' separate times.
    for (int lang = 0; lang < prob.length; lang++) {
        int remaining = count;
        while (remaining > 0) {
            prob[lang] *= (smoothing + ngramProbabilities[lang]);
            remaining--;
        }
    }
    return true;
}
/**
 * Folds the evidence of a single N-gram (N=1,2,3) into the running per-language scores.
 *
 * <p>Looks up the probabilities stored for {@code ngram} in {@code ngramFrequencyData} and
 * multiplies every entry of {@code prob} by {@code (weight + probability)}, once per occurrence.
 * The weight starts as {@code alpha / BASE_FREQ}. For grams longer than one character it is
 * scaled by {@code prefixFactor} when that factor is not 1.0 and the gram starts with a space;
 * otherwise by {@code suffixFactor} when that factor is not 1.0 and the gram ends with a space.
 *
 * @param prob  scores to update in place; indexed in parallel with the looked-up probabilities
 * @param ngram the gram to look up
 * @param count 1-n: how often the gram occurred; the multiplication is repeated this many times
 * @param alpha numerator of the additive weight
 * @return {@code false} if no probabilities are available for {@code ngram} (in which case
 *         {@code prob} is left untouched), {@code true} otherwise
 */
private boolean updateLangProb(@NotNull double[] prob, @NotNull String ngram, int count, double alpha) {
    final double[] ngramProbabilities = ngramFrequencyData.getProbabilities(ngram);
    if (ngramProbabilities == null) {
        // Unknown gram: nothing to contribute.
        return false;
    }

    if (logger.isTraceEnabled()) {
        logger.trace(ngram + "(" + Util.unicodeEncode(ngram) + "):" + Util.wordProbToString(ngramProbabilities, ngramFrequencyData.getLanguageList()));
    }

    double smoothing = alpha / BASE_FREQ;
    final int lastIndex = ngram.length() - 1;
    if (lastIndex >= 1) {
        // The prefix rule takes precedence; the suffix rule is only consulted when it did not apply.
        final boolean applyPrefix = prefixFactor != 1.0 && ngram.charAt(0) == ' ';
        if (applyPrefix) {
            smoothing *= prefixFactor;
        } else if (suffixFactor != 1.0 && ngram.charAt(lastIndex) == ' ') {
            smoothing *= suffixFactor;
        }
    }

    // Repeated multiplication (rather than a power) keeps the floating-point result
    // identical to applying the gram 'count' separate times.
    for (int lang = 0; lang < prob.length; lang++) {
        int remaining = count;
        while (remaining > 0) {
            prob[lang] *= (smoothing + ngramProbabilities[lang]);
            remaining--;
        }
    }
    return true;
}