if (location.getProtocol() == null
        || location.getProtocol().equals("file")) {
    try {
        loader = new BinaryLoader(new File(location.toURI()), format,
                applyLanguageWeightAndWip, languageWeight, wip, unigramWeight);
    } catch (Exception e) {
        // fall back to the raw path if the URI conversion fails
        loader = new BinaryLoader(new File(location.getPath()), format,
                applyLanguageWeightAndWip, languageWeight, wip, unigramWeight);
    }
}

unigrams = loader.getUnigrams();
loadedNGramBuffers = new Map[loader.getMaxDepth()];
ngramProbTable = new float[loader.getMaxDepth()][];
ngramBackoffTable = new float[loader.getMaxDepth()][];
ngramSegmentTable = new int[loader.getMaxDepth()][];

for (int i = 1; i <= loader.getMaxDepth(); i++) {
    loadedNGramBuffers[i - 1] = new HashMap<WordSequence, NGramBuffer>();
    ngramProbTable[i - 1] = loader.getNGramProbabilities(i);
    ngramBackoffTable[i - 1] = loader.getNGramBackoffWeights(i);
    ngramSegmentTable[i - 1] = loader.getNGramSegments(i);
}

if (maxDepth <= 0 || maxDepth > loader.getMaxDepth())
    maxDepth = loader.getMaxDepth();

for (int i = 1; i <= loader.getMaxDepth(); i++)
    logger.info(Integer.toString(i) + "-grams: "
            + loader.getNumberNGrams(i));
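// Illustrative note (not part of the original code): the loop above stores the
// tables for order i at index i - 1, so a caller wanting, for example, the
// trigram tables loaded by the BinaryLoader would index the arrays like this:
//
//     float[] trigramProbs   = ngramProbTable[3 - 1];
//     float[] trigramBackoff = ngramBackoffTable[3 - 1];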
readHeader(stream);

// read the unigrams; add 1 for the trailing sentinel entry
unigrams = readUnigrams(stream, numberNGrams[0] + 1, bigEndian);

skipNGrams(stream);

// read the probability, backoff and segment tables for each order
for (int i = 1; i < maxNGram; i++) {
    if (numberNGrams[i] > 0) {
        if (i == 1) {
            NGramProbTable[i] = readFloatTable(stream, bigEndian);
        } else {
            NGramBackoffTable[i] = readFloatTable(stream, bigEndian);
            NGramProbTable[i] = readFloatTable(stream, bigEndian);
            NGramSegmentTable[i] = readIntTable(stream, bigEndian, NGramSegTableSize);
        }
    }
}

// read the string holding all the word names
int wordsStringLength = readInt(stream, bigEndian);
if (wordsStringLength <= 0) {
    throw new Error("Bad word string size: " + wordsStringLength);
}
this.words = readWords(stream, wordsStringLength, numberNGrams[0]);

// apply the unigram weight, language weight and word insertion penalty
applyUnigramWeight();
for (int i = 1; i < maxNGram; i++) {
    if (numberNGrams[i] > 0) {
        applyLanguageWeight(NGramProbTable[i], languageWeight);
        applyWip(NGramProbTable[i], wip);
        if (NGramBackoffTable[i] != null)
            applyLanguageWeight(NGramBackoffTable[i], languageWeight);
    }
}
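// Sketch only: the readInt/readFloatTable calls above take a bigEndian flag
// because the binary LM file may have been written on either architecture.
// A minimal helper with that behavior (the actual readInt may differ) could be:
private static int readIntExample(java.io.DataInputStream stream, boolean bigEndian)
        throws java.io.IOException {
    int value = stream.readInt();                            // DataInputStream reads big-endian
    return bigEndian ? value : Integer.reverseBytes(value);  // swap bytes for little-endian data
}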
public void deallocate() throws IOException {
    loader.deallocate();
}
/**
 * Skips the NGrams of the LM.
 *
 * @param stream the source of data
 * @throws java.io.IOException if an I/O error occurs
 */
private void skipNGrams(DataInputStream stream) throws IOException {
    long bytesToSkip;

    // skip the bigram section; the extra entry is the trailing sentinel
    NGramOffset[1] = bytesRead;
    bytesToSkip = (long) (numberNGrams[1] + 1)
            * LargeNGramModel.BYTES_PER_NGRAM * getBytesPerField();
    skipStreamBytes(stream, bytesToSkip);

    // skip the higher orders; the highest order uses the max-gram record size
    // and has no sentinel entry
    for (int i = 2; i < maxNGram; i++) {
        if (numberNGrams[i] > 0 && i < maxNGram - 1) {
            NGramOffset[i] = bytesRead;
            bytesToSkip = (long) (numberNGrams[i] + 1)
                    * LargeNGramModel.BYTES_PER_NGRAM * getBytesPerField();
            skipStreamBytes(stream, bytesToSkip);
        } else if (numberNGrams[i] > 0 && i == maxNGram - 1) {
            NGramOffset[i] = bytesRead;
            bytesToSkip = (long) numberNGrams[i]
                    * LargeNGramModel.BYTES_PER_NMAXGRAM * getBytesPerField();
            skipStreamBytes(stream, bytesToSkip);
        }
    }
}
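// Sketch only: skipStreamBytes is assumed to advance the stream and the
// bytesRead counter used for NGramOffset[]. Because InputStream.skip may skip
// fewer bytes than requested, a robust version loops until the full count is
// consumed; the real implementation may differ:
private void skipStreamBytesExample(java.io.DataInputStream stream, long bytesToSkip)
        throws java.io.IOException {
    long remaining = bytesToSkip;
    while (remaining > 0) {
        long skipped = stream.skip(remaining);      // may skip less than asked
        if (skipped <= 0) {
            throw new java.io.IOException("Unexpected end of stream while skipping");
        }
        remaining -= skipped;
    }
    bytesRead += bytesToSkip;                       // keep the running file offset
}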
        size = (long) numberNGrams
                * ((loader.getMaxDepth() == orderBuffer) ? BYTES_PER_NMAXGRAM : BYTES_PER_NGRAM)
                * loader.getBytesPerField();
        position = loader.getNGramOffset(orderBuffer)
                + ((long) firstCurrentNGramEntry
                * ((loader.getMaxDepth() == orderBuffer) ? BYTES_PER_NMAXGRAM : BYTES_PER_NGRAM)
                * loader.getBytesPerField());
    } else { // only for ws.size() >= 2
        int lastWordId = getWordID(ws.getWord(ws.size() - 1));
        if (loader.getMaxDepth() != orderBuffer)
            numberNGrams++;
        size = (long) numberNGrams
                * ((loader.getMaxDepth() == orderBuffer) ? BYTES_PER_NMAXGRAM : BYTES_PER_NGRAM)
                * loader.getBytesPerField();
        position = loader.getNGramOffset(orderBuffer)
                + (long) firstCurrentNGramEntry
                * (long) ((loader.getMaxDepth() == orderBuffer) ? BYTES_PER_NMAXGRAM : BYTES_PER_NGRAM)
                * loader.getBytesPerField();
    }

    // load the on-disk block and wrap it in the buffer type matching its order
    byte[] buffer = loader.loadBuffer(position, size);
    if (loader.getMaxDepth() == orderBuffer) {
        currentBuffer = new NMaxGramBuffer(buffer, numberNGrams, loader.getBigEndian(),
                is32bits(), orderBuffer, firstCurrentNGramEntry);
    } else {
        currentBuffer = new NGramBuffer(buffer, numberNGrams, loader.getBigEndian(),
                is32bits(), orderBuffer, firstCurrentNGramEntry);
    }
/**
 * Tells whether the model fields are 16 or 32 bits wide.
 *
 * @return true if 32 bits, false otherwise
 */
private boolean is32bits() {
    return loader.getBytesPerField() == 4;
}
/**
 * Returns all the trigram backoff weights.
 *
 * @return all the trigram backoff weights
 */
public float[] getTrigramBackoffWeights() {
    return getNGramBackoffWeights(3);
}
/**
 * Returns the index of the first NGram entry of the given N-1Gram.
 *
 * @param nMinus1Gram the N-1Gram whose first NGram entry we're looking for
 * @param firstNMinus1GramEntry the index of the first N-1Gram entry of the N-1Gram in question
 * @param n the order of the NGram
 * @return the index of the first NGram entry of the given N-1Gram
 */
private int getFirstNGramEntry(NGramProbability nMinus1Gram, int firstNMinus1GramEntry, int n) {
    int firstNGramEntry = ngramSegmentTable[n - 1][(firstNMinus1GramEntry
            + nMinus1Gram.getWhichFollower()) >> loader.getLogNGramSegmentSize()]
            + nMinus1Gram.getFirstNPlus1GramEntry();
    return firstNGramEntry;
}
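// Hypothetical worked example (values made up) of the segment-table lookup above:
// the full first-follower indices are not stored per entry; ngramSegmentTable keeps
// one absolute base per segment and each entry stores only a small relative offset,
// recovered as base + offset.
//
//     int logSegmentSize = 9;               // segments of 2^9 = 512 parent entries
//     int[] segmentBase  = {0, 4200, 9150}; // absolute base index per segment
//     int parentEntry    = 700;             // index of the (N-1)-gram entry
//     int relativeOffset = 35;              // stored with that (N-1)-gram record
//     int firstEntry = segmentBase[parentEntry >> logSegmentSize] + relativeOffset;
//     // parentEntry 700 lies in segment 700 >> 9 == 1, so firstEntry == 4200 + 35 == 4235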
/** Clears the various N-gram caches. */
private void clearCache() {
    for (int i = 0; i < loadedBigramBuffers.length; i++) {
        NGramBuffer buffer = loadedBigramBuffers[i];
        if (buffer != null) {
            if (!buffer.getUsed())
                loadedBigramBuffers[i] = null; // free the BigramBuffer
            else
                buffer.setUsed(false);
        }
    }

    loadedBigramBuffers = new NGramBuffer[unigrams.length];
    for (int i = 2; i <= loader.getMaxDepth(); i++) {
        loadedNGramBuffers[i - 1] = new HashMap<WordSequence, NGramBuffer>();
    }

    logger.info("LM Cache Size: " + ngramProbCache.size() + " Hits: "
            + ngramHits + " Misses: " + ngramMisses);

    if (clearCacheAfterUtterance) {
        ngramProbCache = new LRUCache<WordSequence, Float>(ngramCacheSize);
    }
}
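// Sketch only: ngramProbCache is an LRUCache bounded by ngramCacheSize. A minimal
// least-recently-used cache with that behavior can be built on LinkedHashMap's
// access order; the real LRUCache class may differ in detail.
static class LRUCacheSketch<K, V> extends java.util.LinkedHashMap<K, V> {
    private final int maxSize;

    LRUCacheSketch(int maxSize) {
        super(16, 0.75f, true);   // accessOrder = true keeps recently used entries last
        this.maxSize = maxSize;
    }

    @Override
    protected boolean removeEldestEntry(java.util.Map.Entry<K, V> eldest) {
        return size() > maxSize;  // evict the least recently used entry when over capacity
    }
}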