if (location.getProtocol() == null
        || location.getProtocol().equals("file")) {
    try {
        loader = new BinaryLoader(new File(location.toURI()), format,
                applyLanguageWeightAndWip, languageWeight, wip, unigramWeight);
    } catch (Exception e) {
        // fall back to the raw path if the URI conversion fails
        loader = new BinaryLoader(new File(location.getPath()), format,
                applyLanguageWeightAndWip, languageWeight, wip, unigramWeight);
    }
}

unigrams = loader.getUnigrams();
loadedNGramBuffers = new Map[loader.getMaxDepth()];
ngramProbTable = new float[loader.getMaxDepth()][];
ngramBackoffTable = new float[loader.getMaxDepth()][];
ngramSegmentTable = new int[loader.getMaxDepth()][];

for (int i = 1; i <= loader.getMaxDepth(); i++) {
    loadedNGramBuffers[i - 1] = new HashMap<WordSequence, NGramBuffer>();
    ngramProbTable[i - 1] = loader.getNGramProbabilities(i);
    ngramBackoffTable[i - 1] = loader.getNGramBackoffWeights(i);
    ngramSegmentTable[i - 1] = loader.getNGramSegments(i);
}

if (maxDepth <= 0 || maxDepth > loader.getMaxDepth())
    maxDepth = loader.getMaxDepth();

for (int i = 1; i <= loader.getMaxDepth(); i++)
    logger.info(Integer.toString(i) + "-grams: "
            + loader.getNumberNGrams(i));
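// Illustrative note (not part of the original code): the loop above stores the
// tables for order i at index i - 1, so a caller wanting, for example, the
// trigram tables loaded by the BinaryLoader would index the arrays like this:
//
//     float[] trigramProbs   = ngramProbTable[3 - 1];
//     float[] trigramBackoff = ngramBackoffTable[3 - 1];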
readHeader(stream);

// read the unigrams; add 1 for the trailing sentinel entry
unigrams = readUnigrams(stream, numberNGrams[0] + 1, bigEndian);

skipNGrams(stream);

// read the probability, backoff and segment tables for each order
for (int i = 1; i < maxNGram; i++) {
    if (numberNGrams[i] > 0) {
        if (i == 1) {
            NGramProbTable[i] = readFloatTable(stream, bigEndian);
        } else {
            NGramBackoffTable[i] = readFloatTable(stream, bigEndian);
            NGramProbTable[i] = readFloatTable(stream, bigEndian);
            NGramSegmentTable[i] = readIntTable(stream, bigEndian, NGramSegTableSize);
        }
    }
}

// read the string holding all the word names
int wordsStringLength = readInt(stream, bigEndian);
if (wordsStringLength <= 0) {
    throw new Error("Bad word string size: " + wordsStringLength);
}
this.words = readWords(stream, wordsStringLength, numberNGrams[0]);

// apply the unigram weight, language weight and word insertion penalty
applyUnigramWeight();
for (int i = 1; i < maxNGram; i++) {
    if (numberNGrams[i] > 0) {
        applyLanguageWeight(NGramProbTable[i], languageWeight);
        applyWip(NGramProbTable[i], wip);
        if (NGramBackoffTable[i] != null)
            applyLanguageWeight(NGramBackoffTable[i], languageWeight);
    }
}
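// Sketch only: the readInt/readFloatTable calls above take a bigEndian flag
// because the binary LM file may have been written on either architecture.
// A minimal helper with that behavior (the actual readInt may differ) could be:
private static int readIntExample(java.io.DataInputStream stream, boolean bigEndian)
        throws java.io.IOException {
    int value = stream.readInt();                            // DataInputStream reads big-endian
    return bigEndian ? value : Integer.reverseBytes(value);  // swap bytes for little-endian data
}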
public void deallocate() throws IOException {
    loader.deallocate();
}
/**
 * Skips the NGrams of the LM.
 *
 * @param stream the source of data
 * @throws java.io.IOException if an I/O error occurs
 */
private void skipNGrams(DataInputStream stream) throws IOException {
    long bytesToSkip;

    // skip the bigram section; the extra entry is the trailing sentinel
    NGramOffset[1] = bytesRead;
    bytesToSkip = (long) (numberNGrams[1] + 1)
            * LargeNGramModel.BYTES_PER_NGRAM * getBytesPerField();
    skipStreamBytes(stream, bytesToSkip);

    // skip the higher orders; the highest order uses the max-gram record size
    // and has no sentinel entry
    for (int i = 2; i < maxNGram; i++) {
        if (numberNGrams[i] > 0 && i < maxNGram - 1) {
            NGramOffset[i] = bytesRead;
            bytesToSkip = (long) (numberNGrams[i] + 1)
                    * LargeNGramModel.BYTES_PER_NGRAM * getBytesPerField();
            skipStreamBytes(stream, bytesToSkip);
        } else if (numberNGrams[i] > 0 && i == maxNGram - 1) {
            NGramOffset[i] = bytesRead;
            bytesToSkip = (long) numberNGrams[i]
                    * LargeNGramModel.BYTES_PER_NMAXGRAM * getBytesPerField();
            skipStreamBytes(stream, bytesToSkip);
        }
    }
}
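// Sketch only: skipStreamBytes is assumed to advance the stream and the
// bytesRead counter used for NGramOffset[]. Because InputStream.skip may skip
// fewer bytes than requested, a robust version loops until the full count is
// consumed; the real implementation may differ:
private void skipStreamBytesExample(java.io.DataInputStream stream, long bytesToSkip)
        throws java.io.IOException {
    long remaining = bytesToSkip;
    while (remaining > 0) {
        long skipped = stream.skip(remaining);      // may skip less than asked
        if (skipped <= 0) {
            throw new java.io.IOException("Unexpected end of stream while skipping");
        }
        remaining -= skipped;
    }
    bytesRead += bytesToSkip;                       // keep the running file offset
}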
        size = (long) numberNGrams
                * ((loader.getMaxDepth() == orderBuffer) ? BYTES_PER_NMAXGRAM : BYTES_PER_NGRAM)
                * loader.getBytesPerField();
        position = loader.getNGramOffset(orderBuffer)
                + ((long) firstCurrentNGramEntry
                * ((loader.getMaxDepth() == orderBuffer) ? BYTES_PER_NMAXGRAM : BYTES_PER_NGRAM)
                * loader.getBytesPerField());
    } else { // only for ws.size() >= 2
        int lastWordId = getWordID(ws.getWord(ws.size() - 1));
        if (loader.getMaxDepth() != orderBuffer)
            numberNGrams++;
        size = (long) numberNGrams
                * ((loader.getMaxDepth() == orderBuffer) ? BYTES_PER_NMAXGRAM : BYTES_PER_NGRAM)
                * loader.getBytesPerField();
        position = loader.getNGramOffset(orderBuffer)
                + (long) firstCurrentNGramEntry
                * (long) ((loader.getMaxDepth() == orderBuffer) ? BYTES_PER_NMAXGRAM : BYTES_PER_NGRAM)
                * loader.getBytesPerField();
    }

    // load the on-disk block and wrap it in the buffer type matching its order
    byte[] buffer = loader.loadBuffer(position, size);
    if (loader.getMaxDepth() == orderBuffer) {
        currentBuffer = new NMaxGramBuffer(buffer, numberNGrams, loader.getBigEndian(),
                is32bits(), orderBuffer, firstCurrentNGramEntry);
    } else {
        currentBuffer = new NGramBuffer(buffer, numberNGrams, loader.getBigEndian(),
                is32bits(), orderBuffer, firstCurrentNGramEntry);
    }
/**
 * Tells whether the model fields are 16 or 32 bits wide.
 *
 * @return true if 32 bits, false otherwise
 */
private boolean is32bits() {
    return loader.getBytesPerField() == 4;
}
/**
 * Returns all the trigram backoff weights.
 *
 * @return all the trigram backoff weights
 */
public float[] getTrigramBackoffWeights() {
    return getNGramBackoffWeights(3);
}
/**
 * Returns the index of the first NGram entry of the given N-1Gram.
 *
 * @param nMinus1Gram the N-1Gram whose first NGram entry we're looking for
 * @param firstNMinus1GramEntry the index of the first N-1Gram entry of the N-1Gram in question
 * @param n the order of the NGram
 * @return the index of the first NGram entry of the given N-1Gram
 */
private int getFirstNGramEntry(NGramProbability nMinus1Gram, int firstNMinus1GramEntry, int n) {
    int firstNGramEntry = ngramSegmentTable[n - 1][(firstNMinus1GramEntry
            + nMinus1Gram.getWhichFollower()) >> loader.getLogNGramSegmentSize()]
            + nMinus1Gram.getFirstNPlus1GramEntry();
    return firstNGramEntry;
}
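// Hypothetical worked example (values made up) of the segment-table lookup above:
// the full first-follower indices are not stored per entry; ngramSegmentTable keeps
// one absolute base per segment and each entry stores only a small relative offset,
// recovered as base + offset.
//
//     int logSegmentSize = 9;               // segments of 2^9 = 512 parent entries
//     int[] segmentBase  = {0, 4200, 9150}; // absolute base index per segment
//     int parentEntry    = 700;             // index of the (N-1)-gram entry
//     int relativeOffset = 35;              // stored with that (N-1)-gram record
//     int firstEntry = segmentBase[parentEntry >> logSegmentSize] + relativeOffset;
//     // parentEntry 700 lies in segment 700 >> 9 == 1, so firstEntry == 4200 + 35 == 4235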
/** Clears the various N-gram caches. */
private void clearCache() {
    for (int i = 0; i < loadedBigramBuffers.length; i++) {
        NGramBuffer buffer = loadedBigramBuffers[i];
        if (buffer != null) {
            if (!buffer.getUsed())
                loadedBigramBuffers[i] = null; // free the BigramBuffer
            else
                buffer.setUsed(false);
        }
    }

    loadedBigramBuffers = new NGramBuffer[unigrams.length];
    for (int i = 2; i <= loader.getMaxDepth(); i++) {
        loadedNGramBuffers[i - 1] = new HashMap<WordSequence, NGramBuffer>();
    }

    logger.info("LM Cache Size: " + ngramProbCache.size() + " Hits: "
            + ngramHits + " Misses: " + ngramMisses);

    if (clearCacheAfterUtterance) {
        ngramProbCache = new LRUCache<WordSequence, Float>(ngramCacheSize);
    }
}
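// Sketch only: ngramProbCache is an LRUCache bounded by ngramCacheSize. A minimal
// least-recently-used cache with that behavior can be built on LinkedHashMap's
// access order; the real LRUCache class may differ in detail.
static class LRUCacheSketch<K, V> extends java.util.LinkedHashMap<K, V> {
    private final int maxSize;

    LRUCacheSketch(int maxSize) {
        super(16, 0.75f, true);   // accessOrder = true keeps recently used entries last
        this.maxSize = maxSize;
    }

    @Override
    protected boolean removeEldestEntry(java.util.Map.Entry<K, V> eldest) {
        return size() > maxSize;  // evict the least recently used entry when over capacity
    }
}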