Codota Logo
LuceneLanguageModel
Code Index · Add Codota to your IDE (free)

How to use
LuceneLanguageModel
in
org.languagetool.languagemodel

Best Java code snippets using org.languagetool.languagemodel.LuceneLanguageModel (Showing top 19 results out of 315)

  • Add the Codota plugin to your IDE and get smart completions
private void myMethod () {
ScheduledThreadPoolExecutor s =
  • Codota Iconnew ScheduledThreadPoolExecutor(corePoolSize)
  • Codota IconThreadFactory threadFactory;new ScheduledThreadPoolExecutor(corePoolSize, threadFactory)
  • Codota IconString str;new ScheduledThreadPoolExecutor(1, new ThreadFactoryBuilder().setNameFormat(str).build())
  • Smart code suggestions by Codota
}
origin: languagetool-org/languagetool

/**
 * Verifies the order in which {@code SuggestionSorter} returns candidate words when it is
 * backed by the {@code /yy/ngram-index} test index.
 */
@Test
@Ignore("interferes with LuceneSingleIndexLanguageModel")
public void testSort() {
 URL indexUrl = JLanguageTool.getDataBroker().getFromResourceDirAsUrl("/yy/ngram-index");
 File indexDir = new File(indexUrl.getFile());
 try (LuceneLanguageModel languageModel = new LuceneLanguageModel(indexDir)) {
  SuggestionSorter suggestionSorter = new SuggestionSorter(languageModel);
  // lists with several candidates: the expected order is spelled out explicitly
  assertThat(suggestionSorter.sortSuggestions(Arrays.asList("thee", "the", "teh")), is(Arrays.asList("the", "thee", "teh")));
  assertThat(suggestionSorter.sortSuggestions(Arrays.asList("nuce", "foo", "nice")), is(Arrays.asList("nice", "nuce", "foo")));
  // single-element lists (including the empty string) are expected back unchanged
  assertThat(suggestionSorter.sortSuggestions(Arrays.asList("nuce")), is(Arrays.asList("nuce")));
  assertThat(suggestionSorter.sortSuggestions(Arrays.asList("nice")), is(Arrays.asList("nice")));
  assertThat(suggestionSorter.sortSuggestions(Arrays.asList("")), is(Arrays.asList("")));
 }
}
origin: languagetool-org/languagetool

/**
 * Checks the ngram counts and the total token count reported for the
 * {@code /yy/ngram-index} test index.
 */
@Test
public void testLanguageModel() throws Exception {
 URL indexUrl = JLanguageTool.getDataBroker().getFromResourceDirAsUrl("/yy/ngram-index");
 File indexDir = new File(indexUrl.getFile());
 try (LuceneLanguageModel languageModel = new LuceneLanguageModel(indexDir)) {
  // 1gram, 2gram and 3gram lookups
  assertThat(languageModel.getCount("the"), is(55L));
  assertThat(languageModel.getCount(Arrays.asList("the", "nice")), is(3L));
  assertThat(languageModel.getCount(Arrays.asList("the", "nice", "building")), is(1L));
  // a token that is not in the index has a count of zero
  assertThat(languageModel.getCount("not-in-here"), is(0L));
  assertThat(languageModel.getTotalTokenCount(), is(3L));
 }
}
origin: languagetool-org/languagetool

/**
 * Validates the given index directory. If {@code getSubDirectoriesOrNull} finds no sub
 * directories, the check is delegated to
 * {@code LuceneSingleIndexLanguageModel.validateDirectory}; otherwise nothing is checked here.
 */
public static void validateDirectory(File topIndexDir) {
 File[] indexDirs = getSubDirectoriesOrNull(topIndexDir);
 boolean hasSubIndexes = indexDirs != null && indexDirs.length > 0;
 if (!hasSubIndexes) {
  LuceneSingleIndexLanguageModel.validateDirectory(topIndexDir);
 }
}
origin: org.languagetool/language-it

/**
 * Closes the language model held by this instance; does nothing if none has been set.
 * @since 3.1
 */
@Override
public void close() throws Exception {
 if (languageModel == null) {
  return;  // nothing to close
 }
 languageModel.close();
}
origin: languagetool-org/languagetool

/**
 * Returns the count for a single token by delegating to the list-based
 * {@code getCount} with a one-element list.
 */
@Override
public long getCount(String token) {
 List<String> singleToken = Arrays.asList(token);
 return getCount(singleToken);
}
origin: org.languagetool/languagetool-gui-commons

 /**
  * Opens a directory dialog for the ngram directory. A chosen directory is validated (when a
  * language is configured), stored in the configuration and shown, abbreviated, on the button;
  * any exception on that path is reported via {@code Tools.showErrorMessage}. If the dialog
  * returns {@code null}, the ngram directory setting is cleared and the button label is reset.
  */
 @Override
 public void actionPerformed(ActionEvent e) {
  File selectedDir = Tools.openDirectoryDialog(owner, dir);
  if (selectedDir == null) {
   // not the best UI, but this way the user can turn off the ngram feature without another checkbox
   config.setNgramDirectory(null);
   ngramDirButton.setText(StringUtils.abbreviate(messages.getString("guiNgramDirSelect"), maxDirDisplayLength));
   return;
  }
  try {
   if (config.getLanguage() != null) {  // the language may be unset in office context
    File languageDir = new File(selectedDir, config.getLanguage().getShortCode());
    LuceneLanguageModel.validateDirectory(languageDir);
   }
   config.setNgramDirectory(selectedDir);
   ngramDirButton.setText(StringUtils.abbreviate(selectedDir.getAbsolutePath(), maxDirDisplayLength));
  } catch (Exception ex) {
   Tools.showErrorMessage(ex);
  }
 }
});
origin: org.languagetool/language-zh

/**
 * Closes the language model if one is present; otherwise this is a no-op.
 * @since 3.1
 */
@Override
public void close() throws Exception {
 boolean hasModel = languageModel != null;
 if (hasModel) {
  languageModel.close();
 }
}
origin: languagetool-org/languagetool

long count = 0;
if (ngramLength == 2) {
 count = model.getCount(Arrays.asList(prevWord, word));
} else if (ngramLength == 3) {
 if (prevPrevWord != null) {
  count = model.getCount(Arrays.asList(prevPrevWord, prevWord, word));
origin: languagetool-org/languagetool

/**
 * Placeholder for a sorting test against a full ngram index on a local path; the only
 * assertion is still commented out, so currently this just opens the model and creates
 * the sorter.
 */
@Test
@Ignore("needs full index")
public void testSortWithFullNgrams() {
 File fullIndexDir = new File("/home/dnaber/data/google-ngram-index/de");
 try (LuceneLanguageModel languageModel = new LuceneLanguageModel(fullIndexDir)) {
  SuggestionSorter suggestionSorter = new SuggestionSorter(languageModel);
  //TODO: support phrases
  //assertThat(suggestionSorter.sortSuggestions(Arrays.asList("alluvial", "allzu viel")), is(Arrays.asList("allzu viel", "alluvial")));
 }
}
origin: org.languagetool/language-es

/**
 * Closes the underlying language model, provided one has been assigned.
 * @since 3.1
 */
@Override
public void close() throws Exception {
 if (languageModel == null) {
  // no model assigned, so there is nothing to release
  return;
 }
 languageModel.close();
}
origin: org.languagetool/languagetool-core

/**
 * Single-token convenience variant: wraps {@code token} in a one-element list and
 * forwards it to the list-based {@code getCount}.
 */
@Override
public long getCount(String token) {
 List<String> tokens = Arrays.asList(token);
 return getCount(tokens);
}
origin: languagetool-org/languagetool

/**
 * Creates a model over one index or over several independent indexes, depending on what
 * {@code getSubDirectoriesOrNull} finds below {@code topIndexDir}.
 *
 * @param topIndexDir a directory which contains either:
 *                    1) sub directories called {@code 1grams}, {@code 2grams}, {@code 3grams},
 *                    which are Lucene indexes with ngram occurrences as created by
 *                    {@code org.languagetool.dev.FrequencyIndexCreator}
 *                    or 2) sub directories {@code index-1}, {@code index-2} etc that contain
 *                    the sub directories described under 1)
 */
public LuceneLanguageModel(File topIndexDir)  {
 File[] indexDirs = getSubDirectoriesOrNull(topIndexDir);
 if (indexDirs == null || indexDirs.length == 0) {
  // single-index mode: the top directory itself is the index
  lms.add(new LuceneSingleIndexLanguageModel(topIndexDir));
  return;
 }
 System.out.println("Running in multi-index mode with " + indexDirs.length + " indexes: " + topIndexDir);
 for (File indexDir : indexDirs) {
  lms.add(new LuceneSingleIndexLanguageModel(indexDir));
 }
}
origin: languagetool-org/languagetool

/**
 * Interactive lookup benchmark against a locally available full ngram index; the model is
 * closed again once the measurement has finished or failed.
 *
 * Some values for average time per lookup on 2grams on a 3.7GB Lucene 4.8.1 index with 118,941,740 docs:
 * -no data in OS cache, index on external USB disk: 17626µs = 17ms
 * -no data in OS cache, index on SSD: 739µs = <1ms
 * -all data in OS cache (by running the test more than once): 163µs = <1ms
 * 
 * Some values for average time per lookup on 3grams on a 7.0GB Lucene 4.9 index:
 * -no data in OS cache, index on external USB disk: 13256µs = 13ms
 * -no data in OS cache, index on SSD: 791µs = <1ms
 * -all(?) data in OS cache (by running the test more than once): 162µs = <1ms
 * 
 * The tests have been performed on a Dell XPS 13 (i7-3537U CPU) under Ubuntu 12.04, with Java 1.7.
 */
@Test
@Ignore("for interactive use only")
public void testPerformance() throws Exception {
 // 2grams:
 //LanguageModel model = new LuceneLanguageModel(new File("/media/Data/google-ngram/2gram/lucene-index/merged/"));
 //super.testPerformance(model, 2);
 // 3grams:
 //LanguageModel model = new LuceneLanguageModel(new File("/media/Data/google-ngram/3gram/aggregated/lucene-index/merged/"));
 // try-with-resources: the index is released even if the measurement throws
 try (LuceneLanguageModel model = new LuceneLanguageModel(new File("/data/google-gram-index/"))) {
  super.testPerformance(model, 3);
 }
}

origin: org.languagetool/languagetool-core

/**
 * Validates {@code topIndexDir} as a single index via
 * {@code LuceneSingleIndexLanguageModel.validateDirectory}, unless
 * {@code getSubDirectoriesOrNull} reports sub directories, in which case no check is made here.
 */
public static void validateDirectory(File topIndexDir) {
 File[] indexDirs = getSubDirectoriesOrNull(topIndexDir);
 if (indexDirs != null && indexDirs.length > 0) {
  return;  // sub directories found: nothing is validated in this method
 }
 LuceneSingleIndexLanguageModel.validateDirectory(topIndexDir);
}
origin: org.languagetool/language-zh

/**
 * Returns the language model, creating it on first use from the sub directory of
 * {@code indexDir} named after {@code getShortCode()}. Later calls return the same
 * instance, whatever {@code indexDir} is passed.
 * @since 3.1
 */
@Override
public synchronized LanguageModel getLanguageModel(File indexDir) throws IOException {
 if (languageModel != null) {
  return languageModel;
 }
 File languageIndexDir = new File(indexDir, getShortCode());
 languageModel = new LuceneLanguageModel(languageIndexDir);
 return languageModel;
}
origin: org.languagetool/languagetool-core

/**
 * @param topIndexDir a directory which contains either:
 *                    1) sub directories called {@code 1grams}, {@code 2grams}, {@code 3grams},
 *                    which are Lucene indexes with ngram occurrences as created by
 *                    {@code org.languagetool.dev.FrequencyIndexCreator}
 *                    or 2) sub directories {@code index-1}, {@code index-2} etc that contain
 *                    the sub directories described under 1)
 */
public LuceneLanguageModel(File topIndexDir)  {
 File[] subDirs = getSubDirectoriesOrNull(topIndexDir);
 if (subDirs != null && subDirs.length > 0) {
  System.out.println("Running in multi-index mode with " + subDirs.length + " indexes: " + topIndexDir);
  for (File subDir : subDirs) {
   lms.add(new LuceneSingleIndexLanguageModel(subDir));
  }
 } else {
  lms.add(new LuceneSingleIndexLanguageModel(topIndexDir));
 }
}
origin: org.languagetool/language-it

/**
 * Lazily creates the language model from {@code indexDir/<short code>} and returns it;
 * once created, the same instance is returned on every call.
 * @since 3.1
 */
@Override
public synchronized LanguageModel getLanguageModel(File indexDir) throws IOException {
 boolean needsInit = languageModel == null;
 if (needsInit) {
  File languageIndexDir = new File(indexDir, getShortCode());
  languageModel = new LuceneLanguageModel(languageIndexDir);
 }
 return languageModel;
}
origin: org.languagetool/language-es

/**
 * Returns the cached language model, first creating it from the {@code getShortCode()}
 * sub directory of {@code indexDir} if it does not exist yet.
 * @since 3.1
 */
@Override
public synchronized LanguageModel getLanguageModel(File indexDir) throws IOException {
 if (languageModel != null) {
  // already created by an earlier call
  return languageModel;
 }
 languageModel = new LuceneLanguageModel(new File(indexDir, getShortCode()));
 return languageModel;
}
origin: org.languagetool/language-de

/**
 * Returns the language model, creating a {@code LuceneLanguageModel} for the
 * {@code getShortCode()} sub directory of {@code indexDir} on the first call.
 */
@Override
public synchronized LanguageModel getLanguageModel(File indexDir) throws IOException {
 if (languageModel != null) {
  return languageModel;
 }
 File languageIndexDir = new File(indexDir, getShortCode());
 languageModel = new LuceneLanguageModel(languageIndexDir);
 // for testing:
 //languageModel = new BerkeleyRawLanguageModel(new File("/media/Data/berkeleylm/google_books_binaries/ger.blm.gz"));
 //languageModel = new BerkeleyLanguageModel(new File("/media/Data/berkeleylm/google_books_binaries/ger.blm.gz"));
 return languageModel;
}
org.languagetool.languagemodel.LuceneLanguageModel

Javadoc

Like LuceneSingleIndexLanguageModel, but can merge the results of lookups in several independent indexes to one result.

Most used methods

  • <init>
  • close
  • getCount
  • getSubDirectoriesOrNull
  • getTotalTokenCount
  • validateDirectory

Popular in Java

  • Finding current android device location
  • getResourceAsStream (ClassLoader)
  • setRequestProperty (URLConnection)
    Sets the general request property. If a property with the key already exists, overwrite its value wi
  • getExternalFilesDir (Context)
  • HttpServer (com.sun.net.httpserver)
    This class implements a simple HTTP server. A HttpServer is bound to an IP address and port number a
  • PrintWriter (java.io)
    Prints formatted representations of objects to a text-output stream. This class implements all of th
  • Arrays (java.util)
    This class contains various methods for manipulating arrays (such as sorting and searching). This cl
  • BitSet (java.util)
    This class implements a vector of bits that grows as needed. Each component of the bit set has a boo
  • GregorianCalendar (java.util)
    GregorianCalendar is a concrete subclass of Calendar and provides the standard calendar used by most
  • Executors (java.util.concurrent)
    Factory and utility methods for Executor, ExecutorService, ScheduledExecutorService, ThreadFactory,
Codota Logo
  • Products

    Search for Java code · Search for JavaScript code · Enterprise
  • IDE Plugins

    IntelliJ IDEA · WebStorm · Android Studio · Eclipse · Visual Studio Code · PyCharm · Sublime Text · PhpStorm · Vim · Atom · GoLand · RubyMine · Emacs · Jupyter
  • Company

    About Us · Contact Us · Careers
  • Resources

    FAQ · Blog · Codota Academy · Plugin user guide · Terms of use · Privacy policy · Java Code Index · JavaScript Code Index
Get Codota for your IDE now