// Open the file named by the next command-line argument as the dictionary input (offset is
// advanced past that argument), then build the HunspellDictionary from the affix stream and this
// dictionary stream using Version.LUCENE_34 and the ignoreCase flag.
// NOTE(review): neither stream is closed in the statements visible here — confirm the caller closes them.
InputStream dicInputStream = new FileInputStream(args[offset++]); HunspellDictionary dictionary = new HunspellDictionary(affixInputStream, dicInputStream, Version.LUCENE_34, ignoreCase);
/**
 * Builds a HunspellDictionary from one hunspell affix file and any number of hunspell dictionary
 * files, all supplied as InputStreams.
 * None of the supplied InputStreams is closed by this constructor; the caller owns them.
 *
 * @param affix InputStream over the hunspell affix file (left open)
 * @param dictionaries InputStreams over the hunspell dictionary files (left open); they are read
 *                     in list order into the same word map
 * @param version Lucene Version
 * @param ignoreCase when true, dictionary matching is case insensitive
 * @throws IOException if reading from any of the InputStreams fails
 * @throws ParseException if the content of the files does not meet the expected formats
 */
public HunspellDictionary(InputStream affix, List<InputStream> dictionaries, Version version, boolean ignoreCase) throws IOException, ParseException {
  this.version = version;
  this.ignoreCase = ignoreCase;

  // The encoding is taken from the affix stream first; the remainder of that same stream is then
  // parsed as the affix file, and the same decoder is reused for every dictionary file.
  final CharsetDecoder charsetDecoder = getJavaEncoding(getDictionaryEncoding(affix));
  readAffixFile(affix, charsetDecoder);

  words = new CharArrayMap<List<HunspellWord>>(version, 65535 /* guess */, this.ignoreCase);
  for (InputStream dictionaryStream : dictionaries) {
    readDictionaryFile(dictionaryStream, charsetDecoder);
  }
}
/** * Reads the affix file through the provided InputStream, building up the prefix and suffix maps * * @param affixStream InputStream to read the content of the affix file from * @param decoder CharsetDecoder to decode the content of the file * @throws IOException Can be thrown while reading from the InputStream */ private void readAffixFile(InputStream affixStream, CharsetDecoder decoder) throws IOException { prefixes = new CharArrayMap<List<HunspellAffix>>(version, 8, ignoreCase); suffixes = new CharArrayMap<List<HunspellAffix>>(version, 8, ignoreCase); BufferedReader reader = new BufferedReader(new InputStreamReader(affixStream, decoder)); String line = null; while ((line = reader.readLine()) != null) { if (line.startsWith(ALIAS_KEY)) { parseAlias(line); } else if (line.startsWith(PREFIX_KEY)) { parseAffix(prefixes, line, reader, PREFIX_CONDITION_REGEX_PATTERN); } else if (line.startsWith(SUFFIX_KEY)) { parseAffix(suffixes, line, reader, SUFFIX_CONDITION_REGEX_PATTERN); } else if (line.startsWith(FLAG_KEY)) { // Assume that the FLAG line comes before any prefix or suffixes // Store the strategy so it can be used when parsing the dic file flagParsingStrategy = getFlagParsingStrategy(line); } } }
/**
 * Find the unique stem(s) of the provided word.
 * <p>
 * If the word itself is found in the dictionary it is returned as the first stem; the stems
 * produced by affix stripping follow, skipping any whose characters were already seen.
 *
 * @param word buffer holding the word to find the stems for; only the first {@code length}
 *             chars are considered part of the word
 * @param length number of valid chars in {@code word}
 * @return List of stems for the word, without duplicates
 */
public List<Stem> uniqueStems(char word[], int length) {
  List<Stem> stems = new ArrayList<Stem>();
  CharArraySet terms = new CharArraySet(dictionary.getVersion(), 8, dictionary.isIgnoreCase());
  if (dictionary.lookupWord(word, 0, length) != null) {
    stems.add(new Stem(word, length));
    // Register an exact-length private copy instead of the caller's buffer:
    // CharArraySet.add(char[]) takes the WHOLE array (so any chars past `length` would become
    // part of the remembered term and defeat de-duplication), and an ignore-case set lower-cases
    // the array it is given in place, which would silently rewrite the caller's buffer.
    char[] exactWord = new char[length];
    System.arraycopy(word, 0, exactWord, 0, length);
    terms.add(exactWord);
  }
  List<Stem> otherStems = stem(word, length, null, 0);
  for (Stem s : otherStems) {
    // NOTE(review): s.stem is compared and stored as a whole array; this presumes the stems
    // returned by stem(...) are backed by exactly-sized arrays — confirm.
    if (!terms.contains(s.stem)) {
      stems.add(s);
      terms.add(s.stem);
    }
  }
  return stems;
}
if(dictionary.isIgnoreCase()) { for(int i=0;i<strippedWord.length;){ i += Character.toChars( List<HunspellWord> words = dictionary.lookupWord(strippedWord, 0, length); if (words != null) { for (HunspellWord hunspellWord : words) {
List<HunspellAffix> suffixes = dictionary.lookupSuffix(word, i, length - i); if (suffixes == null) { continue; List<HunspellAffix> prefixes = dictionary.lookupPrefix(word, 0, i); if (prefixes == null) { continue;
// flagPart is parsed as a decimal integer and replaced by the value getAliasValue returns for
// that number (presumably an index into the aliases collected from the affix file — confirm).
flagPart = getAliasValue(Integer.parseInt(flagPart));
/**
 * Find the stem(s) of the provided word.
 * <p>
 * When the word itself is present in the dictionary it is returned first, followed by every stem
 * found by the affix-stripping overload. Duplicates are not removed.
 *
 * @param word buffer holding the word to find the stems for
 * @param length number of chars of {@code word} that make up the word
 * @return List of stems for the word
 */
public List<Stem> stem(char word[], int length) {
  List<Stem> result = new ArrayList<Stem>();

  boolean knownWord = dictionary.lookupWord(word, 0, length) != null;
  if (knownWord) {
    result.add(new Stem(word, length));
  }

  List<Stem> affixStems = stem(word, length, null, 0);
  result.addAll(affixStems);
  return result;
}
// flagPart is parsed as a decimal integer and replaced by the value getAliasValue returns for
// that number (presumably an index into the aliases collected from the affix file — confirm).
flagPart = getAliasValue(Integer.parseInt(flagPart));
@Inject public HunspellStemFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); this.name = name; this.locale = settings.get("locale", "en_US"); this.ignoreCase = settings.getAsBoolean("ignoreCase", Boolean.TRUE); this.dedup = settings.getAsBoolean("dedup", Boolean.TRUE); if (!locales.contains(locale)) { throw new ElasticSearchException("invalid locale '" + locale + "' for hunspell aff/dic"); } try { InputStream affixStream = HunspellStemFilterFactory.class.getResourceAsStream(locale + ".aff"); InputStream dictStream = HunspellStemFilterFactory.class.getResourceAsStream(locale + ".dic"); this.dictionary = new HunspellDictionary(affixStream, dictStream, version, ignoreCase); affixStream.close(); dictStream.close(); } catch (IOException ex) { logger.error("hunspell aff/dic stream I/O error for locale " + locale, ex); } catch (ParseException ex) { logger.error("hunspell aff/dic stream parse failure for locale " + locale, ex); } }