/**
 * Breaks a string into the words used for simple fulltext indexing.
 * <p>
 * Words are returned in the order of their first occurrence; a word that
 * appears several times is only kept once.
 * <p>
 * The input is cut into candidate words with {@code wordPattern}, and each
 * candidate is handed to {@code parseWord}, which is responsible for the
 * per-word processing (omitting short or stop words, removing accents and
 * pseudo-stemming). Candidates for which {@code parseWord} returns
 * {@code null} are left out of the result.
 *
 * @param string the string to analyze; {@code null} yields an empty set
 * @param removeDiacritics if the diacritics must be removed (forwarded to
 *            {@code parseWord})
 * @return an ordered set of resulting words, empty when {@code string} is
 *         {@code null}
 */
public static Set<String> parseFullText(String string, boolean removeDiacritics) {
    if (string == null) {
        return Collections.emptySet();
    }
    String[] tokens = wordPattern.split(string);
    // LinkedHashSet keeps first-seen order while discarding repeated words.
    Set<String> words = new LinkedHashSet<>();
    for (String token : tokens) {
        String parsed = parseWord(token, removeDiacritics);
        if (parsed == null) {
            // parseWord rejected this token; nothing to index for it.
            continue;
        }
        words.add(parsed);
    }
    return words;
}
Set<String> words = FullTextUtils.parseFullText(value, false); if (words.isEmpty()) {