/**
 * Normalizes {@code text} and returns the stemmed form of every token that
 * is not reported as an English stop word.
 *
 * <p>Processing order: lowercase, strip diacritics, turn underscores and
 * line/tab whitespace into {@code SPACE}, drop every character other than
 * {@code a-z}, digits, {@code -}, {@code _}, {@code /} and the blank, split
 * on {@code SPACE}, skip stop words, and stem what is left. Tokens are
 * returned in their order of appearance; duplicates are kept.
 *
 * @param text the raw input text; must not be {@code null}
 * @return a new mutable list holding one stemmed string per retained token
 * @throws IOException declared by the signature; nothing in this method
 *         throws it directly, so it presumably originates in one of the
 *         helper classes called here (not verifiable from this method alone)
 * @throws NullPointerException if {@code text} is {@code null}
 */
public List<String> getStemmedPairs(final String text) throws IOException {
    // Locale.ROOT keeps lowercasing independent of the JVM default locale
    // (e.g. under a Turkish locale 'I' would otherwise become dotless 'ı').
    String tmp = text.toLowerCase(java.util.Locale.ROOT);
    tmp = DiacriticsRemover.removeDiacritics(tmp);

    // Literal replacements: no regex is needed, and SPACE is not interpreted
    // as a regex replacement string. Tabs and carriage returns are mapped to
    // separators as well; otherwise the filter below would delete them and
    // glue the neighbouring words together.
    tmp = tmp.replace("_", SPACE)
            .replace("\n", SPACE)
            .replace("\r", SPACE)
            .replace("\t", SPACE);

    // Same character set as before; the hyphen is placed last so it is
    // unambiguously a literal and not part of a range.
    tmp = tmp.replaceAll("[^a-z\\d_/ -]", "");

    final List<String> stems = new ArrayList<String>();
    final PorterStemmer ps = new PorterStemmer();
    for (final String s : StringUtils.split(tmp, SPACE)) {
        if (StopWordsRemover.isAnEnglishStopWords(s)) {
            continue;
        }
        // The same stemmer instance is reused for every token, as before.
        ps.add(s.toCharArray(), s.length());
        ps.stem();
        stems.add(ps.toString());
    }
    return stems;
}
// Pass every character of s to the stemmer, run it, and append the stemmer's
// string form to strings.
// NOTE(review): assumes ps.toString() yields the stemmed word - confirm
// against the PorterStemmer class in use.
ps.add(s.toCharArray(), s.length()); ps.stem(); strings.add(ps.toString());
for(String string : DiacriticsRemover.removeDiacritics(inputDataStringBuilder.toString()) .toLowerCase().split(" ")){ stemmer.add(string.toCharArray(), 0); stemmer.stem();
// Pass every character of s to the stemmer, run it, and build a two-element
// array holding key followed by the stemmer's string form.
// NOTE(review): assumes ps.toString() yields the stemmed word - confirm
// against the PorterStemmer class in use.
ps.add(s.toCharArray(), s.length()); ps.stem(); String[] to = new String[]{key, ps.toString()};