/**
 * Tells whether positions {@code j - 1} and {@code j} of the buffer {@code b} hold a doubled consonant.
 *
 * @param j index of the second character of the candidate pair
 * @return {@code true} only when {@code j >= 1}, {@code b[j]} equals {@code b[j - 1]} and
 *         {@code cons(j)} reports a consonant; {@code false} otherwise
 */
private boolean doublec(int j) {
    // Short-circuit order matters: the bounds check protects the b[j - 1] access.
    return j >= 1 && b[j] == b[j - 1] && cons(j);
}
private void step1() { if (b[k] == 's') { if (ends("sses")) { k -= 2; } else if (ends("ies")) { setto("i"); } else if (b[k - 1] != 's') { k--; if (ends("eed")) { if (m() > 0) { k--; } else if ((ends("ed") || ends("ing")) && vowelinstem()) { k = j; if (ends("at")) { setto("ate"); } else if (ends("bl")) { setto("ble"); } else if (ends("iz")) { setto("ize"); } else if (doublec(k)) { k--; } else if (m() == 1 && cvc(k)) { setto("e");
/**
 * Overwrites the character at {@code b[k]} with {@code 'i'} when {@code ends("y")} holds and
 * {@code vowelinstem()} reports a vowel.
 */
private void step2() {
    // ends(...) is evaluated first, exactly as in the original condition; vowelinstem() reads j,
    // which ends(...) presumably updates -- confirm against its definition.
    if (!ends("y")) {
        return;
    }
    if (!vowelinstem()) {
        return;
    }
    b[k] = 'i';
}
/**
 * Normalizes {@code text}, splits it into tokens and returns the Porter stem of every token that
 * is not reported as an English stop word.
 *
 * <p>Normalization steps, in order: lower-casing, diacritics removal via
 * {@code DiacriticsRemover}, replacing underscores and whitespace with {@code SPACE}, and deleting
 * every character that is not {@code a-z}, a digit, {@code '-'}, {@code '_'}, {@code '/'} or a space.
 * Despite the method name, the result is a flat list of stems, one per kept token.
 *
 * @param text the raw input text; must not be {@code null}
 * @return the stems of the non-stop-word tokens, in input order (possibly empty)
 * @throws IOException declared by the signature; presumably raised by one of the helper
 *         classes -- confirm against their definitions
 */
public List<String> getStemmedPairs(final String text) throws IOException {
    // Locale.ROOT keeps lower-casing independent of the JVM default locale. Under e.g. a Turkish
    // locale 'I' lower-cases to dotless 'ı', which the a-z filter below would then delete.
    String tmp = text.toLowerCase(java.util.Locale.ROOT);
    tmp = DiacriticsRemover.removeDiacritics(tmp);
    // Underscores and every whitespace character (not only '\n') become separators. Otherwise
    // tabs and carriage returns would be deleted by the filter below, fusing neighbouring words.
    tmp = tmp.replaceAll("[_\\s]", SPACE);
    tmp = tmp.replaceAll("[^a-z\\d-_/ ]", "");

    List<String> strings = new ArrayList<String>();
    PorterStemmer ps = new PorterStemmer();
    for (String s : StringUtils.split(tmp, SPACE)) {
        if (StopWordsRemover.isAnEnglishStopWords(s)) {
            continue;
        }
        ps.add(s.toCharArray(), s.length());
        ps.stem();
        strings.add(ps.toString());
    }
    return strings;
}
PorterStemmer ps = new PorterStemmer(); for (String s : StringUtils.split(tmp, SPACE)) { s = s.replaceAll("^[/\\-]+", ""); ps.add(s.toCharArray(), s.length()); ps.stem(); strings.add(ps.toString());
} /* For Bug 1 */ switch (b[k - 1]) { case 'a': if (ends("ational")) { r("ate"); break; if (ends("tional")) { r("tion"); break; if (ends("enci")) { r("ence"); break; if (ends("anci")) { r("ance"); break; if (ends("izer")) { r("ize"); break; if (ends("bli")) { r("ble"); break; if (ends("alli")) { r("al"); break;
PorterStemmer s = new PorterStemmer(); for (int i = 0; i < args.length; i++) { try { s.add(w[c]); s.stem(); u = s.toString();
// Stems each space-separated, lower-cased, diacritics-free token of inputDataStringBuilder and writes a (key, stem) pair with the value `one` to the context.
// NOTE(review): add(..., 0) passes 0 as the second argument, whereas the other call sites in this file pass the token length (s.length()). If that argument is the number of characters to add, every stem written here would be empty -- confirm against PorterStemmer.add and consider string.length().
PorterStemmer stemmer = new PorterStemmer(); for(String string : DiacriticsRemover.removeDiacritics(inputDataStringBuilder.toString()) .toLowerCase().split(" ")){ stemmer.add(string.toCharArray(), 0); stemmer.stem(); context.write(new TextArrayWritable(new Text[]{key, new Text(stemmer.toString())}), one);
/**
 * Scans buffer positions {@code 0..j} (inclusive) for a vowel.
 *
 * @return {@code true} as soon as {@code cons} reports a non-consonant at some position in that
 *         range; {@code false} when every position is a consonant or the range is empty
 */
private boolean vowelinstem() {
    int pos = 0;
    while (pos <= j) {
        if (cons(pos)) {
            pos++;
            continue;
        }
        return true;
    }
    return false;
}
vals_str = vals_str.replaceAll("[^a-z ]", ""); PorterStemmer ps = new PorterStemmer(); List<Tuple> alt = new ArrayList<Tuple>(); for (String s : vals_str.split(" ")) { ps.add(s.toCharArray(), s.length()); ps.stem(); String[] to = new String[]{key, ps.toString()}; alt.add(TupleFactory.getInstance().newTuple(Arrays.asList(to)));
/**
 * Tells whether the character at {@code b[i]} counts as a consonant.
 *
 * <p>The letters {@code a, e, i, o, u} are never consonants. The letter {@code y} is a consonant
 * at index 0; elsewhere it is a consonant exactly when the preceding character is not one.
 * Every other character is a consonant.
 *
 * @param i index into the buffer {@code b}
 * @return {@code true} if {@code b[i]} is treated as a consonant
 */
private boolean cons(int i) {
    if ("aeiou".indexOf(b[i]) >= 0) {
        return false;
    }
    if (b[i] == 'y') {
        // A 'y' takes the opposite role of its predecessor; a leading 'y' is a consonant.
        return i == 0 || !cons(i - 1);
    }
    return true;
}