/** * This was the method found in the <i>com.cybozu.labs.langdetect.Detector</i> class, it was used to extract * grams from the to-analyze text. * * NOTE: although it adds the first ngram with space, it does not add the last n-gram with space. example: "foo" gives " fo" but not "oo "!. * It is not clear yet whether this is desired (and why) or a bug. * * TODO replace this algorithm with a simpler, faster one that uses less memory: only by position shifting. also, the returned list size * can be computed before making it (based on text length and number of n-grams). * */ @NotNull @Deprecated public static List<String> extractNGrams(@NotNull CharSequence text, @Nullable Filter filter) { List<String> list = new ArrayList<>(); NGram ngram = new NGram(); for(int i=0;i<text.length();++i) { ngram.addChar(text.charAt(i)); for(int n=1;n<=NGram.N_GRAM;++n){ String w = ngram.get(n); if (w!=null) { //TODO this null check is ugly if (filter==null || filter.use(w)) { list.add(w); } } } } return list; }
/** * This was the method found in the <i>com.cybozu.labs.langdetect.Detector</i> class, it was used to extract * grams from the to-analyze text. * * NOTE: although it adds the first ngram with space, it does not add the last n-gram with space. example: "foo" gives " fo" but not "oo "!. * It is not clear yet whether this is desired (and why) or a bug. * * TODO replace this algorithm with a simpler, faster one that uses less memory: only by position shifting. also, the returned list size * can be computed before making it (based on text length and number of n-grams). * */ @NotNull @Deprecated public static List<String> extractNGrams(@NotNull CharSequence text, @Nullable Filter filter) { List<String> list = new ArrayList<>(); NGram ngram = new NGram(); for(int i=0;i<text.length();++i) { ngram.addChar(text.charAt(i)); for(int n=1;n<=NGram.N_GRAM;++n){ String w = ngram.get(n); if (w!=null) { //TODO this null check is ugly if (filter==null || filter.use(w)) { list.add(w); } } } } return list; }