/**
 * Builds a set that initially contains every element of the supplied
 * collection.
 *
 * @param c
 *          the collection whose elements are added to the new set
 * @param ignoreCase
 *          <code>true</code> if the set should ignore case,
 *          <code>false</code> if and only if it should be case sensitive
 */
public CharArraySet(Collection<?> c, boolean ignoreCase) {
  // Delegate to the (size, ignoreCase) constructor using the source
  // collection's size, then copy the elements across.
  this(c.size(), ignoreCase);
  this.addAll(c);
}
/**
 * Returns the stop words in use: the entries of
 * {@code ADDITIONAL_STOP_WORDS} combined with
 * {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET}.
 *
 * @return the combined set of stop words
 */
public static CharArraySet getStopWords() {
  // Start from the additional words (ignoreCase = true), then merge in the
  // standard English stop words.
  CharArraySet combined = StopFilter.makeStopSet(ADDITIONAL_STOP_WORDS, true);
  combined.addAll(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  return combined;
}
/**
 * Builds a stopword set out of the supplied array of stopwords.
 *
 * @param stopWords the stopwords to place into the set
 * @param ignoreCase if true, all words are lower cased first
 * @return a {@link CharArraySet} containing the words
 */
public static CharArraySet makeStopSet(String[] stopWords, boolean ignoreCase) {
  final int initialSize = stopWords.length;
  final CharArraySet result = new CharArraySet(initialSize, ignoreCase);
  result.addAll(Arrays.asList(stopWords));
  return result;
}
/**
 * Builds a stopword set out of the supplied list of stopwords.
 *
 * @param stopWords a List of Strings, char[] or any other toString()-able
 *        objects representing the stopwords
 * @param ignoreCase if true, all words are lower cased first
 * @return a Set ({@link CharArraySet}) containing the words
 */
public static CharArraySet makeStopSet(List<?> stopWords, boolean ignoreCase) {
  final int initialSize = stopWords.size();
  final CharArraySet result = new CharArraySet(initialSize, ignoreCase);
  result.addAll(stopWords);
  return result;
}
/**
 * Builds a {@link CharArraySet} from {@code words}, replacing every entry
 * that is a key of {@code namedWords} with the contents of the set mapped
 * to that key.
 *
 * @param words the words (or names of word sets) to resolve
 * @param namedWords named word sets keyed by name; when {@code null} the
 *        words are copied into the result unchanged
 * @param ignoreCase case-handling flag passed to the created set
 * @return the set of resolved words
 */
private static CharArraySet resolveNamedWords(Collection<String> words, Map<String, Set<?>> namedWords, boolean ignoreCase) {
  if (namedWords != null) {
    final CharArraySet resolved = new CharArraySet(words.size(), ignoreCase);
    for (final String entry : words) {
      if (!namedWords.containsKey(entry)) {
        // Not a known name: keep the word itself.
        resolved.add(entry);
      } else {
        // A known name: expand it to the words it stands for.
        resolved.addAll(namedWords.get(entry));
      }
    }
    return resolved;
  }
  // Nothing to expand; copy the words as they are.
  return new CharArraySet(words, ignoreCase);
}
/** * Returns as {@link CharArraySet} from wordFiles, which * can be a comma-separated list of filenames */ protected final CharArraySet getWordSet(ResourceLoader loader, String wordFiles, boolean ignoreCase) throws IOException { List<String> files = splitFileNames(wordFiles); CharArraySet words = null; if (files.size() > 0) { // default stopwords list has 35 or so words, but maybe don't make it that // big to start words = new CharArraySet(files.size() * 10, ignoreCase); for (String file : files) { List<String> wlist = getLines(loader, file.trim()); words.addAll(StopFilter.makeStopSet(wlist, ignoreCase)); } } return words; }
/**
 * Creates a new CapitalizationFilterFactory configured from the supplied
 * arguments.
 *
 * @param args the factory arguments to read the settings from
 * @throws IllegalArgumentException if {@code args} is not empty once all
 *         recognized settings have been read
 */
public CapitalizationFilterFactory(Map<String, String> args) {
  super(args);

  // Optional set of words to keep, with its own case-sensitivity switch.
  final boolean ignoreCase = getBoolean(args, KEEP_IGNORE_CASE, false);
  final Set<String> keepWords = getSet(args, KEEP);
  if (keepWords != null) {
    keep = new CharArraySet(10, ignoreCase);
    keep.addAll(keepWords);
  }

  // Optional prefixes, stored as char[] entries.
  final Set<String> prefixes = getSet(args, OK_PREFIX);
  if (prefixes != null) {
    okPrefix = new ArrayList<>();
    for (final String prefix : prefixes) {
      okPrefix.add(prefix.toCharArray());
    }
  }

  // Numeric limits and boolean switches, each with its default.
  minWordLength = getInt(args, MIN_WORD_LENGTH, 0);
  maxWordCount = getInt(args, MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT);
  maxTokenLength = getInt(args, MAX_TOKEN_LENGTH, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);
  onlyFirstWord = getBoolean(args, ONLY_FIRST_WORD, true);
  forceFirstLetter = getBoolean(args, FORCE_FIRST_LETTER, true);

  // NOTE(review): presumably the getters above consume the keys they read,
  // so anything left over is unrecognized -- confirm against the base class.
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
/**
 * Creates a set from a Collection of char[] or String.
 *
 * @param c
 *          the collection whose elements are placed into the set
 * @param ignoreCase
 *          case-handling flag passed on to the (size, ignoreCase)
 *          constructor
 */
public CharArraySet(Collection<?> c, boolean ignoreCase) {
  // Unbounded wildcard instead of the raw Collection type: the erased
  // signature is identical, so existing callers are unaffected.
  this(c.size(), ignoreCase);
  addAll(c);
}
/**
 * Creates a set from a Collection of char[] or String.
 *
 * @param c
 *          the collection supplying the initial elements of the set
 * @param ignoreCase
 *          case-handling flag forwarded to the (size, ignoreCase)
 *          constructor
 */
public CharArraySet(Collection<?> c, boolean ignoreCase) {
  // The parameter was a raw Collection; the unbounded wildcard keeps the
  // same erasure while avoiding the raw type.
  this(c.size(), ignoreCase);
  addAll(c);
}
/**
 * Creates a set holding the elements of an existing collection.
 *
 * @param c
 *          the source collection; each of its elements is placed into the
 *          set
 * @param ignoreCase
 *          <code>false</code> if and only if the set should be case
 *          sensitive, otherwise <code>true</code>
 */
public CharArraySet(Collection<?> c, boolean ignoreCase) {
  // Initialise via the (size, ignoreCase) constructor, passing the number
  // of elements in the source collection.
  this(c.size(), ignoreCase);
  // Then copy every element over.
  this.addAll(c);
}
/**
 * Builds a stopword set out of the supplied array of stopwords.
 *
 * @param stopWords the stopwords to place into the set
 * @param ignoreCase If true, all words are lower cased first.
 * @return a Set containing the words
 */
public static final Set makeStopSet(String[] stopWords, boolean ignoreCase) {
  final int initialSize = stopWords.length;
  final CharArraySet result = new CharArraySet(initialSize, ignoreCase);
  result.addAll(Arrays.asList(stopWords));
  return result;
}
/**
 * Returns the stop words being used, i.e. {@code ADDITIONAL_STOP_WORDS}
 * plus {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET}.
 *
 * @return the set of stop words being used
 */
public static CharArraySet getStopWords() {
  // Seed the set with the additional words (ignoreCase = true) ...
  CharArraySet result = StopFilter.makeStopSet(ADDITIONAL_STOP_WORDS, true);
  // ... and then add the standard English stop words.
  result.addAll(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  return result;
}
/**
 * Creates a stopword set holding the entries of the given list.
 *
 * @param stopWords a List of Strings or char[] or any other
 *        toString()-able objects representing the stopwords
 * @param ignoreCase if true, all words are lower cased first
 * @return a Set ({@link CharArraySet}) containing the words
 */
public static CharArraySet makeStopSet(List<?> stopWords, boolean ignoreCase) {
  CharArraySet words = new CharArraySet(stopWords.size(), ignoreCase);
  words.addAll(stopWords);
  return words;
}
/**
 * Creates a stopword set holding the entries of the given array.
 *
 * @param stopWords an array of stopwords
 * @param ignoreCase if true, all words are lower cased first
 * @return a {@link CharArraySet} containing the words
 */
public static CharArraySet makeStopSet(String[] stopWords, boolean ignoreCase) {
  CharArraySet words = new CharArraySet(stopWords.length, ignoreCase);
  words.addAll(Arrays.asList(stopWords));
  return words;
}
/**
 * Creates a stopword set holding the entries of the given array.
 *
 * @param stopWords an array of stopwords
 * @param ignoreCase If true, all words are lower cased first.
 * @return a Set containing the words
 */
public static final Set makeStopSet(String[] stopWords, boolean ignoreCase) {
  CharArraySet words = new CharArraySet(stopWords.length, ignoreCase);
  words.addAll(Arrays.asList(stopWords));
  return words;
}
/**
 * Creates a stopword set from the given stopword list, passing
 * {@code true} as the ignoreCase flag of the created {@link CharArraySet}.
 *
 * @param stopWords the list whose elements become the stopwords
 * @return a {@link CharArraySet} containing the words
 */
public static CharArraySet makeStopSet(List<?> stopWords) {
  // Unbounded wildcard instead of the raw List type; the erased signature
  // is the same, so existing callers are unaffected.
  CharArraySet stopSet = new CharArraySet(stopWords.size(), true);
  stopSet.addAll(stopWords);
  return stopSet;
}
/**
 * Builds a CharArraySet from an array of common words, appropriate for
 * passing into the CommonGramsFilter constructor; the set is case-sensitive
 * if ignoreCase is false.
 *
 * @param commonWords the common words to place into the set
 * @param ignoreCase If true, all words are lower cased first.
 * @return a Set containing the words
 */
public static final CharArraySet makeCommonSet(String[] commonWords, boolean ignoreCase) {
  final int initialSize = commonWords.length;
  final CharArraySet result = new CharArraySet(initialSize, ignoreCase);
  result.addAll(Arrays.asList(commonWords));
  return result;
}
/**
 * Resolves {@code words} into a {@link CharArraySet}. A word that is a key
 * of {@code namedWords} contributes the set mapped to it; any other word is
 * added as is.
 *
 * @param words the words (or names of word sets) to resolve
 * @param namedWords named word sets keyed by name, or {@code null} to copy
 *        the words without any expansion
 * @param ignoreCase case-handling flag passed to the created set
 * @return the set of resolved words
 */
private static CharArraySet resolveNamedWords(Collection<String> words, Map<String, Set<?>> namedWords, boolean ignoreCase) {
  // Without named sets there is nothing to expand.
  if (namedWords == null) {
    return new CharArraySet(words, ignoreCase);
  }

  CharArraySet result = new CharArraySet(words.size(), ignoreCase);
  for (String candidate : words) {
    if (namedWords.containsKey(candidate)) {
      result.addAll(namedWords.get(candidate));
      continue;
    }
    result.add(candidate);
  }
  return result;
}
/**
 * Produces the {@link CharArraySet} described by {@code words}, where each
 * word that names an entry of {@code namedWords} is expanded to that
 * entry's set and every other word is taken literally.
 *
 * @param words the words (or names of word sets) to resolve
 * @param namedWords named word sets keyed by name; may be {@code null}, in
 *        which case the words are used unchanged
 * @param ignoreCase case-handling flag passed to the created set
 * @return the set of resolved words
 */
private static CharArraySet resolveNamedWords(Collection<String> words, Map<String, Set<?>> namedWords, boolean ignoreCase) {
  if (namedWords == null) {
    return new CharArraySet(words, ignoreCase);
  }

  final CharArraySet expanded = new CharArraySet(words.size(), ignoreCase);
  for (final String token : words) {
    final boolean isNamedSet = namedWords.containsKey(token);
    if (isNamedSet) {
      // Look the set up only after the key is known to be present.
      final Set<?> members = namedWords.get(token);
      expanded.addAll(members);
    } else {
      expanded.add(token);
    }
  }
  return expanded;
}
/**
 * Converts {@code words} into a {@link CharArraySet}, substituting the
 * mapped set for every word that is a key of {@code namedWords}.
 *
 * @param words the words (or names of word sets) to resolve
 * @param namedWords named word sets keyed by name; if {@code null}, the
 *        result is built directly from {@code words}
 * @param ignoreCase case-handling flag passed to the created set
 * @return the set of resolved words
 */
private static CharArraySet resolveNamedWords(Collection<String> words, Map<String, Set<?>> namedWords, boolean ignoreCase) {
  final boolean hasNamedSets = namedWords != null;
  if (!hasNamedSets) {
    return new CharArraySet(words, ignoreCase);
  }

  final CharArraySet target = new CharArraySet(words.size(), ignoreCase);
  for (String item : words) {
    if (!namedWords.containsKey(item)) {
      // Plain word: add it directly.
      target.add(item);
      continue;
    }
    // Named set: add all of its members instead of the name.
    target.addAll(namedWords.get(item));
  }
  return target;
}