public void inform(ResourceLoader loader) {
  mapping = args.get("mapping");
  if (mapping == null) {
    return; // no mapping configured; leave normMap untouched
  }
  List<String> rules = null;
  try {
    // A single existing file is read directly; otherwise the value is treated
    // as a comma-separated list of resource names whose lines are concatenated.
    if (new File(mapping).exists()) {
      rules = loader.getLines(mapping);
    } else {
      rules = new ArrayList<String>();
      for (String name : StrUtils.splitFileNames(mapping)) {
        rules.addAll(loader.getLines(name.trim()));
      }
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  normMap = new NormalizeCharMap();
  parseRules(rules, normMap);
}
@SuppressWarnings("unchecked") public void inform(ResourceLoader loader) { String wordFiles = args.get("words"); ignoreCase = getBoolean("ignoreCase", false); if (wordFiles != null) { try { List<String> files = StrUtils.splitFileNames(wordFiles); if (words == null && files.size() > 0){ words = new CharArraySet(files.size() * 10, ignoreCase); } for (String file : files) { List<String> wlist = loader.getLines(file.trim()); //TODO: once StopFilter.makeStopSet(List) method is available, switch to using that so we can avoid a toArray() call words.addAll(StopFilter.makeStopSet((String[]) wlist.toArray(new String[0]), ignoreCase)); } } catch (IOException e) { throw new RuntimeException(e); } } }
public void inform(ResourceLoader loader) { String commonWordFiles = args.get("words"); ignoreCase = getBoolean("ignoreCase", false); if (commonWordFiles != null) { try { List<String> files = StrUtils.splitFileNames(commonWordFiles); if (commonWords == null && files.size() > 0){ //default stopwords list has 35 or so words, but maybe don't make it that big to start commonWords = new CharArraySet(files.size() * 10, ignoreCase); } for (String file : files) { List<String> wlist = loader.getLines(file.trim()); //TODO: once StopFilter.makeStopSet(List) method is available, switch to using that so we can avoid a toArray() call commonWords.addAll(CommonGramsFilter.makeCommonSet((String[])wlist.toArray(new String[0]), ignoreCase)); } } catch (IOException e) { throw new RuntimeException(e); } } else { commonWords = (CharArraySet) CommonGramsFilter.makeCommonSet(StopAnalyzer.ENGLISH_STOP_WORDS, ignoreCase); } }
public void inform(ResourceLoader loader) { String wordFiles = args.get(PROTECTED_TOKENS); if (wordFiles != null) { try { File protectedWordFiles = new File(wordFiles); if (protectedWordFiles.exists()) { List<String> wlist = loader.getLines(wordFiles); //This cast is safe in Lucene protectedWords = new CharArraySet(wlist, false);//No need to go through StopFilter as before, since it just uses a List internally } else { List<String> files = StrUtils.splitFileNames(wordFiles); for (String file : files) { List<String> wlist = loader.getLines(file.trim()); if (protectedWords == null) protectedWords = new CharArraySet(wlist, false); else protectedWords.addAll(wlist); } } } catch (IOException e) { throw new RuntimeException(e); } } }
public void inform(ResourceLoader loader) { String wordFiles = args.get(PROTECTED_TOKENS); if (wordFiles != null) { try { File protectedWordFiles = new File(wordFiles); if (protectedWordFiles.exists()) { List<String> wlist = loader.getLines(wordFiles); //This cast is safe in Lucene protectedWords = new CharArraySet(wlist, false);//No need to go through StopFilter as before, since it just uses a List internally } else { List<String> files = StrUtils.splitFileNames(wordFiles); for (String file : files) { List<String> wlist = loader.getLines(file.trim()); if (protectedWords == null) protectedWords = new CharArraySet(wlist, false); else protectedWords.addAll(wlist); } } } catch (IOException e) { throw new RuntimeException(e); } } }
public void inform(ResourceLoader loader) { String commonWordFiles = args.get("words"); ignoreCase = getBoolean("ignoreCase", false); if (commonWordFiles != null) { try { List<String> files = StrUtils.splitFileNames(commonWordFiles); if (commonWords == null && files.size() > 0) { // default stopwords list has 35 or so words, but maybe don't make it // that big to start commonWords = new CharArraySet(files.size() * 10, ignoreCase); } for (String file : files) { List<String> wlist = loader.getLines(file.trim()); // TODO: once StopFilter.makeStopSet(List) method is available, switch // to using that so we can avoid a toArray() call commonWords.addAll(CommonGramsFilter.makeCommonSet((String[]) wlist .toArray(new String[0]), ignoreCase)); } } catch (IOException e) { throw new RuntimeException(e); } } else { commonWords = (CharArraySet) CommonGramsFilter.makeCommonSet( StopAnalyzer.ENGLISH_STOP_WORDS, ignoreCase); } }
public void inform(ResourceLoader loader) { String wordFiles = args.get(PROTECTED_TOKENS); if (wordFiles != null) { try { File protectedWordFiles = new File(wordFiles); if (protectedWordFiles.exists()) { List<String> wlist = loader.getLines(wordFiles); //This cast is safe in Lucene protectedWords = new CharArraySet(wlist, false);//No need to go through StopFilter as before, since it just uses a List internally } else { List<String> files = StrUtils.splitFileNames(wordFiles); for (String file : files) { List<String> wlist = loader.getLines(file.trim()); if (protectedWords == null) protectedWords = new CharArraySet(wlist, false); else protectedWords.addAll(wlist); } } } catch (IOException e) { throw new RuntimeException(e); } } }
public void inform(ResourceLoader loader) { String stopWordFiles = args.get("words"); ignoreCase = getBoolean("ignoreCase",false); enablePositionIncrements = getBoolean("enablePositionIncrements",false); if (stopWordFiles != null) { try { List<String> files = StrUtils.splitFileNames(stopWordFiles); if (stopWords == null && files.size() > 0){ //default stopwords list has 35 or so words, but maybe don't make it that big to start stopWords = new CharArraySet(files.size() * 10, ignoreCase); } for (String file : files) { List<String> wlist = loader.getLines(file.trim()); //TODO: once StopFilter.makeStopSet(List) method is available, switch to using that so we can avoid a toArray() call stopWords.addAll(StopFilter.makeStopSet((String[])wlist.toArray(new String[0]), ignoreCase)); } } catch (IOException e) { throw new RuntimeException(e); } } else { stopWords = (CharArraySet) StopFilter.makeStopSet(StopAnalyzer.ENGLISH_STOP_WORDS, ignoreCase); } } //Force the use of a char array set, as it is the most performant, although this may break things if Lucene ever goes away from it. See SOLR-1095
public void inform(ResourceLoader loader) {
  String synonyms = args.get("synonyms");
  boolean ignoreCase = getBoolean("ignoreCase", false);
  boolean expand = getBoolean("expand", true);

  // Optional custom tokenizer used while parsing the synonym rules.
  String tf = args.get("tokenizerFactory");
  TokenizerFactory tokFactory = (tf == null) ? null : loadTokenizerFactory(loader, tf, args);

  if (synonyms == null) {
    return; // nothing to parse; synMap stays unset
  }
  List<String> ruleLines;
  try {
    // One real file is read as-is; otherwise the value is a comma-separated
    // list of resource names whose lines are concatenated in order.
    if (new File(synonyms).exists()) {
      ruleLines = loader.getLines(synonyms);
    } else {
      ruleLines = new ArrayList<String>();
      for (String name : StrUtils.splitFileNames(synonyms)) {
        ruleLines.addAll(loader.getLines(name.trim()));
      }
    }
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
  synMap = new SynonymMap(ignoreCase);
  parseRules(ruleLines, synMap, "=>", ",", expand, tokFactory);
}