/** Creates a TokenStream over the given Reader using the supplied tokenizer factory. */
private static TokenStream loadTokenizer(TokenizerFactory tokFactory, Reader reader) {
  return tokFactory.create(reader);
}
/** Instantiates the named TokenizerFactory via the resource loader and initializes it with the given args. */
private static TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname, Map<String,String> args) {
  TokenizerFactory tokFactory = (TokenizerFactory) loader.newInstance(cname);
  tokFactory.init(args);
  return tokFactory;
}
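// Usage sketch (illustrative, not part of the original source): the two helpers
// above compose to tokenize raw text. The factory class name, the empty args
// map, and the input string are assumptions chosen for this example.
private static TokenStream exampleTokenize(ResourceLoader loader) {
  Map<String, String> args = new HashMap<String, String>();
  TokenizerFactory factory = loadTokenizerFactory(loader,
      "org.apache.solr.analysis.WhitespaceTokenizerFactory", args);
  return loadTokenizer(factory, new StringReader("hello solr analysis"));
}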
/**
 * Builds a description of the given analyzer: its class name and, when it is a
 * TokenizerChain, the tokenizer factory and token filter factories it wraps.
 */
private static SimpleOrderedMap<Object> getAnalyzerInfo(Analyzer analyzer) {
  SimpleOrderedMap<Object> aninfo = new SimpleOrderedMap<Object>();
  aninfo.add("className", analyzer.getClass().getName());
  if (analyzer instanceof TokenizerChain) {
    TokenizerChain tchain = (TokenizerChain) analyzer;

    // Describe the tokenizer stage of the chain.
    TokenizerFactory tfac = tchain.getTokenizerFactory();
    SimpleOrderedMap<Object> tokenizer = new SimpleOrderedMap<Object>();
    tokenizer.add("className", tfac.getClass().getName());
    tokenizer.add("args", tfac.getArgs());
    aninfo.add("tokenizer", tokenizer);

    // Describe each token filter, keyed by its simple class name,
    // e.g. "LowerCaseFilterFactory".
    TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories();
    SimpleOrderedMap<Map<String, Object>> filters = new SimpleOrderedMap<Map<String, Object>>();
    for (TokenFilterFactory filtfac : filtfacs) {
      Map<String, Object> tok = new HashMap<String, Object>();
      String className = filtfac.getClass().getName();
      tok.put("className", className);
      tok.put("args", filtfac.getArgs());
      filters.add(className.substring(className.lastIndexOf('.') + 1), tok);
    }
    if (filters.size() > 0) {
      aninfo.add("filters", filters);
    }
  }
  return aninfo;
}
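// Illustrative usage (a sketch, not from the original source; "fieldType" is a
// hypothetical org.apache.solr.schema.FieldType, whose getAnalyzer() and
// getQueryAnalyzer() accessors supply the two analyzers to describe):
private static SimpleOrderedMap<Object> exampleDescribeFieldType(FieldType fieldType) {
  SimpleOrderedMap<Object> typeInfo = new SimpleOrderedMap<Object>();
  typeInfo.add("indexAnalyzer", getAnalyzerInfo(fieldType.getAnalyzer()));
  typeInfo.add("queryAnalyzer", getAnalyzerInfo(fieldType.getQueryAnalyzer()));
  return typeInfo;
}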
@Override
public TokenStreamInfo getStream(String fieldName, Reader reader) {
  // Wrap the reader with any configured CharFilters, tokenize it, then run
  // the resulting stream through each token filter in order.
  Tokenizer tk = (Tokenizer) tokenizer.create(charStream(reader));
  TokenStream ts = tk;
  for (int i = 0; i < filters.length; i++) {
    ts = filters[i].create(ts);
  }
  return new TokenStreamInfo(tk, ts);
}
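// Illustrative call site (an assumption, not in the original source;
// TokenStreamInfo is taken to expose getTokenizer()/getTokenStream(), as in
// Solr's SolrAnalyzer, and "chain" is a hypothetical TokenizerChain instance):
// TokenStreamInfo info = chain.getStream("text", new StringReader("a b c"));
// TokenStream wrapped = info.getTokenStream(); // tokenizer plus all filters applied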
@Override
protected void init(TokenizerFactory plugin, Node node) throws Exception {
  // A field type's analyzer may declare at most one tokenizer.
  if (!tokenizers.isEmpty()) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
        "The schema defines multiple tokenizers for: " + node);
  }
  // Pass every attribute except "class" to the factory as its init args.
  plugin.init(DOMUtil.toMapExcept(node.getAttributes(), "class"));
  tokenizers.add(plugin);
}
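// For reference, this init handles schema.xml tokenizer declarations such as
// (standard Solr schema syntax; the factory class is just an example):
//   <tokenizer class="solr.WhitespaceTokenizerFactory"/>
// Any additional attributes on the element become entries in the args map.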
// Tokenize the char-filtered value and collect the tokens it produces.
TokenStream tokenStream = tfac.create(tokenizerChain.charStream(new StringReader(value)));
List<Token> tokens = analyzeTokenStream(tokenStream);
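// Illustrative follow-up (a sketch, not in the original snippet): each collected
// Token carries its term text and character offsets, which could be inspected
// like this (Token.term()/startOffset()/endOffset() as in Lucene 2.9/3.x):
// for (Token token : tokens) {
//   System.out.println(token.term() + " [" + token.startOffset() + "," + token.endOffset() + "]");
// }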