/**
 * Append the mandatory SIREn filters, i.e.,
 * {@link DatatypeAnalyzerFilterFactory}, {@link PositionAttributeFilterFactory}
 * and {@link SirenPayloadFilterFactory}, to the tokenizer chain.
 */
private Analyzer appendSirenFilters(final Analyzer analyzer,
                                    final Map<String, Datatype> datatypes,
                                    final Version luceneDefaultVersion) {
  if (!(analyzer instanceof TokenizerChain)) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
      "Invalid index analyzer '" + analyzer.getClass() + "' received");
  }
  final TokenizerChain chain = (TokenizerChain) analyzer;
  // copy the existing list of token filters
  final TokenFilterFactory[] old = chain.getTokenFilterFactories();
  final TokenFilterFactory[] filterFactories = new TokenFilterFactory[old.length + 3];
  System.arraycopy(old, 0, filterFactories, 0, old.length);
  // append the datatype analyzer filter factory
  final DatatypeAnalyzerFilterFactory datatypeFactory = new DatatypeAnalyzerFilterFactory(luceneDefaultVersion);
  datatypeFactory.register(datatypes);
  filterFactories[old.length] = datatypeFactory;
  // append the position attribute filter factory
  filterFactories[old.length + 1] = new PositionAttributeFilterFactory();
  // append the siren payload filter factory
  filterFactories[old.length + 2] = new SirenPayloadFilterFactory();
  // create a new tokenizer chain with the updated list of filter factories
  return new TokenizerChain(chain.getCharFilterFactories(), chain.getTokenizerFactory(), filterFactories);
}
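A minimal usage sketch of the method above, assuming it is invoked from a SIREn field type's initialization code; the fieldType variable, the empty datatype map, and the Version constant are illustrative assumptions, not taken from the snippet.

// Hypothetical caller: wrap the analyzer declared in schema.xml so that every token
// also passes through the three mandatory SIREn filters before being indexed.
final Analyzer schemaAnalyzer = fieldType.getAnalyzer();                  // assumed to be a TokenizerChain
final Map<String, Datatype> datatypes = new HashMap<String, Datatype>();  // registered datatypes (illustrative)
final Analyzer sirenAnalyzer = this.appendSirenFilters(schemaAnalyzer, datatypes, Version.LUCENE_46);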
@Override
public TokenStreamInfo getStream(String fieldName, Reader reader) {
  // create the tokenizer from the (possibly char-filtered) reader
  Tokenizer tk = (Tokenizer) tokenizer.create(charStream(reader));
  TokenStream ts = tk;
  // wrap the tokenizer with each token filter, in order
  for (int i = 0; i < filters.length; i++) {
    ts = filters[i].create(ts);
  }
  return new TokenStreamInfo(tk, ts);
}
private static SimpleOrderedMap<Object> getAnalyzerInfo(Analyzer analyzer) {
  SimpleOrderedMap<Object> aninfo = new SimpleOrderedMap<Object>();
  aninfo.add("className", analyzer.getClass().getName());
  if (analyzer instanceof TokenizerChain) {
    SimpleOrderedMap<Object> tokenizer = new SimpleOrderedMap<Object>();
    TokenizerChain tchain = (TokenizerChain) analyzer;
    TokenizerFactory tfac = tchain.getTokenizerFactory();
    tokenizer.add("className", tfac.getClass().getName());
    tokenizer.add("args", tfac.getArgs());
    aninfo.add("tokenizer", tokenizer);
    // describe each token filter of the chain, keyed by its simple class name
    TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories();
    SimpleOrderedMap<Map<String, Object>> filters = new SimpleOrderedMap<Map<String, Object>>();
    for (TokenFilterFactory filtfac : filtfacs) {
      Map<String, Object> tok = new HashMap<String, Object>();
      String className = filtfac.getClass().getName();
      tok.put("className", className);
      tok.put("args", filtfac.getArgs());
      filters.add(className.substring(className.lastIndexOf('.') + 1), tok);
    }
    if (filters.size() > 0) {
      aninfo.add("filters", filters);
    }
  }
  return aninfo;
}
CharFilterFactory[] cfiltfacs = tokenizerChain.getCharFilterFactories();
TokenizerFactory tfac = tokenizerChain.getTokenizerFactory();
TokenFilterFactory[] filtfacs = tokenizerChain.getTokenFilterFactories();
// build a token stream over the raw value using the chain's tokenizer factory
TokenStream tokenStream = tfac.create(tokenizerChain.charStream(new StringReader(value)));
List<Token> tokens = analyzeTokenStream(tokenStream);
protected void checkAllowLeadingWildcards() {
  boolean allow = false;
  for (Entry<String, FieldType> e : schema.getFieldTypes().entrySet()) {
    Analyzer a = e.getValue().getAnalyzer();
    if (a instanceof TokenizerChain) {
      // examine the indexing analysis chain to see whether it supports leading wildcards
      TokenizerChain tc = (TokenizerChain) a;
      TokenFilterFactory[] factories = tc.getTokenFilterFactories();
      for (TokenFilterFactory factory : factories) {
        if (factory instanceof ReversedWildcardFilterFactory) {
          allow = true;
          leadingWildcards.put(e.getKey(), (ReversedWildcardFilterFactory) factory);
        }
      }
    }
  }
  // XXX should be enabled on a per-field basis
  if (allow) {
    setAllowLeadingWildcard(true);
  }
}
@Override
protected void init(IndexSchema schema, Map<String, String> args) {
  String p = args.remove("precisionStep");
  if (p != null) {
    precisionStepArg = Integer.parseInt(p);
  }
  // normalize the precisionStep
  precisionStep = precisionStepArg;
  if (precisionStep <= 0 || precisionStep >= 64) precisionStep = Integer.MAX_VALUE;

  CharFilterFactory[] filterFactories = new CharFilterFactory[0];
  TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0];
  analyzer = new TokenizerChain(filterFactories,
      new TrieTokenizerFactory(TrieField.TrieTypes.DATE, precisionStep), tokenFilterFactories);
  // for query time we only need one token, so we use the biggest possible precisionStep:
  queryAnalyzer = new TokenizerChain(filterFactories,
      new TrieTokenizerFactory(TrieField.TrieTypes.DATE, Integer.MAX_VALUE), tokenFilterFactories);
}
@Test
public void testSirenFieldAnalyzer() throws Exception {
  final IndexSchema schema = h.getCore().getLatestSchema();
  final SchemaField ntriple = schema.getField(JSON_FIELD);
  final FieldType tmp = ntriple.getType();
  assertTrue(tmp.getAnalyzer() instanceof TokenizerChain);
  final TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer();
  assertNotNull(ts.getTokenizerFactory());
  assertTrue(ts.getTokenizerFactory() instanceof ExtendedJsonTokenizerFactory);
  // 3 filters for index analyzer
  assertNotNull(ts.getTokenFilterFactories());
  assertEquals(3, ts.getTokenFilterFactories().length);
  assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory);
  assertTrue(ts.getTokenFilterFactories()[1] instanceof PositionAttributeFilterFactory);
  assertTrue(ts.getTokenFilterFactories()[2] instanceof SirenPayloadFilterFactory);
}
private boolean fieldHasIndexedStopFilter(String field, SolrQueryRequest req) {
  FieldType fieldType = req.getSchema().getFieldType(field);
  Analyzer analyzer = fieldType.getIndexAnalyzer(); // index analyzer
  if (analyzer instanceof TokenizerChain) {
    TokenizerChain tokenizerChain = (TokenizerChain) analyzer;
    TokenFilterFactory[] tokenFilterFactories = tokenizerChain.getTokenFilterFactories();
    for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) {
      if (tokenFilterFactory instanceof StopFilterFactory) return true;
    }
  }
  return false;
}
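A hedged sketch of how a query-side component might consult this check; the field name and the surrounding branching are assumptions for illustration only.

// Illustrative decision point: only treat stopwords specially in the parsed query
// when the field's index-time chain also removed them via a StopFilter.
if (fieldHasIndexedStopFilter("text", req)) {
  // stopwords are absent from the index; requiring them in the query would match nothing (assumption)
} else {
  // stopwords are indexed; keep them as ordinary query terms (assumption)
}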
@Override
protected void init(IndexSchema schema, Map<String, String> args) {
  String p = args.remove("precisionStep");
  if (p != null) {
    precisionStepArg = Integer.parseInt(p);
  }
  // normalize the precisionStep
  precisionStep = precisionStepArg;
  if (precisionStep <= 0 || precisionStep >= 64) precisionStep = Integer.MAX_VALUE;

  String t = args.remove("type");
  if (t != null) {
    try {
      type = TrieTypes.valueOf(t.toUpperCase());
    } catch (IllegalArgumentException e) {
      throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
        "Invalid type specified in schema.xml for field: " + args.get("name"), e);
    }
  }

  CharFilterFactory[] filterFactories = new CharFilterFactory[0];
  TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0];
  analyzer = new TokenizerChain(filterFactories,
      new TrieTokenizerFactory(type, precisionStep), tokenFilterFactories);
  // for query time we only need one token, so we use the biggest possible precisionStep:
  queryAnalyzer = new TokenizerChain(filterFactories,
      new TrieTokenizerFactory(type, Integer.MAX_VALUE), tokenFilterFactories);
}
// copy the existing token filters, leaving room for three appended filters
final TokenFilterFactory[] old = chain.getTokenFilterFactories();
final TokenFilterFactory[] filterFactories = new TokenFilterFactory[old.length + 3];
System.arraycopy(old, 0, filterFactories, 0, old.length);
return new TokenizerChain(chain.getCharFilterFactories(), chain.getTokenizerFactory(), filterFactories);
@Test
public void testConciseSirenFieldAnalyzer() throws Exception {
  final IndexSchema schema = h.getCore().getLatestSchema();
  final SchemaField json = schema.getField("concise");
  final FieldType tmp = json.getType();
  assertTrue(tmp.getAnalyzer() instanceof TokenizerChain);
  final TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer();
  assertNotNull(ts.getTokenizerFactory());
  assertTrue(ts.getTokenizerFactory() instanceof ConciseJsonTokenizerFactory);
  // 4 filters for index analyzer
  assertNotNull(ts.getTokenFilterFactories());
  assertEquals(4, ts.getTokenFilterFactories().length);
  assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory);
  assertTrue(ts.getTokenFilterFactories()[1] instanceof PathEncodingFilterFactory);
  assertTrue(ts.getTokenFilterFactories()[2] instanceof PositionAttributeFilterFactory);
  assertTrue(ts.getTokenFilterFactories()[3] instanceof SirenPayloadFilterFactory);
}
filterLoader.load(loader, (NodeList) xpath.evaluate("./filter", node, XPathConstants.NODESET));
return new TokenizerChain(charFilters.toArray(new CharFilterFactory[charFilters.size()]),
    tokenizers.get(0),
    filters.toArray(new TokenFilterFactory[filters.size()]));
};
// copy the existing token filters, leaving room for four appended filters
final TokenFilterFactory[] old = chain.getTokenFilterFactories();
final TokenFilterFactory[] filterFactories = new TokenFilterFactory[old.length + 4];
System.arraycopy(old, 0, filterFactories, 0, old.length);
return new TokenizerChain(chain.getCharFilterFactories(), chain.getTokenizerFactory(), filterFactories);
@Test
public void testSirenFieldAnalyzer() throws Exception {
  final IndexSchema schema = h.getCore().getSchema();
  final SchemaField ntriple = schema.getField(JSON_FIELD);
  final FieldType tmp = ntriple.getType();
  assertTrue(tmp.getAnalyzer() instanceof TokenizerChain);
  final TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer();
  assertNotNull(ts.getTokenizerFactory());
  assertTrue(ts.getTokenizerFactory() instanceof JsonTokenizerFactory);
  // 3 filters for index analyzer
  assertNotNull(ts.getTokenFilterFactories());
  assertEquals(3, ts.getTokenFilterFactories().length);
  assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory);
  assertTrue(ts.getTokenFilterFactories()[1] instanceof PositionAttributeFilterFactory);
  assertTrue(ts.getTokenFilterFactories()[2] instanceof SirenPayloadFilterFactory);
  // no query analyzer
  assertNull(tmp.getQueryAnalyzer());
}
if (fieldAnalyzer instanceof TokenizerChain) {
  final TokenFilterFactory[] filterFactories = ((TokenizerChain) fieldAnalyzer).getTokenFilterFactories();
  for (TokenFilterFactory factory : filterFactories) {
    if (factory instanceof StopFilterFactory) {
TokenizerChain analyzer = new TokenizerChain(tokenizerFactory, filterFactories.toArray(new TokenFilterFactory[filterFactories.size()]));
@Test
public void testSirenFieldDatatypeAnalyzer() throws Exception {
  final IndexSchema schema = h.getCore().getSchema();
  final SchemaField ntriple = schema.getField(JSON_FIELD);
  final FieldType tmp = ntriple.getType();
  TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer();
  assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory);
  final DatatypeAnalyzerFilterFactory f = (DatatypeAnalyzerFilterFactory) ts.getTokenFilterFactories()[0];

  assertNotNull(f.getDatatypeAnalyzers());
  assertEquals(9, f.getDatatypeAnalyzers().size());

  assertNotNull(f.getDatatypeAnalyzers().get("http://json.org/field"));
  ts = (TokenizerChain) f.getDatatypeAnalyzers().get("http://json.org/field");
  assertNotNull(ts.getTokenizerFactory());
  assertTrue(ts.getTokenizerFactory() instanceof WhitespaceTokenizerFactory);

  assertNotNull(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string"));
  ts = (TokenizerChain) f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string");
  assertNotNull(ts.getTokenizerFactory());
  assertTrue(ts.getTokenizerFactory() instanceof UAX29URLEmailTokenizerFactory);

  assertNotNull(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int"));
  assertTrue(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int") instanceof IntNumericAnalyzer);
  final IntNumericAnalyzer a = (IntNumericAnalyzer) f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int");
  assertEquals(8, a.getPrecisionStep());
  assertEquals(32, a.getNumericParser().getValueSize());
  assertEquals(NumericType.INT, a.getNumericParser().getNumericType());
}
/**
 * Load the config when the resource loader is initialized.
 *
 * @param resourceLoader The resource loader.
 */
@Override
public void inform(final ResourceLoader resourceLoader) {
  super.inform(resourceLoader);
  // if an attributeWildcard parameter was defined, update the configuration of the PathEncodingFilterFactory
  if (this.hasAttributeWildcard) {
    final TokenizerChain chain = (TokenizerChain) this.getIndexAnalyzer();
    for (TokenFilterFactory tokenFilterFactory : chain.getTokenFilterFactories()) {
      if (tokenFilterFactory instanceof PathEncodingFilterFactory) {
        ((PathEncodingFilterFactory) tokenFilterFactory).setAttributeWildcard(this.attributeWildcard);
      }
    }
  }
}
filterLoader.load(loader, tokenFilterNodes);
return new TokenizerChain(charFilters.toArray(new CharFilterFactory[charFilters.size()]),
    tokenizers.get(0),
    filters.toArray(new TokenFilterFactory[filters.size()]));