protected void checkAllowLeadingWildcards() { boolean allow = false; for (Entry<String, FieldType> e : schema.getFieldTypes().entrySet()) { Analyzer a = e.getValue().getAnalyzer(); if (a instanceof TokenizerChain) { // examine the indexing analysis chain if it supports leading wildcards TokenizerChain tc = (TokenizerChain)a; TokenFilterFactory[] factories = tc.getTokenFilterFactories(); for (TokenFilterFactory factory : factories) { if (factory instanceof ReversedWildcardFilterFactory) { allow = true; leadingWildcards.put(e.getKey(), (ReversedWildcardFilterFactory)factory); } } } } // XXX should be enabled on a per-field basis if (allow) { setAllowLeadingWildcard(true); } }
private boolean fieldHasIndexedStopFilter(String field, SolrQueryRequest req) { FieldType fieldType = req.getSchema().getFieldType(field); Analyzer analyzer = fieldType.getIndexAnalyzer();//index analyzer if (analyzer instanceof TokenizerChain) { TokenizerChain tokenizerChain = (TokenizerChain) analyzer; TokenFilterFactory[] tokenFilterFactories = tokenizerChain.getTokenFilterFactories(); for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) { if (tokenFilterFactory instanceof StopFilterFactory) return true; } } return false; }
private boolean fieldHasIndexedStopFilter(String field, SolrQueryRequest req) { FieldType fieldType = req.getSchema().getFieldType(field); Analyzer analyzer = fieldType.getIndexAnalyzer();//index analyzer if (analyzer instanceof TokenizerChain) { TokenizerChain tokenizerChain = (TokenizerChain) analyzer; TokenFilterFactory[] tokenFilterFactories = tokenizerChain.getTokenFilterFactories(); for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) { if (tokenFilterFactory instanceof StopFilterFactory) return true; } } return false; }
if (fieldAnalyzer instanceof TokenizerChain) { final TokenFilterFactory[] filterFactories = ((TokenizerChain) fieldAnalyzer).getTokenFilterFactories(); for (TokenFilterFactory factory : filterFactories) { if (factory instanceof StopFilterFactory) {
/** * Load the config when resource loader initialized. * * @param resourceLoader The resource loader. */ @Override public void inform(final ResourceLoader resourceLoader) { super.inform(resourceLoader); // if there was a attributeWildcard parameter defined, updates the configuration of the PathEncodingFilterFactory if (this.hasAttributeWildcard) { final TokenizerChain chain = (TokenizerChain) this.getIndexAnalyzer(); for (TokenFilterFactory tokenFilterFactory : chain.getTokenFilterFactories()) { if (tokenFilterFactory instanceof PathEncodingFilterFactory) { ((PathEncodingFilterFactory) tokenFilterFactory).setAttributeWildcard(this.attributeWildcard); } } } }
// Build a factory array four slots longer than the chain's current token filter list and copy the
// existing factories to its front; the four trailing slots are left null by these statements.
final TokenFilterFactory[] old = chain.getTokenFilterFactories(); final TokenFilterFactory[] filterFactories = new TokenFilterFactory[old.length + 4]; System.arraycopy(old, 0, filterFactories, 0, old.length);
/**
 * Describes an analyzer as an ordered map.
 *
 * <p>The map always carries the analyzer's {@code className}. For a
 * {@link TokenizerChain} it additionally carries a {@code tokenizer} entry
 * (class name and args of the tokenizer factory) and, when the chain has at
 * least one token filter, a {@code filters} entry keyed by the simple class
 * name of each filter factory.
 *
 * @param analyzer the analyzer to describe
 * @return the description of {@code analyzer}
 */
private static SimpleOrderedMap<Object> getAnalyzerInfo(Analyzer analyzer) {
  SimpleOrderedMap<Object> info = new SimpleOrderedMap<Object>();
  info.add("className", analyzer.getClass().getName());
  if (!(analyzer instanceof TokenizerChain)) {
    return info;
  }
  TokenizerChain chain = (TokenizerChain) analyzer;

  // tokenizer section
  TokenizerFactory tokenizerFactory = chain.getTokenizerFactory();
  SimpleOrderedMap<Object> tokenizerInfo = new SimpleOrderedMap<Object>();
  tokenizerInfo.add("className", tokenizerFactory.getClass().getName());
  tokenizerInfo.add("args", tokenizerFactory.getArgs());
  info.add("tokenizer", tokenizerInfo);

  // filters section, only emitted when the chain has token filters
  SimpleOrderedMap<Map<String, Object>> filterInfos = new SimpleOrderedMap<Map<String, Object>>();
  TokenFilterFactory[] filterFactories = chain.getTokenFilterFactories();
  for (int i = 0; i < filterFactories.length; i++) {
    String qualifiedName = filterFactories[i].getClass().getName();
    Map<String, Object> details = new HashMap<String, Object>();
    details.put("className", qualifiedName);
    details.put("args", filterFactories[i].getArgs());
    // key each filter by its class name without the package prefix
    String simpleName = qualifiedName.substring(qualifiedName.lastIndexOf('.') + 1);
    filterInfos.add(simpleName, details);
  }
  if (filterInfos.size() > 0) {
    info.add("filters", filterInfos);
  }
  return info;
}
// Build a factory array three slots longer than the chain's current token filter list and copy the
// existing factories to its front; the three trailing slots are left null by these statements.
final TokenFilterFactory[] old = chain.getTokenFilterFactories(); final TokenFilterFactory[] filterFactories = new TokenFilterFactory[old.length + 3]; System.arraycopy(old, 0, filterFactories, 0, old.length);
/** * Load the datatype config when resource loader initialized. * * @param resourceLoader The resource loader. */ @Override public void inform(final ResourceLoader resourceLoader) { // load the datatypes this.loadDatatypeConfig((SolrResourceLoader) resourceLoader); // Register the datatypes in the DatatypeAnalyzerFilterFactory instance final TokenizerChain chain = (TokenizerChain) this.getIndexAnalyzer(); for (TokenFilterFactory tokenFilterFactory : chain.getTokenFilterFactories()) { if (tokenFilterFactory instanceof DatatypeAnalyzerFilterFactory) { ((DatatypeAnalyzerFilterFactory) tokenFilterFactory).register(this.getDatatypes()); } } }
/** * Append the mandatory SIREn filters, i.e., * {@link DatatypeAnalyzerFilterFactory}, * {@link PositionAttributeFilterFactory} and * {@link SirenPayloadFilterFactory}, to the tokenizer chain. */ private Analyzer appendSirenFilters(final Analyzer analyzer, final Map<String, Datatype> datatypes, final Version luceneDefaultVersion) { if (!(analyzer instanceof TokenizerChain)) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid index analyzer '" + analyzer.getClass() + "' received"); } final TokenizerChain chain = (TokenizerChain) analyzer; // copy the existing list of token filters final TokenFilterFactory[] old = chain.getTokenFilterFactories(); final TokenFilterFactory[] filterFactories = new TokenFilterFactory[old.length + 3]; System.arraycopy(old, 0, filterFactories, 0, old.length); // append the datatype analyzer filter factory final DatatypeAnalyzerFilterFactory datatypeFactory = new DatatypeAnalyzerFilterFactory(luceneDefaultVersion); datatypeFactory.register(datatypes); filterFactories[old.length] = datatypeFactory; // append the position attribute filter factory filterFactories[old.length + 1] = new PositionAttributeFilterFactory(); // append the siren payload filter factory filterFactories[old.length + 2] = new SirenPayloadFilterFactory(); // create a new tokenizer chain with the updated list of filter factories return new TokenizerChain(chain.getCharFilterFactories(), chain.getTokenizerFactory(), filterFactories); }
@Test public void testConciseSirenFieldAnalyzer() throws Exception { final IndexSchema schema = h.getCore().getLatestSchema(); final SchemaField json = schema.getField("concise"); final FieldType tmp = json.getType(); assertTrue(tmp.getAnalyzer() instanceof TokenizerChain); final TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer(); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof ConciseJsonTokenizerFactory); // 4 filters for index analyzer assertNotNull(ts.getTokenFilterFactories()); assertEquals(4, ts.getTokenFilterFactories().length); assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory); assertTrue(ts.getTokenFilterFactories()[1] instanceof PathEncodingFilterFactory); assertTrue(ts.getTokenFilterFactories()[2] instanceof PositionAttributeFilterFactory); assertTrue(ts.getTokenFilterFactories()[3] instanceof SirenPayloadFilterFactory); }
// Pull the three parts of the tokenizer chain into locals: its char filter factories,
// its tokenizer factory and its token filter factories.
CharFilterFactory[] cfiltfacs = tokenizerChain.getCharFilterFactories(); TokenizerFactory tfac = tokenizerChain.getTokenizerFactory(); TokenFilterFactory[] filtfacs = tokenizerChain.getTokenFilterFactories();
@Test public void testSirenFieldAnalyzer() throws Exception { final IndexSchema schema = h.getCore().getLatestSchema(); final SchemaField ntriple = schema.getField(JSON_FIELD); final FieldType tmp = ntriple.getType(); assertTrue(tmp.getAnalyzer() instanceof TokenizerChain); final TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer(); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof ExtendedJsonTokenizerFactory); // 3 filters for index analyzer assertNotNull(ts.getTokenFilterFactories()); assertEquals(3, ts.getTokenFilterFactories().length); assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory); assertTrue(ts.getTokenFilterFactories()[1] instanceof PositionAttributeFilterFactory); assertTrue(ts.getTokenFilterFactories()[2] instanceof SirenPayloadFilterFactory); }
@Test public void testSirenFieldAnalyzer() throws Exception { final IndexSchema schema = h.getCore().getSchema(); final SchemaField ntriple = schema.getField(JSON_FIELD); final FieldType tmp = ntriple.getType(); assertTrue(tmp.getAnalyzer() instanceof TokenizerChain); final TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer(); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof JsonTokenizerFactory); // 3 filters for index analyzer assertNotNull(ts.getTokenFilterFactories()); assertEquals(3, ts.getTokenFilterFactories().length); assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory); assertTrue(ts.getTokenFilterFactories()[1] instanceof PositionAttributeFilterFactory); assertTrue(ts.getTokenFilterFactories()[2] instanceof SirenPayloadFilterFactory); // no query analyzer assertNull(tmp.getQueryAnalyzer()); }
/**
 * Verifies the datatype analyzers registered in the
 * {@link DatatypeAnalyzerFilterFactory} of {@code JSON_FIELD}: nine analyzers
 * in total, with the expected tokenizers for the json field and xsd:string
 * datatypes and the expected numeric settings for xsd:int.
 */
@Test
public void testSirenFieldDatatypeAnalyzer() throws Exception {
  final String jsonFieldUri = "http://json.org/field";
  final String xsdStringUri = "http://www.w3.org/2001/XMLSchema#string";
  final String xsdIntUri = "http://www.w3.org/2001/XMLSchema#int";

  final FieldType type = h.getCore().getLatestSchema().getField(JSON_FIELD).getType();
  final TokenizerChain fieldChain = (TokenizerChain) type.getAnalyzer();

  // the datatype analyzer filter is the first filter of the chain
  assertTrue(fieldChain.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory);
  final DatatypeAnalyzerFilterFactory datatypeFilter =
      (DatatypeAnalyzerFilterFactory) fieldChain.getTokenFilterFactories()[0];

  assertNotNull(datatypeFilter.getDatatypeAnalyzers());
  assertEquals(9, datatypeFilter.getDatatypeAnalyzers().size());

  // json field datatype
  assertNotNull(datatypeFilter.getDatatypeAnalyzers().get(jsonFieldUri));
  final TokenizerChain jsonFieldChain =
      (TokenizerChain) datatypeFilter.getDatatypeAnalyzers().get(jsonFieldUri);
  assertNotNull(jsonFieldChain.getTokenizerFactory());
  assertTrue(jsonFieldChain.getTokenizerFactory() instanceof WhitespaceTokenizerFactory);

  // xsd:string datatype
  assertNotNull(datatypeFilter.getDatatypeAnalyzers().get(xsdStringUri));
  final TokenizerChain stringChain =
      (TokenizerChain) datatypeFilter.getDatatypeAnalyzers().get(xsdStringUri);
  assertNotNull(stringChain.getTokenizerFactory());
  assertTrue(stringChain.getTokenizerFactory() instanceof UAX29URLEmailTokenizerFactory);

  // xsd:int datatype
  assertNotNull(datatypeFilter.getDatatypeAnalyzers().get(xsdIntUri));
  assertTrue(datatypeFilter.getDatatypeAnalyzers().get(xsdIntUri) instanceof IntNumericAnalyzer);
  final IntNumericAnalyzer intAnalyzer =
      (IntNumericAnalyzer) datatypeFilter.getDatatypeAnalyzers().get(xsdIntUri);
  assertEquals(8, intAnalyzer.getPrecisionStep());
  assertEquals(32, intAnalyzer.getNumericParser().getValueSize());
  assertEquals(NumericType.INT, intAnalyzer.getNumericParser().getNumericType());
}
/**
 * Checks the datatype analyzers held by the first token filter factory of
 * {@code JSON_FIELD}: their number, the tokenizers used for the json field
 * and xsd:string datatypes, and the numeric configuration used for xsd:int.
 */
@Test
public void testSirenFieldDatatypeAnalyzer() throws Exception {
  final String jsonFieldType = "http://json.org/field";
  final String xsdString = "http://www.w3.org/2001/XMLSchema#string";
  final String xsdInt = "http://www.w3.org/2001/XMLSchema#int";

  final SchemaField jsonField = h.getCore().getSchema().getField(JSON_FIELD);
  final TokenizerChain indexChain = (TokenizerChain) jsonField.getType().getAnalyzer();

  assertTrue(indexChain.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory);
  final DatatypeAnalyzerFilterFactory factory =
      (DatatypeAnalyzerFilterFactory) indexChain.getTokenFilterFactories()[0];

  // nine datatype analyzers are expected in total
  assertNotNull(factory.getDatatypeAnalyzers());
  assertEquals(9, factory.getDatatypeAnalyzers().size());

  // the json field datatype is tokenized on whitespace
  assertNotNull(factory.getDatatypeAnalyzers().get(jsonFieldType));
  final TokenizerChain jsonFieldAnalyzer =
      (TokenizerChain) factory.getDatatypeAnalyzers().get(jsonFieldType);
  assertNotNull(jsonFieldAnalyzer.getTokenizerFactory());
  assertTrue(jsonFieldAnalyzer.getTokenizerFactory() instanceof WhitespaceTokenizerFactory);

  // xsd:string uses the UAX29 URL/email tokenizer
  assertNotNull(factory.getDatatypeAnalyzers().get(xsdString));
  final TokenizerChain stringAnalyzer =
      (TokenizerChain) factory.getDatatypeAnalyzers().get(xsdString);
  assertNotNull(stringAnalyzer.getTokenizerFactory());
  assertTrue(stringAnalyzer.getTokenizerFactory() instanceof UAX29URLEmailTokenizerFactory);

  // xsd:int is handled by a numeric analyzer: precision step 8, 32-bit INT parser
  assertNotNull(factory.getDatatypeAnalyzers().get(xsdInt));
  assertTrue(factory.getDatatypeAnalyzers().get(xsdInt) instanceof IntNumericAnalyzer);
  final IntNumericAnalyzer numeric =
      (IntNumericAnalyzer) factory.getDatatypeAnalyzers().get(xsdInt);
  assertEquals(8, numeric.getPrecisionStep());
  assertEquals(32, numeric.getNumericParser().getValueSize());
  assertEquals(NumericType.INT, numeric.getNumericParser().getNumericType());
}