return new TokenizerChain(chain.getCharFilterFactories(), chain.getTokenizerFactory(), filterFactories);
private static SimpleOrderedMap<Object> getAnalyzerInfo(Analyzer analyzer) { SimpleOrderedMap<Object> aninfo = new SimpleOrderedMap<Object>(); aninfo.add("className", analyzer.getClass().getName()); if (analyzer instanceof TokenizerChain) { SimpleOrderedMap<Object> tokenizer = new SimpleOrderedMap<Object>(); TokenizerChain tchain = (TokenizerChain)analyzer; TokenizerFactory tfac = tchain.getTokenizerFactory(); tokenizer.add("className", tfac.getClass().getName()); tokenizer.add("args", tfac.getArgs()); aninfo.add("tokenizer", tokenizer); TokenFilterFactory[] filtfacs = tchain.getTokenFilterFactories(); SimpleOrderedMap<Map<String, Object>> filters = new SimpleOrderedMap<Map<String, Object>>(); for (TokenFilterFactory filtfac : filtfacs) { Map<String, Object> tok = new HashMap<String, Object>(); String className = filtfac.getClass().getName(); tok.put("className", className); tok.put("args", filtfac.getArgs()); filters.add(className.substring(className.lastIndexOf('.')+1), tok); } if (filters.size() > 0) { aninfo.add("filters", filters); } } return aninfo; }
chain.getTokenizerFactory(), filterFactories);
/** * Append the mandatory SIREn filters, i.e., * {@link DatatypeAnalyzerFilterFactory}, * {@link PositionAttributeFilterFactory} and * {@link SirenPayloadFilterFactory}, to the tokenizer chain. */ private Analyzer appendSirenFilters(final Analyzer analyzer, final Map<String, Datatype> datatypes, final Version luceneDefaultVersion) { if (!(analyzer instanceof TokenizerChain)) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid index analyzer '" + analyzer.getClass() + "' received"); } final TokenizerChain chain = (TokenizerChain) analyzer; // copy the existing list of token filters final TokenFilterFactory[] old = chain.getTokenFilterFactories(); final TokenFilterFactory[] filterFactories = new TokenFilterFactory[old.length + 3]; System.arraycopy(old, 0, filterFactories, 0, old.length); // append the datatype analyzer filter factory final DatatypeAnalyzerFilterFactory datatypeFactory = new DatatypeAnalyzerFilterFactory(luceneDefaultVersion); datatypeFactory.register(datatypes); filterFactories[old.length] = datatypeFactory; // append the position attribute filter factory filterFactories[old.length + 1] = new PositionAttributeFilterFactory(); // append the siren payload filter factory filterFactories[old.length + 2] = new SirenPayloadFilterFactory(); // create a new tokenizer chain with the updated list of filter factories return new TokenizerChain(chain.getCharFilterFactories(), chain.getTokenizerFactory(), filterFactories); }
TokenizerFactory tfac = tokenizerChain.getTokenizerFactory(); TokenFilterFactory[] filtfacs = tokenizerChain.getTokenFilterFactories();
@Test public void testSirenFieldDatatypeAnalyzer() throws Exception { final IndexSchema schema = h.getCore().getSchema(); final SchemaField ntriple = schema.getField(JSON_FIELD); final FieldType tmp = ntriple.getType(); TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer(); assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory); final DatatypeAnalyzerFilterFactory f = (DatatypeAnalyzerFilterFactory) ts.getTokenFilterFactories()[0]; assertNotNull(f.getDatatypeAnalyzers()); assertEquals(9, f.getDatatypeAnalyzers().size()); assertNotNull(f.getDatatypeAnalyzers().get("http://json.org/field")); ts = (TokenizerChain) f.getDatatypeAnalyzers().get("http://json.org/field"); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof WhitespaceTokenizerFactory); assertNotNull(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string")); ts = (TokenizerChain) f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string"); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof UAX29URLEmailTokenizerFactory); assertNotNull(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int")); assertTrue(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int") instanceof IntNumericAnalyzer); final IntNumericAnalyzer a = (IntNumericAnalyzer) f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int"); assertEquals(8, a.getPrecisionStep()); assertEquals(32, a.getNumericParser().getValueSize()); assertEquals(NumericType.INT, a.getNumericParser().getNumericType()); }
@Test public void testSirenFieldDatatypeAnalyzer() throws Exception { final IndexSchema schema = h.getCore().getLatestSchema(); final SchemaField ntriple = schema.getField(JSON_FIELD); final FieldType tmp = ntriple.getType(); TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer(); assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory); final DatatypeAnalyzerFilterFactory f = (DatatypeAnalyzerFilterFactory) ts.getTokenFilterFactories()[0]; assertNotNull(f.getDatatypeAnalyzers()); assertEquals(9, f.getDatatypeAnalyzers().size()); assertNotNull(f.getDatatypeAnalyzers().get("http://json.org/field")); ts = (TokenizerChain) f.getDatatypeAnalyzers().get("http://json.org/field"); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof WhitespaceTokenizerFactory); assertNotNull(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string")); ts = (TokenizerChain) f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#string"); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof UAX29URLEmailTokenizerFactory); assertNotNull(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int")); assertTrue(f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int") instanceof IntNumericAnalyzer); final IntNumericAnalyzer a = (IntNumericAnalyzer) f.getDatatypeAnalyzers().get("http://www.w3.org/2001/XMLSchema#int"); assertEquals(8, a.getPrecisionStep()); assertEquals(32, a.getNumericParser().getValueSize()); assertEquals(NumericType.INT, a.getNumericParser().getNumericType()); }
@Test public void testSirenFieldAnalyzer() throws Exception { final IndexSchema schema = h.getCore().getLatestSchema(); final SchemaField ntriple = schema.getField(JSON_FIELD); final FieldType tmp = ntriple.getType(); assertTrue(tmp.getAnalyzer() instanceof TokenizerChain); final TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer(); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof ExtendedJsonTokenizerFactory); // 3 filters for index analyzer assertNotNull(ts.getTokenFilterFactories()); assertEquals(3, ts.getTokenFilterFactories().length); assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory); assertTrue(ts.getTokenFilterFactories()[1] instanceof PositionAttributeFilterFactory); assertTrue(ts.getTokenFilterFactories()[2] instanceof SirenPayloadFilterFactory); }
@Test public void testConciseSirenFieldAnalyzer() throws Exception { final IndexSchema schema = h.getCore().getLatestSchema(); final SchemaField json = schema.getField("concise"); final FieldType tmp = json.getType(); assertTrue(tmp.getAnalyzer() instanceof TokenizerChain); final TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer(); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof ConciseJsonTokenizerFactory); // 4 filters for index analyzer assertNotNull(ts.getTokenFilterFactories()); assertEquals(4, ts.getTokenFilterFactories().length); assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory); assertTrue(ts.getTokenFilterFactories()[1] instanceof PathEncodingFilterFactory); assertTrue(ts.getTokenFilterFactories()[2] instanceof PositionAttributeFilterFactory); assertTrue(ts.getTokenFilterFactories()[3] instanceof SirenPayloadFilterFactory); }
@Test public void testSirenFieldAnalyzer() throws Exception { final IndexSchema schema = h.getCore().getSchema(); final SchemaField ntriple = schema.getField(JSON_FIELD); final FieldType tmp = ntriple.getType(); assertTrue(tmp.getAnalyzer() instanceof TokenizerChain); final TokenizerChain ts = (TokenizerChain) tmp.getAnalyzer(); assertNotNull(ts.getTokenizerFactory()); assertTrue(ts.getTokenizerFactory() instanceof JsonTokenizerFactory); // 3 filters for index analyzer assertNotNull(ts.getTokenFilterFactories()); assertEquals(3, ts.getTokenFilterFactories().length); assertTrue(ts.getTokenFilterFactories()[0] instanceof DatatypeAnalyzerFilterFactory); assertTrue(ts.getTokenFilterFactories()[1] instanceof PositionAttributeFilterFactory); assertTrue(ts.getTokenFilterFactories()[2] instanceof SirenPayloadFilterFactory); // no query analyzer assertNull(tmp.getQueryAnalyzer()); }