/**
 * Wraps the input stream in a {@link DatatypeAnalyzerFilter} and registers
 * every configured per-datatype {@link Analyzer} on it.
 *
 * @param input the input token stream
 * @return the input wrapped in a fully configured {@link DatatypeAnalyzerFilter}
 */
@Override
public TokenStream create(final TokenStream input) {
  final DatatypeAnalyzerFilter f = new DatatypeAnalyzerFilter(input);
  // Iterate over the entries directly: avoids the extra map lookup per
  // datatype that keySet() + get() would incur.
  for (final Map.Entry<String, Analyzer> e : analyzers.entrySet()) {
    f.register(e.getKey().toCharArray(), e.getValue());
  }
  return f;
}
/**
 * Creates a {@link DatatypeAnalyzerFilter} over the given stream with an
 * initially empty datatype-to-analyzer registry, and wires up the token
 * attributes of this stream.
 *
 * @param input the input token stream
 */
public DatatypeAnalyzerFilter(final TokenStream input) {
  super(input);
  // here, we just need to indicate a version > Lucene 3.1 - see CharArrayMap
  // (map is created case-sensitive, pre-sized for 64 datatype entries)
  dtsAnalyzer = new CharArrayMap<Analyzer>(Version.LUCENE_46, 64, false);
  this.initAttributes();
}
@Override protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { final Version matchVersion = Version.LUCENE_4_9; final ConciseJsonTokenizer src = new ConciseJsonTokenizer(reader); TokenStream tok = new DatatypeAnalyzerFilter(src, new StandardAnalyzer(matchVersion), new StandardAnalyzer(matchVersion)); // The PathEncodingFilter is mandatory only for the ConciseJsonTokenizer PathEncodingFilter pathFilter = new PathEncodingFilter(tok); // here we tell the path filter to preserve the original tokens, // it will index the value with and without prepending the path pathFilter.setPreserveOriginal(true); // The PositionAttributeFilter and SirenPayloadFilter are mandatory // and must be always the last filters in your token stream tok = new PositionAttributeFilter(pathFilter); tok = new SirenPayloadFilter(tok); return new TokenStreamComponents(src, tok); }
/**
 * Creates a {@link DatatypeAnalyzerFilter} with the given default
 * {@link Analyzer}s for the {@link JSONDatatype#JSON_FIELD} and
 * {@link XSDDatatype#XSD_STRING} datatypes.
 *
 * @param input the input token stream
 * @param fieldAnalyzer the default field name {@link Analyzer}
 * @param valueAnalyzer the default value {@link Analyzer}
 */
public DatatypeAnalyzerFilter(final TokenStream input,
                              final Analyzer fieldAnalyzer,
                              final Analyzer valueAnalyzer) {
  this(input);
  // Pre-register the defaults: field names map to the field analyzer,
  // plain string values map to the value analyzer.
  this.register(JSONDatatype.JSON_FIELD.toCharArray(), fieldAnalyzer);
  this.register(XSDDatatype.XSD_STRING.toCharArray(), valueAnalyzer);
}
// NOTE(review): fragment of an enclosing method whose header is not visible
// here. It re-initialises this stream's token attributes, copies the
// attribute values produced by the inner stream, and signals success
// (presumably "token available") to the caller — confirm against the
// enclosing incrementToken-style method.
this.initTokenAttributes();
this.copyInnerStreamAttributes();
return true;
/**
 * Initialise the attributes of the main stream.
 * <p>
 * Term, offset, position-increment, type and datatype attributes are read
 * from the wrapped input stream; the node and path attributes are added to
 * (and owned by) this stream.
 */
private void initAttributes() {
  // Attributes produced by the upstream tokenizer/filters.
  termAtt = input.getAttribute(CharTermAttribute.class);
  offsetAtt = input.getAttribute(OffsetAttribute.class);
  posIncrAtt = input.getAttribute(PositionIncrementAttribute.class);
  typeAtt = input.getAttribute(TypeAttribute.class);
  dtypeAtt = input.getAttribute(DatatypeAttribute.class);
  // Attributes contributed by this filter.
  nodeAtt = this.addAttribute(NodeAttribute.class);
  pathAtt = this.addAttribute(PathAttribute.class);
}
/**
 * Builds the analysis chain for an extended JSON field: tokenizer, datatype
 * analysis with the default field/value analyzers plus all registered
 * per-datatype analyzers, and the mandatory position/payload filters.
 *
 * @param fieldName the name of the field being analyzed
 * @param reader the reader over the field content
 * @return the assembled {@link TokenStreamComponents}
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
  final ExtendedJsonTokenizer tokenizer = new ExtendedJsonTokenizer(reader);
  final DatatypeAnalyzerFilter datatypeFilter =
      new DatatypeAnalyzerFilter(tokenizer, fieldAnalyzer, valueAnalyzer);
  // Register every user-supplied datatype analyzer on top of the defaults.
  for (final Entry<Object, Analyzer> entry : regAnalyzers.entrySet()) {
    datatypeFilter.register((char[]) entry.getKey(), entry.getValue());
  }
  // Position and payload filters must close the chain.
  TokenStream stream = new PositionAttributeFilter(datatypeFilter);
  stream = new SirenPayloadFilter(stream);
  return new TokenStreamComponents(tokenizer, stream);
}
/**
 * Builds the analysis chain for an extended JSON field using only the
 * index-time registered per-datatype analyzers (no default analyzers).
 *
 * @param field the name of the field being analyzed
 * @param reader the reader over the field content
 * @return the assembled {@link TokenStreamComponents}
 */
@Override
protected TokenStreamComponents createComponents(String field, Reader reader) {
  final ExtendedJsonTokenizer tokenizer = new ExtendedJsonTokenizer(reader);
  final DatatypeAnalyzerFilter datatypeFilter = new DatatypeAnalyzerFilter(tokenizer);
  // Register each index-time datatype analyzer.
  for (final Entry<Object, Analyzer> entry : indexAnalyzers.entrySet()) {
    datatypeFilter.register((char[]) entry.getKey(), entry.getValue());
  }
  // Position and payload filters must close the chain.
  TokenStream stream = new PositionAttributeFilter(datatypeFilter);
  stream = new SirenPayloadFilter(stream);
  return new TokenStreamComponents(tokenizer, stream);
}
/**
 * Builds the analysis chain for a tuple field: tokenizer, a type filter
 * dropping bnode and dot tokens, datatype analysis with the URI/string
 * defaults plus registered literal analyzers, and the mandatory
 * position/payload filters.
 *
 * @param fieldName the name of the field being analyzed
 * @param reader the reader over the field content
 * @return the assembled {@link TokenStreamComponents}
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
  final TupleTokenizer tokenizer = new TupleTokenizer(reader);
  // Filter out blank nodes and the tuple-terminating dot.
  TokenStream stream = new TokenTypeFilter(matchVersion, tokenizer,
      new int[] {TupleTokenizer.BNODE, TupleTokenizer.DOT});
  final DatatypeAnalyzerFilter datatypeFilter =
      new DatatypeAnalyzerFilter(stream, anyURIAnalyzer, stringAnalyzer);
  // Register every user-supplied literal datatype analyzer.
  for (final Entry<Object, Analyzer> entry : regLitAnalyzers.entrySet()) {
    datatypeFilter.register((char[]) entry.getKey(), entry.getValue());
  }
  // Position and payload filters must close the chain.
  stream = new PositionAttributeFilter(datatypeFilter);
  stream = new SirenPayloadFilter(stream);
  return new TokenStreamComponents(tokenizer, stream);
}
/**
 * Builds the analysis chain for a concise JSON field: tokenizer, datatype
 * analysis with the default field/value analyzers plus all registered
 * per-datatype analyzers, path encoding, and the mandatory
 * position/payload filters.
 *
 * @param fieldName the name of the field being analyzed
 * @param reader the reader over the field content
 * @return the assembled {@link TokenStreamComponents}
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
  final ConciseJsonTokenizer tokenizer = new ConciseJsonTokenizer(reader);
  final DatatypeAnalyzerFilter datatypeFilter =
      new DatatypeAnalyzerFilter(tokenizer, fieldAnalyzer, valueAnalyzer);
  // Register every user-supplied datatype analyzer on top of the defaults.
  for (final Entry<Object, Analyzer> entry : regAnalyzers.entrySet()) {
    datatypeFilter.register((char[]) entry.getKey(), entry.getValue());
  }
  // Prepend the node path to each value token; optionally keep the
  // path-less originals too.
  final PathEncodingFilter pathEncoder = new PathEncodingFilter(datatypeFilter);
  pathEncoder.setPreserveOriginal(this.generateTokensWithoutPath);
  // Position and payload filters must close the chain.
  TokenStream stream = new PositionAttributeFilter(pathEncoder);
  stream = new SirenPayloadFilter(stream);
  return new TokenStreamComponents(tokenizer, stream);
}
/**
 * Builds the analysis chain for a concise JSON field using only the
 * index-time registered per-datatype analyzers, followed by path encoding
 * and the mandatory position/payload filters.
 *
 * @param field the name of the field being analyzed
 * @param reader the reader over the field content
 * @return the assembled {@link TokenStreamComponents}
 */
@Override
protected TokenStreamComponents createComponents(String field, Reader reader) {
  final ConciseJsonTokenizer tokenizer = new ConciseJsonTokenizer(reader);
  final DatatypeAnalyzerFilter datatypeFilter = new DatatypeAnalyzerFilter(tokenizer);
  // Register each index-time datatype analyzer.
  for (final Entry<Object, Analyzer> entry : indexAnalyzers.entrySet()) {
    datatypeFilter.register((char[]) entry.getKey(), entry.getValue());
  }
  // Prepend the node path to each value token; optionally keep the
  // path-less originals too.
  final PathEncodingFilter pathEncoder = new PathEncodingFilter(datatypeFilter);
  pathEncoder.setPreserveOriginal(this.generateTokensWithoutPath);
  // Position and payload filters must close the chain.
  TokenStream stream = new PositionAttributeFilter(pathEncoder);
  stream = new SirenPayloadFilter(stream);
  return new TokenStreamComponents(tokenizer, stream);
}