/**
 * Returns a string representation of this map in the form
 * {@code {entry1, entry2, ...}}, where each entry renders via its own
 * {@code toString()}.
 */
@Override
public String toString() {
  final StringBuilder buf = new StringBuilder("{");
  boolean first = true;
  for (final Map.Entry<Object, V> e : entrySet()) {
    if (!first) {
      buf.append(", ");
    }
    first = false;
    buf.append(e);
  }
  return buf.append('}').toString();
}
/**
 * Renders this map as {@code {entry1, entry2, ...}}; each entry is appended
 * using its default {@code toString()} representation.
 */
@Override
public String toString() {
  final StringBuilder out = new StringBuilder("{");
  final Iterator<Map.Entry<Object, V>> it = entrySet().iterator();
  while (it.hasNext()) {
    if (out.length() > 1) {
      out.append(", ");
    }
    out.append(it.next());
  }
  return out.append('}').toString();
}
/**
 * Builds the JSON analysis chain: tokenize the JSON input, route each token
 * through its datatype-specific analyzer, then encode position attributes
 * and SIREn payloads.
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
  final JsonTokenizer source = new JsonTokenizer(reader);
  final DatatypeAnalyzerFilter datatypeFilter =
      new DatatypeAnalyzerFilter(matchVersion, source, fieldAnalyzer, valueAnalyzer);
  // Register the user-supplied per-datatype analyzers (keys are char[] datatype labels).
  for (final Entry<Object, Analyzer> reg : regAnalyzers.entrySet()) {
    datatypeFilter.register((char[]) reg.getKey(), reg.getValue());
  }
  TokenStream stream = new PositionAttributeFilter(datatypeFilter);
  stream = new SirenPayloadFilter(stream);
  return new TokenStreamComponents(source, stream);
}
/**
 * Builds the extended-JSON analysis chain: tokenizer, per-datatype analyzer
 * dispatch, then position and payload encoding for SIREn.
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
  final ExtendedJsonTokenizer source = new ExtendedJsonTokenizer(reader);
  final DatatypeAnalyzerFilter datatypeFilter =
      new DatatypeAnalyzerFilter(source, fieldAnalyzer, valueAnalyzer);
  // Hook up every registered datatype analyzer (keys are char[] datatype labels).
  for (final Entry<Object, Analyzer> reg : regAnalyzers.entrySet()) {
    datatypeFilter.register((char[]) reg.getKey(), reg.getValue());
  }
  TokenStream stream = new PositionAttributeFilter(datatypeFilter);
  stream = new SirenPayloadFilter(stream);
  return new TokenStreamComponents(source, stream);
}
/**
 * Builds the index-time extended-JSON analysis chain: tokenizer, datatype
 * analyzer dispatch, then position and payload encoding.
 *
 * @param field  the field name (unused by this chain)
 * @param reader the JSON input to tokenize
 */
@Override
protected TokenStreamComponents createComponents(final String field, final Reader reader) {
  final ExtendedJsonTokenizer source = new ExtendedJsonTokenizer(reader);
  final DatatypeAnalyzerFilter tt = new DatatypeAnalyzerFilter(source);
  // Register the index-time analyzers (keys are char[] datatype labels).
  for (final Entry<Object, Analyzer> e : indexAnalyzers.entrySet()) {
    tt.register((char[]) e.getKey(), e.getValue());
  }
  TokenStream sink = new PositionAttributeFilter(tt);
  sink = new SirenPayloadFilter(sink);
  return new TokenStreamComponents(source, sink);
}
/** * Helper method for now. * * TODO: when GH-9 is implemented (thread-local parsers) then this would be done for each thread-local parser only once. * * @param jsonParser * @param analysisService */ private void registerCustomDatatypes(ExtendedTreeQueryParser jsonParser, AnalysisService analysisService) { Map<String, Analyzer> map = jsonParser.getKeywordQueryParser().getQueryConfigHandler() .get(ExtendedKeywordQueryConfigHandler.KeywordConfigurationKeys.DATATYPES_ANALYZERS); if (map == null) { // this is perhaps not necessary....? map = new HashMap<String, Analyzer>(); jsonParser.getKeywordQueryParser().getQueryConfigHandler() .set(ExtendedKeywordQueryConfigHandler.KeywordConfigurationKeys.DATATYPES_ANALYZERS, map); } ExtendedJsonAnalyzer sirenAnalyzer = (ExtendedJsonAnalyzer) analysisService.analyzer(ExtendedJsonAnalyzer.NAME).analyzer(); for (final Entry<Object, Analyzer> e : sirenAnalyzer.getQueryAnalyzers().entrySet()) { map.put(new String((char[]) e.getKey()), e.getValue()); } }
/**
 * Builds the tuple analysis chain: tokenize, drop blank-node and dot tokens,
 * apply per-datatype analyzers, then encode positions and SIREn payloads.
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
  final TupleTokenizer source = new TupleTokenizer(reader);
  // Filter out structural tokens before datatype analysis.
  TokenStream stream = new TokenTypeFilter(matchVersion, source,
      new int[] { TupleTokenizer.BNODE, TupleTokenizer.DOT });
  final DatatypeAnalyzerFilter datatypeFilter =
      new DatatypeAnalyzerFilter(stream, anyURIAnalyzer, stringAnalyzer);
  // Register the literal analyzers (keys are char[] datatype labels).
  for (final Entry<Object, Analyzer> reg : regLitAnalyzers.entrySet()) {
    datatypeFilter.register((char[]) reg.getKey(), reg.getValue());
  }
  stream = new PositionAttributeFilter(datatypeFilter);
  stream = new SirenPayloadFilter(stream);
  return new TokenStreamComponents(source, stream);
}
/**
 * Builds the tuple analysis chain: tokenize, strip blank-node and dot
 * tokens, dispatch by datatype, then encode positions and SIREn payloads.
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
  final TupleTokenizer source = new TupleTokenizer(reader);
  // Remove structural BNODE/DOT tokens from the stream.
  TokenStream stream = new TokenTypeFilter(source,
      new int[] { TupleTokenizer.BNODE, TupleTokenizer.DOT });
  final DatatypeAnalyzerFilter datatypeFilter =
      new DatatypeAnalyzerFilter(matchVersion, stream, anyURIAnalyzer, stringAnalyzer);
  // Register the literal analyzers (keys are char[] datatype labels).
  for (final Entry<Object, Analyzer> reg : regLitAnalyzers.entrySet()) {
    datatypeFilter.register((char[]) reg.getKey(), reg.getValue());
  }
  stream = new PositionAttributeFilter(datatypeFilter);
  stream = new SirenPayloadFilter(stream);
  return new TokenStreamComponents(source, stream);
}
/**
 * Builds the concise-JSON analysis chain: tokenizer, datatype analyzer
 * dispatch, path encoding, then position and payload encoding.
 */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
  final ConciseJsonTokenizer source = new ConciseJsonTokenizer(reader);
  final DatatypeAnalyzerFilter datatypeFilter =
      new DatatypeAnalyzerFilter(source, fieldAnalyzer, valueAnalyzer);
  // Register the user-supplied datatype analyzers (keys are char[] labels).
  for (final Entry<Object, Analyzer> reg : regAnalyzers.entrySet()) {
    datatypeFilter.register((char[]) reg.getKey(), reg.getValue());
  }
  final PathEncodingFilter pathFilter = new PathEncodingFilter(datatypeFilter);
  pathFilter.setPreserveOriginal(this.generateTokensWithoutPath);
  TokenStream stream = new PositionAttributeFilter(pathFilter);
  stream = new SirenPayloadFilter(stream);
  return new TokenStreamComponents(source, stream);
}
/**
 * Builds the index-time concise-JSON analysis chain: tokenizer, datatype
 * analyzer dispatch, path encoding, then position and payload encoding.
 *
 * @param field  the field name (unused by this chain)
 * @param reader the concise JSON input to tokenize
 */
@Override
protected TokenStreamComponents createComponents(final String field, final Reader reader) {
  final ConciseJsonTokenizer source = new ConciseJsonTokenizer(reader);
  final DatatypeAnalyzerFilter tt = new DatatypeAnalyzerFilter(source);
  // Register the index-time analyzers (keys are char[] datatype labels).
  for (final Entry<Object, Analyzer> e : indexAnalyzers.entrySet()) {
    tt.register((char[]) e.getKey(), e.getValue());
  }
  final PathEncodingFilter pathEncodingFilter = new PathEncodingFilter(tt);
  pathEncodingFilter.setPreserveOriginal(this.generateTokensWithoutPath);
  TokenStream sink = new PositionAttributeFilter(pathEncodingFilter);
  sink = new SirenPayloadFilter(sink);
  return new TokenStreamComponents(source, sink);
}
public DutchAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) { this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords)); this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionTable)); if (stemOverrideDict.isEmpty()) { this.stemdict = null; } else { // we don't need to ignore case here since we lowercase in this analyzer anyway StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(false); CharArrayMap<String>.EntryIterator iter = stemOverrideDict.entrySet().iterator(); CharsRefBuilder spare = new CharsRefBuilder(); while (iter.hasNext()) { char[] nextKey = iter.nextKey(); spare.copyChars(nextKey, 0, nextKey.length); builder.add(spare.get(), iter.currentValue()); } try { this.stemdict = builder.build(); } catch (IOException ex) { throw new RuntimeException("can not build stem dict", ex); } } }