/** Supplies the analyzer for this fixture: a stock {@link ThaiAnalyzer}. */
@Override
public Analyzer createAnalyzer() {
    return new ThaiAnalyzer();
}
/** Builds a {@link ThaiTokenizer} backed by the given attribute factory. */
@Override
public Tokenizer create(AttributeFactory factory) {
    return new ThaiTokenizer(factory);
}
}
// Advances the sentence-scoped BreakIterator to the next word boundary pair
// and, if one is found, fills the term and offset attributes for that word.
// Returns false once the current sentence's boundaries are exhausted.
@Override
protected boolean incrementWord() {
    int start = wordBreaker.current();
    if (start == BreakIterator.DONE) {
        return false; // BreakIterator exhausted
    }
    // find the next set of boundaries, skipping over non-tokens
    // (a segment whose first code point is neither letter nor digit —
    // punctuation/whitespace — is not emitted as a term)
    int end = wordBreaker.next();
    while (end != BreakIterator.DONE && !Character.isLetterOrDigit(Character.codePointAt(buffer, sentenceStart + start, sentenceEnd))) {
        start = end;
        end = wordBreaker.next();
    }
    if (end == BreakIterator.DONE) {
        return false; // BreakIterator exhausted
    }
    clearAttributes();
    // start/end are sentence-relative: shift by sentenceStart to index into
    // buffer, and additionally by offset to produce absolute stream offsets.
    termAtt.copyBuffer(buffer, sentenceStart + start, end - start);
    offsetAtt.setOffset(correctOffset(offset + sentenceStart + start), correctOffset(offset + sentenceStart + end));
    return true;
}
}
/**
 * Wires a {@link ThaiAnalyzer} into the index. Stopwords are taken from the
 * index {@code settings} when configured, otherwise the Thai default set is
 * used, and the analyzer is pinned to this provider's Lucene version.
 */
public ThaiAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new ThaiAnalyzer(
            Analysis.parseStopWords(env, settings, ThaiAnalyzer.getDefaultStopSet()));
    analyzer.setVersion(version);
}
/**
 * Re-initializes this adapter over {@code input}: a fresh {@link ThaiTokenizer}
 * is created, its term attribute captured, and the stream reset for iteration.
 * Any failure is rethrown as an unchecked exception.
 *
 * NOTE(review): the previous tokenizer, if any, is replaced without being
 * closed — confirm upstream that this matches the tokenizer's lifecycle.
 */
public void reset(Reader input) throws IOException {
    assert input != null;
    try {
        this.tokenizer = new ThaiTokenizer();
        this.tokenizer.setReader(input);
        this.term = this.tokenizer.addAttribute(CharTermAttribute.class);
        this.tokenizer.reset();
    } catch (Exception e) {
        throw ExceptionUtils.wrapAsRuntimeException(e);
    }
}
/** Wraps the incoming stream in a {@link ThaiWordFilter}. */
public ThaiWordFilter create(TokenStream input) {
    return new ThaiWordFilter(input);
}
}
/**
 * Pulls the next token from the wrapped tokenizer. On success the shared
 * {@code tempCharSequence} is pointed at the term's character buffer and
 * {@code TT_TERM} is returned; end of stream yields {@code TT_EOF}.
 */
public short nextToken() throws IOException {
    if (!tokenizer.incrementToken()) {
        return ITokenizer.TT_EOF;
    }
    tempCharSequence.reset(term.buffer(), 0, term.length());
    return ITokenizer.TT_TERM;
}
/** Returns a new {@link ThaiAnalyzer}; the {@code args} string is not consulted. */
@Override
public Analyzer newAnalyzer(final String args) {
    return new ThaiAnalyzer();
}

@Override
/** Instantiates a {@link ThaiTokenizer} with the default attribute factory. */
@Override
public Tokenizer create() {
    return new ThaiTokenizer();
}
}
/** Chains a {@link ThaiWordFilter} onto the given token stream. */
@Override
public ThaiWordFilter create(TokenStream input) {
    return new ThaiWordFilter(input);
}
}
/** Registry entry producing a stock {@link ThaiAnalyzer}; {@code args} is not consulted. */
@Override
public Analyzer newAnalyzer(final JSONObject args) {
    return new ThaiAnalyzer();
}
},
/**
 * Assembles the token stream for the given field: text is segmented by a
 * {@link ThaiTokenizer}, then lower-cased, has Unicode decimal digits folded
 * to Latin via {@link DecimalDigitFilter}, and finally has the configured
 * stopwords removed by {@link StopFilter}.
 *
 * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 *         wrapping the tokenizer and the assembled filter chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer tokenizer = new ThaiTokenizer();
    TokenStream chain = new LowerCaseFilter(tokenizer);
    chain = new DecimalDigitFilter(chain);
    chain = new StopFilter(chain, stopwords);
    return new TokenStreamComponents(tokenizer, chain);
}
/** Constructs the {@link ThaiAnalyzer} this entry represents. */
@Override
protected Analyzer build() {
    return new ThaiAnalyzer();
}
};
/** Creates a {@link ThaiTokenizer}; the supplied {@code version} is not consulted. */
@Override
protected Tokenizer create(Version version) {
    return new ThaiTokenizer();
}
}
/**
 * Produces a default-configured {@link ThaiAnalyzer}.
 *
 * NOTE(review): {@code filterStopwords} is ignored — the analyzer is always
 * built with its default configuration; confirm this is intentional.
 */
public Analyzer newInstance(final boolean filterStopwords) {
    return new ThaiAnalyzer();
}
};
/** Returns a {@link ThaiTokenizer} built with the supplied attribute factory. */
@Override
public Tokenizer create(AttributeFactory factory) {
    return new ThaiTokenizer(factory);
}
}
/**
 * Builds a {@link ThaiAnalyzer} with default settings.
 *
 * NOTE(review): the {@code filterStopwords} flag has no effect here —
 * verify against sibling factories whether it should select a stopword-free
 * configuration.
 */
public Analyzer newInstance(final boolean filterStopwords) {
    return new ThaiAnalyzer();
}
};
/** Produces a {@link ThaiTokenizer} using the default attribute factory. */
@Override
public Tokenizer create() {
    return new ThaiTokenizer();
}
}
/** Builds a {@link ThaiAnalyzer} pinned to the Lucene version wrapped by {@code version}. */
@Override
protected Analyzer create(Version version) {
    final Analyzer analyzer = new ThaiAnalyzer();
    analyzer.setVersion(version.luceneVersion);
    return analyzer;
}
};
/** Yields a fresh {@link ThaiTokenizer}. */
@Override
public Tokenizer create() {
    return new ThaiTokenizer();
}
}