@Override public TokenStream create(TokenStream input) { // if the fst is null, it means there's actually no synonyms... just return the original stream // as there is nothing to do here. return map.fst == null ? input : new SynonymGraphFilter(input, map, ignoreCase); }
releaseBufferedToken(); if (parse()) { releaseBufferedToken(); lookaheadNextRead++; restoreState(token.state);
private void releaseBufferedToken() throws IOException { //System.out.println(" releaseBufferedToken"); BufferedOutputToken token = outputBuffer.pollFirst(); if (token.state != null) { // This is an original input token (keepOrig=true case): //System.out.println(" hasState"); restoreState(token.state); //System.out.println(" startOffset=" + offsetAtt.startOffset() + " endOffset=" + offsetAtt.endOffset()); } else { clearAttributes(); //System.out.println(" no state"); termAtt.append(token.term); // We better have a match already: assert matchStartOffset != -1; offsetAtt.setOffset(matchStartOffset, matchEndOffset); //System.out.println(" startOffset=" + matchStartOffset + " endOffset=" + matchEndOffset); typeAtt.setType(TYPE_SYNONYM); } //System.out.println(" lastNodeOut=" + lastNodeOut); //System.out.println(" term=" + termAtt); posIncrAtt.setPositionIncrement(token.startNode - lastNodeOut); lastNodeOut = token.startNode; posLenAtt.setPositionLength(token.endNode - token.startNode); }
doFinalCapture = true; if (liveToken) { capture(); capture(); capture(); bufferOutputTokens(matchOutput, matchInputLength); lookaheadNextRead += matchInputLength;
/** Buffers the current input token into lookahead buffer. */ private void capture() { assert liveToken; liveToken = false; BufferedInputToken token = lookahead.get(lookaheadNextWrite); lookaheadNextWrite++; token.state = captureState(); token.startOffset = offsetAtt.startOffset(); token.endOffset = offsetAtt.endOffset(); assert token.term.length() == 0; token.term.append(termAtt); captureCount++; maxLookaheadUsed = Math.max(maxLookaheadUsed, lookahead.getBufferSize()); //System.out.println(" maxLookaheadUsed=" + maxLookaheadUsed); }
@Override public TokenStream create(TokenStream tokenStream) { // fst is null means no synonyms return synonyms.fst == null ? tokenStream : new SynonymGraphFilter(tokenStream, synonyms, ignoreCase); } };
@Override public TokenStream create(TokenStream tokenStream) { // fst is null means no synonyms return synonymMap.fst == null ? tokenStream : new SynonymGraphFilter(tokenStream, synonymMap, ignoreCase); } }
@Override public TokenStream create(TokenStream tokenStream) { // fst is null means no synonyms return synonyms.fst == null ? tokenStream : new SynonymGraphFilter(tokenStream, synonyms, ignoreCase); } };
result = new SynonymGraphFilter(result, stemExceptions, true); result = new GermanStemmingFilter(result, hunspellDict, true, hyphen, true, wordlist, stemExceptions); result = new ReloadingSynonymGraphFilter(result, updateSynonymsService, true); } else { result = new SynonymGraphFilter(result, synonyms, true);
result = new SynonymGraphFilter(result, stemExceptions, true); result = new GermanStemmingFilter(result, hunspellDict, true, hyphen, true, wordlist, stemExceptions);
result = new SynonymGraphFilter(result, stemExceptions, true); result = new GermanStemmingFilter(result, hunspellDict, true, hyphen, true, wordlist, stemExceptions);
result = new SynonymGraphFilter(result, stemExceptions, true); result = new GermanStemmingFilter(result, hunspellDict, true, hyphen, true, wordlist, stemExceptions);
/**
 * Tries to stem the provided primary word using stemming exceptions.
 *
 * <p>The primary word is fed as a single-term stream through a {@code SynonymGraphFilter}
 * built on {@code stemExceptions} (third constructor argument {@code true}). If the first
 * token produced is non-empty and has type {@code SynonymGraphFilter.TYPE_SYNONYM}, it is
 * re-attached via {@code reAttachStemmedPrimaryWord}.
 *
 * @param primaryWordTermAttr a {@link CharTermAttribute} containing the primary word
 * @return {@code true} if the primary word could be stemmed, {@code false} otherwise
 * @throws IOException if resetting, advancing, ending or closing the stem-exception stream fails
 */
private boolean tryStemWithStemExceptions(CharTermAttribute primaryWordTermAttr) throws IOException {
    try (TokenStream stemExceptionsStream = new SynonymGraphFilter(
            new SingleTermTokenStream(primaryWordTermAttr.buffer(), primaryWordTermAttr.length()),
            this.stemExceptions,
            true)) {
        final CharTermAttribute stemExceptionTermAttr = stemExceptionsStream.addAttribute(CharTermAttribute.class);
        final TypeAttribute stemExceptionTypeAtt = stemExceptionsStream.addAttribute(TypeAttribute.class);
        stemExceptionsStream.reset();

        // Only inspect the attributes when a token was actually produced; the result of
        // incrementToken() was previously ignored, so attributes were read even when the
        // stream reported that no token was available.
        final boolean stemmed = stemExceptionsStream.incrementToken()
                && stemExceptionTermAttr.length() > 0
                && SynonymGraphFilter.TYPE_SYNONYM.equals(stemExceptionTypeAtt.type());

        if (stemmed) {
            // Re-attach before end() is called, keeping the original ordering, so the term
            // attribute still holds the token that was just produced.
            reAttachStemmedPrimaryWord(stemExceptionTermAttr.buffer(), stemExceptionTermAttr.length(),
                    primaryWordTermAttr.length());
        }

        // Single exit path: end() is invoked exactly once whether or not a stem was found.
        stemExceptionsStream.end();
        return stemmed;
    }
}
result = new SynonymGraphFilter(result, stemExceptions, true); result = new GermanStemmingFilter(result, hunspellDict, true, hyphen, true, wordlist, stemExceptions);