/** * Returns the next input Token whose termText() is not a stop word. */ public final Token next() throws IOException { // return the first non-stop word found for (Token token = input.next(); token != null; token = input.next()) if (!stopWords.contains(token.termText)) return token; // reached EOS -- return null return null; } }
/**
 * Returns the next input Token with its text lower-cased.
 *
 * <p>Fix: lower-case with {@code Locale.ROOT} instead of the JVM default
 * locale. {@code String.toLowerCase()} is locale-sensitive (e.g. the
 * Turkish dotless-i), so index terms could differ between machines.
 *
 * @return the lower-cased token, or null at end of stream
 * @throws IOException if the wrapped stream fails
 */
public final Token next() throws IOException {
  Token t = input.next();
  if (t == null) {
    return null;
  }
  // Locale.ROOT keeps analysis output identical across default locales.
  t.termText = t.termText.toLowerCase(java.util.Locale.ROOT);
  return t;
}
}
/**
 * Read a token from the buffered input stream.
 *
 * <p>Tokens waiting in {@code inQueue} are drained before the wrapped
 * input is consulted.
 *
 * @return the next token, or null at EOS
 */
protected Token read() throws IOException {
  if (!inQueue.isEmpty()) {
    return inQueue.removeFirst();
  }
  return input.next();
}
/**
 * Returns the next token, preferring any token queued in the buffer over
 * pulling a fresh one from the wrapped input.
 *
 * @return the next token, or null at EOS
 */
private Token nextTok() throws IOException {
  final boolean haveBuffered = buffer != null && !buffer.isEmpty();
  return haveBuffered ? buffer.removeFirst() : input.next();
}
/**
 * Returns the next token, preferring any token queued in the buffer;
 * otherwise delegates to the wrapped input using the reusable target.
 *
 * @param target reusable token passed through to the wrapped input
 * @return the next token, or null at EOS
 */
private Token nextTok(Token target) throws IOException {
  final boolean haveBuffered = buffer != null && !buffer.isEmpty();
  return haveBuffered ? buffer.removeFirst() : input.next(target);
}
/**
 * Tokenizes the given text with the configured analyzer.
 *
 * <p>Fix: parameterize the result list (no raw {@code ArrayList}) so the
 * array conversion is type-checked instead of relying on an unchecked cast.
 *
 * @param text the text to tokenize; null or blank yields an empty array
 * @return the tokens produced, possibly empty, never null
 * @throws IOException if the underlying token stream fails
 */
private Token[] parseText(String text) throws IOException {
  if (text == null || text.trim().equals("")) {
    return new Token[0];
  }
  final ArrayList<Token> result = new ArrayList<Token>();
  final TokenStream ts =
      analyzer.tokenStream(DocumentBuilder.CONTENT_FIELD_NAME, new StringReader(text));
  for (Token token = ts.next(); token != null; token = ts.next()) {
    result.add(token);
  }
  return result.toArray(new Token[result.size()]);
}
/**
 * Returns the next token from the wrapped input, mirroring it to the sink.
 *
 * <p>NOTE(review): {@code sink.add} is invoked unconditionally, so at EOS
 * it receives null — presumably the sink tolerates/ignores null; verify.
 *
 * @param reusableToken non-null token to reuse
 * @return the produced token, or null at EOS
 */
public Token next(final Token reusableToken) throws IOException {
  assert reusableToken != null;
  final Token produced = input.next(reusableToken);
  sink.add(produced);
  return produced;
}
/**
 * Returns the next token, emitting any queued synonyms first.
 *
 * <p>Synonyms generated for an earlier token wait on {@code synonymStack}
 * and are drained before the wrapped input is advanced.
 *
 * @return the next token, or null at EOS
 */
public Token next() throws IOException {
  if (!synonymStack.isEmpty()) {
    return (Token) synonymStack.removeFirst();
  }
  final Token token = input.next();
  if (token != null) {
    addAliasesToStack(token);
  }
  return token;
}
/**
 * Tokenizes the given text with a SimpleAnalyzer.
 *
 * <p>Fix: parameterize the result list (no raw {@code ArrayList}) so the
 * array conversion is type-checked instead of relying on an unchecked cast.
 *
 * @param text the text to tokenize; null or blank yields an empty array
 * @return the tokens produced, possibly empty, never null
 * @throws IOException if the underlying token stream fails
 */
private static Token[] parseText(String text) throws IOException {
  if (text == null || text.trim().equals("")) {
    return new Token[0];
  }
  final ArrayList<Token> result = new ArrayList<Token>();
  final TokenStream ts =
      new SimpleAnalyzer().tokenStream(DocumentBuilder.CONTENT_FIELD_NAME, new StringReader(text));
  for (Token token = ts.next(); token != null; token = ts.next()) {
    result.add(token);
  }
  return result.toArray(new Token[result.size()]);
}
public Token next() throws IOException { // we put the payload on the last token. It has already been indexed // and it will be used on the all property later on if (lastToken != null && payload != null) { lastToken.setPayload(payload); } lastToken = tokenStream.next(); if (lastToken != null) { tokens.add(lastToken); } return lastToken; }
/**
 * Returns the next token from the wrapped input and forwards it to the
 * sink.
 *
 * <p>NOTE(review): the forward happens even for the terminating null —
 * presumably the sink handles null; confirm against its implementation.
 *
 * @param reusableToken non-null token to reuse
 * @return the produced token, or null at EOS
 */
public Token next(final Token reusableToken) throws IOException {
  assert reusableToken != null;
  final Token result = input.next(reusableToken);
  sink.add(result);
  return result;
}
/** Returns the next input Token, after being stemmed */ public final Token next() throws IOException { Token token = input.next(); if (token == null) return null; else { String s = stemmer.stem(token.termText); if (s != token.termText) // Yes, I mean object reference comparison here token.termText = s; return token; } } }
/**
 * Tokenizes the query string with the given analyzer.
 *
 * @param q        the text to tokenize
 * @param analyzer supplies the (reusable) token stream
 * @return all tokens produced, in stream order
 * @throws IOException if the token stream fails
 */
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
  final Collection<Token> result = new ArrayList<Token>();
  final TokenStream ts = analyzer.reusableTokenStream("", new StringReader(q));
  ts.reset();
  for (Token token = ts.next(); token != null; token = ts.next()) {
    result.add(token);
  }
  return result;
}
/**
 * Returns the next token with its term buffer lower-cased in place,
 * one character at a time via {@code Character.toLowerCase}.
 *
 * @param reusableToken non-null token to reuse
 * @return the lower-cased token, or null at EOS
 */
public final Token next(final Token reusableToken) throws IOException {
  assert reusableToken != null;
  final Token nextToken = input.next(reusableToken);
  if (nextToken == null) {
    return null;
  }
  final char[] term = nextToken.termBuffer();
  // Iterate backwards; each slot is independent, so order is irrelevant.
  for (int i = nextToken.termLength() - 1; i >= 0; i--) {
    term[i] = Character.toLowerCase(term[i]);
  }
  return nextToken;
}
}
// For each chapter... Reader reader = ...; // You are responsible for opening a reader for each chapter Analyzer analyzer = new StandardAnalyzer(); TokenStream tokenStream = analyzer.tokenStream("", reader); Token token = new Token(); while ((token = tokenStream.next(token)) != null) ) { String keyword = token.term(); // You can now do whatever you wish with this keyword }
/**
 * Returns the next token after running the stemmer over its term buffer.
 *
 * @param reusableToken non-null token to reuse
 * @return the (possibly stemmed) token, or null at EOS
 */
public final Token next(final Token reusableToken) throws IOException {
  assert reusableToken != null;
  final Token nextToken = input.next(reusableToken);
  if (nextToken == null) {
    return null;
  }
  // stem() reports whether the term changed; copy the result only then.
  final boolean changed = stemmer.stem(nextToken.termBuffer(), 0, nextToken.termLength());
  if (changed) {
    nextToken.setTermBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
  }
  return nextToken;
}
}
/**
 * Stems the next input token's term buffer in place and returns it.
 *
 * @param reusableToken non-null token to reuse
 * @return the (possibly stemmed) token, or null at EOS
 */
public final Token next(final Token reusableToken) throws IOException {
  assert reusableToken != null;
  Token tok = input.next(reusableToken);
  // Replace the term only when the stemmer reports an actual change.
  if (tok != null && stemmer.stem(tok.termBuffer(), 0, tok.termLength())) {
    tok.setTermBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
  }
  return tok;
}
}