private List<Token> tokenize(Section section,String langauge) { Tokenizer tokenizer = getTokenizer(langauge); String text = section.getSpan(); List<Token> tokens = new ArrayList<Token>(text.length()/5); //assume avr. token length is 5 opennlp.tools.util.Span[] tokenSpans = tokenizer.tokenizePos(section.getSpan()); for(int i=0;i<tokenSpans.length;i++){ Token token = section.addToken(tokenSpans[i].getStart(), tokenSpans[i].getEnd()); log.trace(" > add {}",token); tokens.add(token); } return tokens; }
Token token = section.addToken(tokenSpans[i].getStart(), tokenSpans[i].getEnd()); log.trace(" > add {}",token);