/**
 * Creates a tokenizer over the text of the parsed {@link AnalysedText}.
 * <p>
 * The superclass is fed a {@link StringReader} over the full text; the
 * pre-computed sentence annotations are cached locally so that sentence
 * boundaries do not need to be re-detected.
 *
 * @param at the analysed text to tokenize (must already carry sentence
 *           annotations; not validated here)
 */
protected AnalyzedTextSentenceTokenizer(AnalysedText at) {
    // NOTE(review): toString() copies the whole text into a String — presumably
    // required because the superclass expects a Reader; confirm before changing.
    super(new StringReader(at.getText().toString()));
    this.at = at;
    this.sentences = at.getSentences();
}
/**
 * Builds a sentence-aware tokenizer for the given {@link AnalysedText}.
 * <p>
 * Wraps the full analysed text in a {@link StringReader} for the superclass
 * and keeps a reference to the text plus its sentence iterator/collection
 * for later use.
 *
 * @param at the analysed text whose content and sentences back this tokenizer
 */
protected AnalyzedTextSentenceTokenizer(AnalysedText at) {
    // Superclass requires a Reader, so the CharSequence text is materialised
    // as a String here (full copy of the document text).
    super(new StringReader(at.getText().toString()));
    this.at = at;
    this.sentences = at.getSentences();
}
/**
 * Constructs the tokenizer from an {@link AnalysedText}.
 * <p>
 * Stores the analysed text and its sentence annotations, and hands the
 * superclass a {@link StringReader} over the complete text content.
 *
 * @param at analysed text providing both the raw text and its sentences
 */
protected AnalyzedTextSentenceTokenizer(AnalysedText at) {
    // at.getText() is a CharSequence; converted to String because the
    // super constructor consumes a Reader.
    super(new StringReader(at.getText().toString()));
    this.at = at;
    this.sentences = at.getSentences();
}
// NOTE(review): this line is a concatenation of FIVE separate trace-logging
// snippets, apparently cut from different branches of a tag-reduction routine
// (keep/reduce decisions for tags against linkable tokens and matchable
// chunks). It is NOT valid Java as it stands: the two "new Object[]{" varargs
// initializers are unterminated, and "CharSequence text" is declared several
// times in what would be one scope. The surrounding method bodies were lost
// in extraction — restore each snippet to its original branch before use.
// Also note the typo "overlapp" in the first log message; it is a runtime
// string, so it is left untouched here.
CharSequence tagSequence = at.getText().subSequence(start, end); log.trace(" > reduce tag {} - no overlapp with linkable token", tagSequence); CharSequence text = at.getText(); log.trace(" - matchable Span {}{} for Tag {}[{},{}]", new Object[]{ text.subSequence(mSpan[0],mSpan[1]), CharSequence text = at.getText(); log.trace(" - reduce tag {}[{},{}] - does only match " + "{} of {} of matchable Chunk {}[{},{}]", CharSequence text = at.getText(); log.trace(" + keep tag {}[{},{}] - matches {} of {} " + "matchable Tokens for matchable Chunk {}[{},{}]", CharSequence text = at.getText(); log.trace(" + keep tag {}[{},{}] - matches whole Chunk {}[{},{}]", new Object[]{text.subSequence(start, end), start, end, CharSequence tagSequence = at.getText().subSequence(start, end); log.trace(" + keep tag {} - not in processable chunk", tagSequence);
// NOTE(review): two truncated snippets from (presumably) different call sites
// that run a SentenceTokenizer over the analysed text. Each opens a try block
// that is cut off here — the matching close (and, per Lucene conventions, the
// TokenStream close/end handling) lies outside this view. CharSequenceReader
// avoids copying the text, unlike the StringReader used by the constructor
// snippets above — TODO confirm whether that inconsistency is intentional.
TokenStream sentences = new SentenceTokenizer(new CharSequenceReader(at.getText())); try { sentences.reset();
TokenStream sentences = new SentenceTokenizer(new CharSequenceReader(at.getText())); try { while(sentences.incrementToken()){
// NOTE(review): duplicated statement run from the middle of an unseen method.
// It slices the covered text between the first and last processable matched
// token and looks up the chunk containing the current token. The locals
// (tokens, firstProcessableFoundIndex, lastProcessableFoundIndex, state) are
// defined outside this view — semantics of "processable" cannot be confirmed
// from here.
int start = tokens.get(firstProcessableFoundIndex).token.getStart(); int end = tokens.get(lastProcessableFoundIndex).token.getEnd(); CharSequence content = state.getToken().token.getContext().getText(); CharSequence match = content.subSequence(start, end); ChunkData cd = state.getToken().inChunk;
int start = tokens.get(firstProcessableFoundIndex).token.getStart(); int end = tokens.get(lastProcessableFoundIndex).token.getEnd(); CharSequence content = state.getToken().token.getContext().getText(); CharSequence match = content.subSequence(start, end); ChunkData cd = state.getToken().inChunk;
// NOTE(review): truncated snippet — the if(log.isDebugEnabled()) block opened
// here is never closed in this view. Logs the sentiment-phrase text and a
// sentence preview clipped to 17 chars plus "..." when longer; the sentence
// may legitimately be null ("none"). The guard avoids building the subSequence
// strings unless debug logging is active.
Sentence sentence = sentPhrase.getSentence(); if(log.isDebugEnabled()){ //debug sentiment info CharSequence phraseText = at.getText().subSequence(sentPhrase.getStartIndex(), sentPhrase.getEndIndex()); log.debug("Write SentimentPhrase for {} (sentence: {})", phraseText, sentence == null ? "none" : sentence.getSpan().length() > 17 ? (sentence.getSpan().subSequence(0,17) + "...") : sentence.getSpan());
// NOTE(review): lone statement from an unseen method: builds a TokenStream
// over the analysed text via an injected tokenizer factory, using a
// CharSequenceReader to avoid copying the text into a String. Ownership and
// closing of the returned stream happen outside this view — verify the caller
// closes it.
TokenStream tokenStream = tokenizerFactory.create(new CharSequenceReader(at.getText()));