@Override
public void reduce(TagLL[] head) {
    //(1) reduce Tags based on named entity phrases.
    for(TagLL tag = head[0]; tag != null; tag = tag.getNextTag()) {
        int start = tag.getStartOffset();
        int end = tag.getEndOffset();
        Chunk nePhrase = nePhrases.isEmpty() ? null : nePhrases.get(0);
        while(nePhrase != null && nePhrase.getEnd() <= start){
            nePhrases.remove(0);
            nePhrase = nePhrases.isEmpty() ? null : nePhrases.get(0);
        }
        if(nePhrase == null || !(start <= nePhrase.getStart() && end >= nePhrase.getEnd())){
            //does not cover any named entity phrase
            tag.removeLL(); //remove the tag from the cluster
            if(log.isTraceEnabled()){
                log.trace(" > reduce tag {} - does not cover {}", tag, nePhrase);
            }
        } else if(log.isTraceEnabled()) { //the current Tag covers a named entity phrase
            log.trace(" > keep tag {} for {}", tag, nePhrase);
        }
    }
}
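The reduce() method above walks the tag cluster together with the list of named entity phrase chunks (assumed to be sorted by end offset) and drops every tag that does not fully cover the next phrase. Below is a minimal, self-contained sketch of the same covering check over plain integer spans; the Span record, CoverFilter class and sample offsets are hypothetical stand-ins, not the Stanbol TagLL/Chunk types used above.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;

// Hypothetical span type standing in for TagLL/Chunk offsets.
record Span(int start, int end) {}

class CoverFilter {
    /**
     * Keeps only candidate spans that fully cover the next phrase span,
     * mirroring the reduce() logic above. Candidates and phrases are both
     * expected in text order; consumed phrases are not revisited.
     */
    static List<Span> reduce(List<Span> candidates, List<Span> phrases) {
        List<Span> kept = new ArrayList<>();
        Deque<Span> remaining = new ArrayDeque<>(phrases);
        for (Span tag : candidates) {
            // drop phrases that end before the current tag starts
            while (!remaining.isEmpty() && remaining.peekFirst().end() <= tag.start()) {
                remaining.pollFirst();
            }
            Span phrase = remaining.peekFirst();
            if (phrase != null && tag.start() <= phrase.start() && tag.end() >= phrase.end()) {
                kept.add(tag); // the tag covers a named entity phrase
            } // otherwise the tag is discarded, like tag.removeLL() above
        }
        return kept;
    }

    public static void main(String[] args) {
        List<Span> phrases = List.of(new Span(5, 10), new Span(20, 25));
        List<Span> tags = List.of(new Span(4, 12), new Span(13, 18), new Span(21, 24));
        System.out.println(reduce(tags, phrases)); // prints only [Span[start=4, end=12]]
    }
}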
// ...
        offset.startOffset(), offset.endOffset(), termAtt});
// ...
while((neChunk == null || neChunk.getEnd() < offset.startOffset())
        && neChunks.hasNext()){
    neChunk = neChunks.next();
    nePhrases.add(neChunk);
}
// ...
log.debug("lookup percentage: {}", lookupCount*100/(float)incrementCount);
return false;
// ...
} else if(offset.endOffset() > neChunk.getStart() || offset.startOffset() < neChunk.getEnd()){
    log.trace("lookup: token [{},{}]: {} | named Entity [{},{}]:{}", new Object[]{
            offset.startOffset(), offset.endOffset(), termAtt,
            neChunk.getStart(), neChunk.getEnd(), neChunk.getSpan()});
Chunk chunk = chunkFactory.createChunk(current.get(0), lastConsumedToken);
chunk.addAnnotation(PHRASE_ANNOTATION, Value.value(phraseTag));
if(log.isDebugEnabled()){
    log.debug(" << add {} phrase {} '{}'", new Object[]{
            phraseType.getPhraseType().name(), chunk, chunk.getSpan()});
}
@Override
public boolean evaluate(Object o) {
    if(o instanceof Chunk){
        Chunk chunk = (Chunk)o;
        Value<NerTag> nerValue = chunk.getAnnotation(NER_ANNOTATION);
        if(nerValue != null){
            NerTag nerTag = nerValue.value();
            String nerType = nerTag.getType() != null
                    ? nerTag.getType().getUnicodeString() : null;
            if(wildcardType || neTypes.contains(nerTag.getTag())
                    || (nerType != null && neTypes.contains(nerType))){
                int[] span = new int[]{chunk.getStart(), chunk.getEnd()};
                Set<String> types = nePhrasesTypes.get(span);
                if(types == null){
                    types = new HashSet<String>(4);
                    nePhrasesTypes.put(span, types);
                }
                types.add(nerType);
                types.add(nerTag.getTag());
                return true;
            }
        }
    }
    return false;
}
}
chunk.addAnnotation(NER_ANNOTATION, Value.value(nerTag, confidence));
log.debug(">> Chunk: (type:{}, startPos: {}) text: '{}'", new Object []{ chunkData.chunk.getType(), chunkData.startToken, chunkData.chunk.getSpan() }); tokenData.morpho != null ? ("(lemma: "+tokenData.morpho.getLemma()+") ") : "", tokenData.token.getAnnotations(POS_ANNOTATION), tokenData.inChunk != null ? tokenData.inChunk.chunk.getSpan() : "none"}); if(log.isDebugEnabled()){ log.debug(" << end Chunk {} '{}' @pos: {}", new Object[]{ activeChunk.chunk, activeChunk.chunk.getSpan(), activeChunk.endToken});
this.chunk = chunk;
Boolean process = null;
for (Value<PhraseTag> phraseAnnotation : chunk.getAnnotations(PHRASE_ANNOTATION)) {
    if (tpc.getProcessedPhraseCategories().contains(phraseAnnotation.value().getCategory())
            || tpc.getProcessedPhraseTags().contains(phraseAnnotation.value().getTag())) {
        isNamedEntity = chunk.getAnnotation(NlpAnnotations.NER_ANNOTATION) != null;
        if(process == null && isNamedEntity
                && tpc.getProcessedPhraseCategories().contains(LexicalCategory.Noun)){
/**
 * Getter for the end character position of the text
 * @return the end character position
 */
public int getEndChar(){
    return chunk.getEnd();
}
/**
 * Getter for the start character position
 * @return the start character position of the selected text span.
 */
public int getStartChar(){
    return chunk.getStart();
}
        tokenList.get(i-chunkTokenCount).getStart(),
        tokenList.get(i-1).getEnd());
chunk.addAnnotation(PHRASE_ANNOTATION,
        new Value<PhraseTag>(tag, chunkProps/(double)chunkTokenCount));
// ...
        tokenList.get(i-chunkTokenCount).getStart(),
        tokenList.get(i-1).getEnd());
chunk.addAnnotation(PHRASE_ANNOTATION,
        new Value<PhraseTag>(tag, chunkProps/(double)chunkTokenCount));
        new Object[]{token.index, token.getTokenText(), token.getTokenLemma(),
                token.isLinkable, token.isMatchable,
                token.inChunk != null
                        ? (token.inChunk.chunk + " " + token.inChunk.chunk.getSpan()) : "none"});
log.debug(">> Chunk: (type:{}, startPos: {}) text: '{}'", new Object []{ chunkData.chunk.getType(), chunkData.startToken, chunkData.chunk.getSpan() }); tokenData.morpho != null ? ("(lemma: "+tokenData.morpho.getLemma()+") ") : "", tokenData.token.getAnnotations(POS_ANNOTATION), tokenData.inChunk != null ? tokenData.inChunk.chunk.getSpan() : "none"}); if(log.isDebugEnabled()){ log.debug(" << end Chunk {} '{}' @pos: {}", new Object[]{ activeChunk.chunk, activeChunk.chunk.getSpan(), activeChunk.endToken});
this.chunk = chunk; Boolean process = null; for (Value<PhraseTag> phraseAnnotation : chunk.getAnnotations(PHRASE_ANNOTATION)) { if (tpc.getProcessedPhraseCategories().contains(phraseAnnotation.value().getCategory()) || tpc.getProcessedPhraseTags().contains(phraseAnnotation.value().getTag())) { isNamedEntity = chunk.getAnnotation(NlpAnnotations.NER_ANNOTATION) != null; if(process == null && isNamedEntity && tpc.getProcessedPhraseCategories().contains(LexicalCategory.Noun)){
/** * Getter for the end character position of the text * @return the end character position */ public int getEndChar(){ return chunk.getEnd(); } /**
/** * Getter for the start character position * @return the start character position of the selected text span. */ public int getStartChar(){ return chunk.getStart(); } /**
chunk.addAnnotation(NlpAnnotations.NER_ANNOTATION, Value.value(ner.tag));
if(log.isTraceEnabled()){
    log.trace(" ... checking match with chunk {}: {}", cd.chunk, cd.chunk.getSpan());
}