/** * Getter for the start character position * @return the start character position of the selected text span. */ public int getStartChar(){ return chunk.getStart(); } /**
/** * Getter for the start character position * @return the start character position of the selected text span. */ public int getStartChar(){ return chunk.getStart(); } /**
@Override public void reduce(TagLL[] head) { //(1) reduce Tags based on named entity phrases. for(TagLL tag = head[0]; tag != null; tag = tag.getNextTag()) { int start = tag.getStartOffset(); int end = tag.getEndOffset(); Chunk nePhrase = nePhrases.isEmpty() ? null : nePhrases.get(0); while(nePhrase != null && nePhrase.getEnd() <= start){ nePhrases.remove(0); nePhrase = nePhrases.isEmpty() ? null : nePhrases.get(0); } if(nePhrase == null || !(start <= nePhrase.getStart() && end >= nePhrase.getEnd())){ //does not cover any named entity phrase tag.removeLL(); //remove the tag from the cluster if(log.isTraceEnabled()){ log.trace(" > reduce tag {} - does not cover {}", tag, nePhrase); } } else if(log.isTraceEnabled()) {//the current Tag coveres a named entity phrase log.trace(" > keep tag {} for {}", tag, nePhrase); } } }
/**
 * Tests whether the passed object is a {@code Chunk} carrying a named
 * entity annotation that is accepted by this predicate.
 * <p>
 * A chunk is accepted when {@code wildcardType} is set, or when
 * {@code neTypes} contains the tag of its {@code NerTag}, or when
 * {@code neTypes} contains the unicode string of the tag's type.
 * <p>
 * Side effect: for every accepted chunk the type string and the tag are
 * added to the set stored in {@code nePhrasesTypes} under the chunk's
 * {@code [start, end]} span; the set is created on first use.
 *
 * @param o the object to test
 * @return {@code true} if {@code o} is an accepted named entity chunk,
 *         {@code false} otherwise
 */
@Override
public boolean evaluate(Object o) {
    if (!(o instanceof Chunk)) {
        return false;
    }
    final Chunk candidate = (Chunk) o;
    final Value<NerTag> annotation = candidate.getAnnotation(NER_ANNOTATION);
    if (annotation == null) {
        return false; // chunk carries no named entity annotation
    }

    final NerTag ner = annotation.value();
    String nerType = null;
    if (ner.getType() != null) {
        nerType = ner.getType().getUnicodeString();
    }

    final boolean accepted = wildcardType
            || neTypes.contains(ner.getTag())
            || (nerType != null && neTypes.contains(nerType));
    if (!accepted) {
        return false;
    }

    // NOTE(review): the key is a freshly allocated int[]. Arrays do not
    // override equals/hashCode, so this lookup can only hit an existing
    // entry if nePhrasesTypes compares keys by content (e.g. a sorted map
    // with a comparator) - confirm against its declaration.
    final int[] span = {candidate.getStart(), candidate.getEnd()};
    Set<String> collected = nePhrasesTypes.get(span);
    if (collected == null) {
        collected = new HashSet<String>(4);
        nePhrasesTypes.put(span, collected);
    }
    // NOTE(review): nerType may be null here (chunk accepted via wildcard or
    // via its tag) and is added as is - confirm that a null entry is intended.
    collected.add(nerType);
    collected.add(ner.getTag());
    return true;
}
} // closes an enclosing scope that is opened above this excerpt
log.debug("lookup percentage: {}",lookupCount*100/(float)incrementCount); return false; } else if(offset.endOffset() > neChunk.getStart() || offset.startOffset() < neChunk.getEnd()){ log.trace("lookup: token [{},{}]: {} | named Entity [{},{}]:{}", new Object[]{ offset.startOffset(), offset.endOffset(), termAtt, neChunk.getStart(), neChunk.getEnd(), neChunk.getSpan()});