/**
 * Checks the {@link #start} {@link #end} values against the span selected
 * by the parsed token.<p>
 * This method is called by all others that do add tokens.
 * @param token the added token
 */
private void checkSpan(Token token) {
    //widen the tracked span so it always covers the added token
    start = Math.min(start, token.getStart());
    end = Math.max(end, token.getEnd());
}
/**
 * Creates a new Linked Entity including the first {@link Occurrence}
 * @param section the sentence (context) for the occurrence.
 * @param startToken the first token of the linked entity
 * @param endToken the last token of the linked entity
 * @param suggestions the entity suggestions
 * @param types the types of the linked entity.
 */
protected LinkedEntity(Section section,Token startToken,Token endToken, List<Suggestion> suggestions, Set<IRI> types) {
    //NOTE(review): the label is cut from startToken.getSpan() using absolute
    //character offsets — presumably getSpan() here returns the full text the
    //token belongs to; verify against the Token API.
    this(startToken.getSpan().substring(startToken.getStart(), endToken.getEnd()), suggestions,types);
    addOccurrence(section, startToken,endToken);
}
/**
/**
 * Checks whether the given token marks a section border, i.e. whether its
 * POS hierarchy shares at least one tag with {@link #sectionBorderPosTags}.
 * @param token the token to inspect
 * @param language the language (not used by the current check)
 * @return <code>true</code> if the token's POS annotation intersects the
 * configured section border POS tags, otherwise <code>false</code>
 */
private boolean isSectionBorder(Token token, String language) {
    Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    return posAnnotation != null
            && !Collections.disjoint(sectionBorderPosTags,
                posAnnotation.value().getPosHierarchy());
}
/**
 * Creates an Occurrence for the span covered by the start/end tokens of a
 * sentence; stores the character offsets and a (possibly truncated) context.
 */
private Occurrence(Section sentence,Token start,Token end){
    this.start = start.getStart();
    this.end = end.getEnd();
    String context = sentence.getSpan();
    if(context.length() > MAX_CONTEXT_LENGTH){
        //sentence too long: cut a window around the occurrence out of the
        //whole text instead.
        //NOTE(review): CONTEXT_TOKEN_COUNT is added/subtracted to CHARACTER
        //offsets here — if it counts tokens (as the name suggests) the window
        //is measured in the wrong unit; confirm intent.
        //NOTE(review): the -1 is applied AFTER Math.min, so even windows that
        //end well before the text end lose their last character — verify.
        context = start.getContext().getSpan().substring(
            Math.max(0, this.start-CONTEXT_TOKEN_COUNT),
            Math.min(this.end+CONTEXT_TOKEN_COUNT, start.getContext().getEnd())-1);
    }
    this.context = context;
}
/**
Token token = tokens.next(); tokenList.add(token); tokenTextList.add(token.getSpan()); Value<PosTag> posValue = token.getAnnotation(POS_ANNOTATION); if(posValue == null){ throw new EngineException("Missing POS value for Token '" + token.getSpan()+"' of ContentItem "+ci.getUri() + "(Sentence: '"+sentence.getSpan()+"'). This may " + "indicate that a POS tagging Engine is missing in " tokenList.get(i-chunkTokenCount).getStart(), tokenList.get(i-1).getEnd()); chunk.addAnnotation(PHRASE_ANNOTATION, new Value<PhraseTag>(tag, tokenList.get(i-chunkTokenCount).getStart(), tokenList.get(i-1).getEnd()); chunk.addAnnotation(PHRASE_ANNOTATION, new Value<PhraseTag>(tag,
this.value = value; this.sentence = sentence; this.start = token.getStart(); this.end = token.getEnd(); List<Value<PosTag>> tags = token.getAnnotations(NlpAnnotations.POS_ANNOTATION); PosTag posTag = null; if(tags != null && !tags.isEmpty()){
if(!adjectivesOnly){ process = true; Value<PosTag> posTag = token.getAnnotation(NlpAnnotations.POS_ANNOTATION); if(posTag != null && posTag.probability() == Value.UNKNOWN_PROBABILITY || posTag.probability() >= (minPOSConfidence/2.0)){ Iterator<Value<PosTag>> posTags = token.getAnnotations(NlpAnnotations.POS_ANNOTATION).iterator(); boolean ignore = false; while(!ignore && !process && posTags.hasNext()) { String word = token.getSpan(); double sentiment = 0.0; if(cats.isEmpty()){ token.addAnnotation(SENTIMENT_ANNOTATION, new Value<Double>(sentiment));
!sentimentAnnotation.value().equals(ZERO)){ sentiment = new Sentiment(word, sentimentAnnotation.value(), sentence == null || word.getEnd() > sentence.getEnd() ? null : sentence); addToList = true; Value<PosTag> pos = word.getAnnotation(NlpAnnotations.POS_ANNOTATION); log.debug(" [{}] '{}' pos: {}, sentiment {}", new Object[]{ addToList ? sentimentTokens.size() : "-", word.getSpan(),pos.value().getCategories(), sentiment == null ? "none" : sentiment.getValue()});
/**
 * Getter for the text covered by the token.
 * @return the span text of the wrapped token
 */
public String getTokenText(){
    final String spanText = token.getSpan();
    return spanText;
}
/**
/**
 * Checks if the parsed {@link Token} represents a noun.
 * @param token the word
 * @param firstTokenInSentence if the token is the first one of its sentence
 *        (capitalisation is only used as a noun hint for later tokens)
 * @param language the language (not used by the current check)
 * @return <code>true</code> if the {@link Token} is considered a noun
 * (or a cardinal number). Otherwise <code>false</code>
 */
private boolean isNoun(Token token, boolean firstTokenInSentence, String language) {
    String word = token.getSpan();
    if(!firstTokenInSentence && !word.isEmpty() && Character.isUpperCase(word.charAt(0))){
        return true; //assume all upper case tokens are Nouns
    }
    Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    //also accept cardinal numbers in addition to the Noun category
    if(posAnnotation != null && (posAnnotation.value().hasCategory(LexicalCategory.Noun)
            || posAnnotation.value().getPosHierarchy().contains(Pos.CardinalNumber))){
        return true;
    }
    return false;
}
/**
for(Value<PosTag> posAnno : token.getAnnotations(NlpAnnotations.POS_ANNOTATION)){ if(posAnno.value().isMapped()){ for(LexicalCategory cat :posAnno.value().getCategories()){ token.addAnnotation(NlpAnnotations.MORPHO_ANNOTATION, value);
/** * Everytime the entityLinker starts to process a token we need to check * if we need to add additional contextual information from the {@link ContentItem} * to the {@link InMemoryEntityIndex} */ @Override public void startToken(Token token) { log.debug(" > start token: {}",token); final Integer actIndex = token.getStart(); if(actIndex > lastIndex){ for(Collection<EntityMention> mentions : mentionIndex.subMap(lastIndex, actIndex).values()){ for(EntityMention mention : mentions){ addEntity(mention); } } lastIndex = actIndex; } else if(lastIndex > actIndex){ log.warn("Token {} has earlier start index as the last one {}!", token, lastIndex); } // else the same index ... ignore }
String[] tokenTexts = new String[tokenList.size()]; for(int i=0;i<tokenList.size(); i++){ tokenTexts[i] = tokenList.get(i).getSpan(); token.addAnnotations(POS_ANNOTATION, Value.values(actPos, actProp,j));
/**
 * Used for trace level logging of tokens that are part of a chunk.
 * @param token the token whose POS annotations should be rendered
 * @return a string listing, per POS annotation, its categories (if several),
 * its single category, or — when none — the raw POS tag
 */
private String logPosCategories(Token token){
    List<Value<PosTag>> posAnnotations = token.getAnnotations(POS_ANNOTATION);
    List<String> rendered = new ArrayList<String>(posAnnotations.size());
    for(Value<PosTag> posAnnotation : posAnnotations){
        PosTag posTag = posAnnotation.value();
        Set<LexicalCategory> categories = posTag.getCategories();
        switch(categories.size()){
            case 0: //no mapped category — fall back to the raw tag
                rendered.add(posTag.getTag());
                break;
            case 1:
                rendered.add(categories.iterator().next().toString());
                break;
            default:
                rendered.add(categories.toString());
        }
    }
    return rendered.toString();
}
token.addAnnotation(POS_ANNOTATION, Value.value(posTag)); inflectionAttr.getInflectionType(); if(morpho != null){ //if present add the morpho token.addAnnotation(MORPHO_ANNOTATION, Value.value(morpho));
/**
 * The {@link AnalysedText Text} this token belongs to.
 * @return the analysed text (the token's context)
 */
public AnalysedText getAnalysedText(){
    final AnalysedText analysedText = token.getContext();
    return analysedText;
}
/**
new Object[]{tokenData.index,tokenData.token, tokenData.morpho != null ? ("(lemma: "+tokenData.morpho.getLemma()+") ") : "", tokenData.token.getAnnotations(POS_ANNOTATION), tokenData.inChunk != null ? tokenData.inChunk.chunk.getSpan() : "none"}); activeChunk.matchableStartCharIndex = tokenData.token.getStart(); activeChunk.matchableEndCharIndex = tokenData.token.getEnd(); if(!ct.isLinkable) { //if not already processable log.debug(" > convert Token {}: {} (pos:{}) from matchable to processable", new Object[]{i,ct.token.getSpan(),ct.token.getAnnotations(POS_ANNOTATION)}); ct.isLinkable = true; if(!hasLinkableToken){
/**
 * Creates an Occurrence for the span covered by the start/end tokens of a
 * sentence; stores the character offsets and a (possibly truncated) context.
 */
private Occurrence(Section sentence,Token start,Token end){
    this.start = start.getStart();
    this.end = end.getEnd();
    String context = sentence.getSpan();
    if(context.length() > MAX_CONTEXT_LENGTH){
        //sentence too long: cut a window around the occurrence out of the
        //whole text instead.
        //NOTE(review): CONTEXT_TOKEN_COUNT is added/subtracted to CHARACTER
        //offsets here — if it counts tokens (as the name suggests) the window
        //is measured in the wrong unit; confirm intent.
        //NOTE(review): the -1 is applied AFTER Math.min, so even windows that
        //end well before the text end lose their last character — verify.
        context = start.getContext().getSpan().substring(
            Math.max(0, this.start-CONTEXT_TOKEN_COUNT),
            Math.min(this.end+CONTEXT_TOKEN_COUNT, start.getContext().getEnd())-1);
    }
    this.context = context;
}
/**
/**
 * Getter for the text covered by the token.
 * @return the span text of the wrapped token
 */
public String getTokenText(){
    final String spanText = token.getSpan();
    return spanText;
}
/**
double sumScore = 0; double[] matchScores = new double[ttd.length]; for(Value<PosTag> pos : token.getAnnotations(POS_ANNOTATION)){ log.trace(" - {}",pos); double score = pos.probability();