/**
 * Checks the {@link #start} {@link #end} values against the span selected
 * by the parsed token.<p>
 * This method is called by all others that do add tokens.
 * @param token the added token
 */
private void checkSpan(Token token) {
    //widen the tracked span so it always covers the added token
    start = Math.min(start, token.getStart());
    end = Math.max(end, token.getEnd());
}
/**
 * Creates a new Linked Entity including the first {@link Occurrence}
 * @param section the sentence (context) for the occurrence.
 * @param startToken the first token of the linked entity
 * @param endToken the last token of the linked entity
 * @param suggestions the entity suggestions
 * @param types the types of the linked entity.
 */
protected LinkedEntity(Section section,Token startToken,Token endToken, List<Suggestion> suggestions, Set<IRI> types) {
    //NOTE(review): the label is cut from startToken.getSpan() using absolute
    //character offsets — presumably getSpan() here returns the full text the
    //token belongs to; verify against the Token API.
    this(startToken.getSpan().substring(startToken.getStart(), endToken.getEnd()), suggestions,types);
    addOccurrence(section, startToken,endToken);
}
/**
/**
 * Checks whether the given token marks a section border, i.e. whether its
 * POS hierarchy shares at least one tag with {@link #sectionBorderPosTags}.
 * @param token the token to inspect
 * @param language the language (not used by the current check)
 * @return <code>true</code> if the token's POS annotation intersects the
 * configured section border POS tags, otherwise <code>false</code>
 */
private boolean isSectionBorder(Token token, String language) {
    Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    return posAnnotation != null
            && !Collections.disjoint(sectionBorderPosTags,
                posAnnotation.value().getPosHierarchy());
}
/**
 * Creates an Occurrence for the span covered by the start/end tokens of a
 * sentence; stores the character offsets and a (possibly truncated) context.
 */
private Occurrence(Section sentence,Token start,Token end){
    this.start = start.getStart();
    this.end = end.getEnd();
    String context = sentence.getSpan();
    if(context.length() > MAX_CONTEXT_LENGTH){
        //sentence too long: cut a window around the occurrence out of the
        //whole text instead.
        //NOTE(review): CONTEXT_TOKEN_COUNT is added/subtracted to CHARACTER
        //offsets here — if it counts tokens (as the name suggests) the window
        //is measured in the wrong unit; confirm intent.
        //NOTE(review): the -1 is applied AFTER Math.min, so even windows that
        //end well before the text end lose their last character — verify.
        context = start.getContext().getSpan().substring(
            Math.max(0, this.start-CONTEXT_TOKEN_COUNT),
            Math.min(this.end+CONTEXT_TOKEN_COUNT, start.getContext().getEnd())-1);
    }
    this.context = context;
}
/**
Token token = tokens.next(); tokenList.add(token); tokenTextList.add(token.getSpan()); Value<PosTag> posValue = token.getAnnotation(POS_ANNOTATION); if(posValue == null){ throw new EngineException("Missing POS value for Token '" + token.getSpan()+"' of ContentItem "+ci.getUri() + "(Sentence: '"+sentence.getSpan()+"'). This may " + "indicate that a POS tagging Engine is missing in " tokenList.get(i-chunkTokenCount).getStart(), tokenList.get(i-1).getEnd()); chunk.addAnnotation(PHRASE_ANNOTATION, new Value<PhraseTag>(tag, tokenList.get(i-chunkTokenCount).getStart(), tokenList.get(i-1).getEnd()); chunk.addAnnotation(PHRASE_ANNOTATION, new Value<PhraseTag>(tag,
this.value = value; this.sentence = sentence; this.start = token.getStart(); this.end = token.getEnd(); List<Value<PosTag>> tags = token.getAnnotations(NlpAnnotations.POS_ANNOTATION); PosTag posTag = null; if(tags != null && !tags.isEmpty()){
if(!adjectivesOnly){ process = true; Value<PosTag> posTag = token.getAnnotation(NlpAnnotations.POS_ANNOTATION); if(posTag != null && posTag.probability() == Value.UNKNOWN_PROBABILITY || posTag.probability() >= (minPOSConfidence/2.0)){ Iterator<Value<PosTag>> posTags = token.getAnnotations(NlpAnnotations.POS_ANNOTATION).iterator(); boolean ignore = false; while(!ignore && !process && posTags.hasNext()) { String word = token.getSpan(); double sentiment = 0.0; if(cats.isEmpty()){ token.addAnnotation(SENTIMENT_ANNOTATION, new Value<Double>(sentiment));
!sentimentAnnotation.value().equals(ZERO)){ sentiment = new Sentiment(word, sentimentAnnotation.value(), sentence == null || word.getEnd() > sentence.getEnd() ? null : sentence); addToList = true; Value<PosTag> pos = word.getAnnotation(NlpAnnotations.POS_ANNOTATION); log.debug(" [{}] '{}' pos: {}, sentiment {}", new Object[]{ addToList ? sentimentTokens.size() : "-", word.getSpan(),pos.value().getCategories(), sentiment == null ? "none" : sentiment.getValue()});
/**
 * Getter for the text covered by the token.
 * @return the span text of the wrapped token
 */
public String getTokenText(){
    final String spanText = token.getSpan();
    return spanText;
}
/**
/**
 * Checks if the parsed {@link Token} represents a noun.
 * @param token the word
 * @param firstTokenInSentence if the token is the first one of its sentence
 *        (capitalisation is only used as a noun hint for later tokens)
 * @param language the language (not used by the current check)
 * @return <code>true</code> if the {@link Token} is considered a noun
 * (or a cardinal number). Otherwise <code>false</code>
 */
private boolean isNoun(Token token, boolean firstTokenInSentence, String language) {
    String word = token.getSpan();
    if(!firstTokenInSentence && !word.isEmpty() && Character.isUpperCase(word.charAt(0))){
        return true; //assume all upper case tokens are Nouns
    }
    Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    //also accept cardinal numbers in addition to the Noun category
    if(posAnnotation != null && (posAnnotation.value().hasCategory(LexicalCategory.Noun)
            || posAnnotation.value().getPosHierarchy().contains(Pos.CardinalNumber))){
        return true;
    }
    return false;
}
/**
for(Value<PosTag> posAnno : token.getAnnotations(NlpAnnotations.POS_ANNOTATION)){ if(posAnno.value().isMapped()){ for(LexicalCategory cat :posAnno.value().getCategories()){ token.addAnnotation(NlpAnnotations.MORPHO_ANNOTATION, value);
/** * Everytime the entityLinker starts to process a token we need to check * if we need to add additional contextual information from the {@link ContentItem} * to the {@link InMemoryEntityIndex} */ @Override public void startToken(Token token) { log.debug(" > start token: {}",token); final Integer actIndex = token.getStart(); if(actIndex > lastIndex){ for(Collection<EntityMention> mentions : mentionIndex.subMap(lastIndex, actIndex).values()){ for(EntityMention mention : mentions){ addEntity(mention); } } lastIndex = actIndex; } else if(lastIndex > actIndex){ log.warn("Token {} has earlier start index as the last one {}!", token, lastIndex); } // else the same index ... ignore }
String[] tokenTexts = new String[tokenList.size()]; for(int i=0;i<tokenList.size(); i++){ tokenTexts[i] = tokenList.get(i).getSpan(); token.addAnnotations(POS_ANNOTATION, Value.values(actPos, actProp,j));
/**
 * Used for trace level logging of tokens that are part of a chunk.
 * @param token the token whose POS annotations should be rendered
 * @return a string listing, per POS annotation, its categories (if several),
 * its single category, or — when none — the raw POS tag
 */
private String logPosCategories(Token token){
    List<Value<PosTag>> posAnnotations = token.getAnnotations(POS_ANNOTATION);
    List<String> rendered = new ArrayList<String>(posAnnotations.size());
    for(Value<PosTag> posAnnotation : posAnnotations){
        PosTag posTag = posAnnotation.value();
        Set<LexicalCategory> categories = posTag.getCategories();
        switch(categories.size()){
            case 0: //no mapped category — fall back to the raw tag
                rendered.add(posTag.getTag());
                break;
            case 1:
                rendered.add(categories.iterator().next().toString());
                break;
            default:
                rendered.add(categories.toString());
        }
    }
    return rendered.toString();
}
token.addAnnotation(POS_ANNOTATION, Value.value(posTag)); inflectionAttr.getInflectionType(); if(morpho != null){ //if present add the morpho token.addAnnotation(MORPHO_ANNOTATION, Value.value(morpho));
/**
 * The {@link AnalysedText Text} this token belongs to.
 * @return the analysed text (the token's context)
 */
public AnalysedText getAnalysedText(){
    final AnalysedText analysedText = token.getContext();
    return analysedText;
}
/**
new Object[]{tokenData.index,tokenData.token, tokenData.morpho != null ? ("(lemma: "+tokenData.morpho.getLemma()+") ") : "", tokenData.token.getAnnotations(POS_ANNOTATION), tokenData.inChunk != null ? tokenData.inChunk.chunk.getSpan() : "none"}); activeChunk.matchableStartCharIndex = tokenData.token.getStart(); activeChunk.matchableEndCharIndex = tokenData.token.getEnd(); if(!ct.isLinkable) { //if not already processable log.debug(" > convert Token {}: {} (pos:{}) from matchable to processable", new Object[]{i,ct.token.getSpan(),ct.token.getAnnotations(POS_ANNOTATION)}); ct.isLinkable = true; if(!hasLinkableToken){
/**
 * Creates an Occurrence for the span covered by the start/end tokens of a
 * sentence; stores the character offsets and a (possibly truncated) context.
 */
private Occurrence(Section sentence,Token start,Token end){
    this.start = start.getStart();
    this.end = end.getEnd();
    String context = sentence.getSpan();
    if(context.length() > MAX_CONTEXT_LENGTH){
        //sentence too long: cut a window around the occurrence out of the
        //whole text instead.
        //NOTE(review): CONTEXT_TOKEN_COUNT is added/subtracted to CHARACTER
        //offsets here — if it counts tokens (as the name suggests) the window
        //is measured in the wrong unit; confirm intent.
        //NOTE(review): the -1 is applied AFTER Math.min, so even windows that
        //end well before the text end lose their last character — verify.
        context = start.getContext().getSpan().substring(
            Math.max(0, this.start-CONTEXT_TOKEN_COUNT),
            Math.min(this.end+CONTEXT_TOKEN_COUNT, start.getContext().getEnd())-1);
    }
    this.context = context;
}
/**
/**
 * Getter for the text covered by the token.
 * @return the span text of the wrapped token
 */
public String getTokenText(){
    final String spanText = token.getSpan();
    return spanText;
}
/**
double sumScore = 0; double[] matchScores = new double[ttd.length]; for(Value<PosTag> pos : token.getAnnotations(POS_ANNOTATION)){ log.trace(" - {}",pos); double score = pos.probability();