org.apache.stanbol.enhancer.nlp.pos.PosTag java code examples

@Override
public ObjectNode serialize(ObjectMapper mapper, PosTag value){
  ObjectNode jPosTag = mapper.createObjectNode();
  jPosTag.put("tag", value.getTag());
  if(value.getPos().size() == 1){
    jPosTag.put("pos",value.getPos().iterator().next().ordinal());
  } else if(!value.getPos().isEmpty()){
    ArrayNode jPos = mapper.createArrayNode();
    for(Pos pos : value.getPos()){
      jPos.add(pos.ordinal());
  if(!value.getCategories().isEmpty()){
    categories.addAll(value.getCategories());
    for(Pos pos : value.getPos()){
      categories.removeAll(pos.categories());

/**
 * Resolves the {@link PosTag} instance for a string tag.
 * <p>
 * The lookup order is: (1) the parsed tag set, (2) the map of ad-hoc tags
 * created by earlier calls. If neither knows the tag a new {@link PosTag}
 * is created from the string, stored in the ad-hoc map so that later
 * calls with the same tag return the same instance, and the miss is
 * logged on info level.
 * @param model the tag set that is consulted first
 * @param adhocTags tags not contained in the model; this map is modified
 * when an unknown tag is encountered
 * @param tag the string tag to resolve
 * @param language the language; only used for the log message
 * @return the {@link PosTag} for the parsed string tag
 */
private PosTag getPosTag(TagSet<PosTag> model, Map<String,PosTag> adhocTags, String tag, String language) {
  PosTag posTag = model.getTag(tag);
  if(posTag != null){
    return posTag;
  }
  posTag = adhocTags.get(tag);
  if(posTag != null){
    return posTag;
  }
  //neither the model nor an earlier call knows this tag: create and remember it
  posTag = new PosTag(tag);
  adhocTags.put(tag, posTag);
  log.info("Encountered unmapped POS tag '{}' for language '{}'",tag,language);
  return posTag;
}

/**
 * Tests whether a posTag is accepted by this TokenTypeDefinition.
 * <p>
 * A posTag is accepted if it is included - by one of its lexical
 * categories, by an element of its POS hierarchy or by its string tag -
 * and no element of its POS hierarchy is excluded.
 * @param posTag the posTag to test
 * @return <code>true</code> if the posTag is accepted. Otherwise
 * <code>false</code>
 * @throws NullPointerException if the parsed posTag is <code>null</code>
 */
public boolean matches(PosTag posTag){
  //included by category, POS type or string tag?
  boolean included = !Collections.disjoint(posTag.getCategories(), categories)
      || !Collections.disjoint(posTag.getPosHierarchy(), posTags)
      || tags.contains(posTag.getTag());
  if(!included){
    return false;
  }
  //an included posTag is still rejected if it hits an excluded POS type
  return Collections.disjoint(posTag.getPosHierarchy(), excludedPosTags);
}

/**
 * Builds the string used for logging the POS annotations of a Token.
 * <p>
 * Every POS annotation contributes one entry: the set of lexical
 * categories if there are several, the single category if there is
 * exactly one, or the string tag if the annotation has no category.
 * @param token the token
 * @return the string representation of the list of collected entries
 */
private String logPosCategories(Token token){
  List<Value<PosTag>> annotations = token.getAnnotations(POS_ANNOTATION);
  List<String> entries = new ArrayList<String>(annotations.size());
  for(Value<PosTag> annotation : annotations){
    PosTag posTag = annotation.value();
    Set<LexicalCategory> lexCats = posTag.getCategories();
    String entry;
    switch(lexCats.size()){
      case 0: //no category: fall back to the string tag
        entry = posTag.getTag();
        break;
      case 1:
        entry = lexCats.iterator().next().toString();
        break;
      default:
        entry = lexCats.toString();
        break;
    }
    entries.add(entry);
  }
  return entries.toString();
}

/**
 * Checks if the parsed {@link Token} is treated as a noun.
 * <p>
 * A token that is not the first one in its sentence and that starts with
 * an upper case character is always accepted. Otherwise the POS annotation
 * of the token needs to have the {@link LexicalCategory#Noun} category or
 * {@link Pos#CardinalNumber} in its POS hierarchy.
 * @param token the token
 * @param firstTokenInSentence if the token is the first token of the
 * sentence; this disables the upper case check
 * @param language the language (not used by this implementation)
 * @return <code>true</code> if the {@link Token} is treated as a noun.
 * Otherwise <code>false</code>
 */
private boolean isNoun(Token token, boolean firstTokenInSentence, String language) {
  String span = token.getSpan();
  boolean upperCaseStart = !firstTokenInSentence && !span.isEmpty()
      && Character.isUpperCase(span.charAt(0));
  if(upperCaseStart){
    return true; //assume all upper case tokens are Nouns
  }
  Value<PosTag> pos = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
  if(pos == null){
    return false;
  }
  PosTag posTag = pos.value();
  return posTag.hasCategory(LexicalCategory.Noun)
      || posTag.getPosHierarchy().contains(Pos.CardinalNumber);
}
/**

/**
 * Writes the {@link NlpAnnotations#POS_ANNOTATION} of the annotated element
 * to the parsed RDF graph by using the Nif20 vocabulary and the parsed
 * segmentUri as subject. Nothing is written if the element has no POS
 * annotation.
 * @param graph the graph
 * @param annotated the annotated element (e.g. a {@link Token})
 * @param segmentUri the URI of the resource representing the parsed 
 * annotated element in the graph
 */
public static void writePos(Graph graph, Annotated annotated, IRI segmentUri) {
  Value<PosTag> posAnnotation = annotated.getAnnotation(NlpAnnotations.POS_ANNOTATION);
  if(posAnnotation == null){
    return; //no POS annotation present
  }
  PosTag tag = posAnnotation.value();
  //the olia categories are only available for mapped tags
  if(tag.isMapped()){
    for(Pos pos : tag.getPos()){
      graph.add(new TripleImpl(segmentUri, Nif20.oliaCategory.getUri(),
        pos.getUri()));
    }
    for(LexicalCategory category : tag.getCategories()){
      graph.add(new TripleImpl(segmentUri, Nif20.oliaCategory.getUri(),
        category.getUri()));
    }
  }
  //the string tag is written for mapped and unmapped tags
  graph.add(new TripleImpl(segmentUri, Nif20.posTag.getUri(),
    lf.createTypedLiteral(tag.getTag())));
  //set the oliaConf; presumably this also removes existing conf values
  //(e.g. for a single word phrase) - confirm against setOliaConf
  setOliaConf(graph, segmentUri, posAnnotation);
}
/**

if((!disjoint(tpc.getLinkedLexicalCategories(), posTag.getCategories())) ||
    (!disjoint(tpc.getLinkedPos(), posTag.getPosHierarchy())) ||
    tpc.getLinkedPosTags().contains(posTag.getTag())){
  if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY ||
      posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){
for(Value<PosTag> posAnnotation : posAnnotations){
  PosTag posTag = posAnnotation.value();
  if(posTag.isMapped()){
    if((!Collections.disjoint(tpc.getMatchedLexicalCategories(), posTag.getCategories())) ||
        (!Collections.disjoint(tpc.getMatchedPos(), posTag.getPosHierarchy())) ||
        tpc.getMatchedPosTags().contains(posTag.getTag())){
      if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY ||
          posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){
if((!disjoint(ProcessingState.SUB_SENTENCE_START_POS,posTag.getPosHierarchy()))){
  if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY ||
      posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){
for(Value<MorphoFeatures> morphoAnnotation : morphoAnnotations){
  for(PosTag posTag : morphoAnnotation.value().getPosList()){
    if(!disjoint(selectedPosTag.getCategories(),posTag.getCategories())){
      mf = morphoAnnotation.value();
      break selectMorphoFeature; //stop after finding the first one

if(posAnno.value().isMapped()){
  for(LexicalCategory cat :posAnno.value().getCategories()){
    if(!tokenLexCats.containsKey(cat)){ //do not override with lover prob
      tokenLexCats.put(cat, posAnno.probability());
  Set<LexicalCategory> mfCats = EnumSet.noneOf(LexicalCategory.class);
  for(PosTag mfPos : mf.getPosList()){
    mfCats.addAll(mfPos.getCategories());

/**
 * Returns the lexical categories of the parsed {@link PosTag} by
 * delegating to {@link PosTag#getCategories()}.
 * @param posTag the POS tag
 * @return the categories as returned by the parsed tag
 */
@Override
public Set<LexicalCategory> getCategories(PosTag posTag) {
  final Set<LexicalCategory> lexCats = posTag.getCategories();
  return lexCats;
}

  if(tag.probability() == Value.UNKNOWN_PROBABILITY ||
      tag.probability() >= MIN_POS_CONF || 
      !Collections.disjoint(tag.value().getCategories(),PREF_LEX_CAT)){
    posTag = tag.value();
    break;
  posTag = tags.get(0).value();
if(posTag.hasCategory(LexicalCategory.Noun)){
if(posTag.hasCategory(LexicalCategory.Verb)){
  setVerb(token);

while( j < posSequences.length && !done){
  String p = posSequences[j].getOutcomes().get(i);
  done = j > 0 && p.equals(actPos[0].getTag());
  if(!done){
    actPos[j] = getPosTag(posModel,adhocTags,p,language);

/**
 * Checks if the parsed {@link Token} marks a section border: that is if
 * the POS hierarchy of its POS annotation shares at least one element
 * with the configured section border POS types.
 * @param token the token
 * @param language the language (not used by this implementation)
 * @return <code>true</code> if the token is a section border.
 * <code>false</code> otherwise or if the token has no POS annotation
 */
private boolean isSectionBorder(Token token, String language) {
  Value<PosTag> pos = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
  if(pos == null){
    return false;
  }
  return !Collections.disjoint(sectionBorderPosTags, pos.value().getPosHierarchy());
}

  posTag = adhocTags.get(posAttr.getPartOfSpeech());
  if(posTag == null){
    posTag = new PosTag(posAttr.getPartOfSpeech());
    adhocTags.put(posAttr.getPartOfSpeech(), posTag);
    log.warn(" ... missing PosTag mapping for {}",posAttr.getPartOfSpeech());
  sentStartOffset = offset.startOffset();
if(posTag.hasPos(Pos.Point)) { 
  Sentence sent = at.addSentence(sentStartOffset, offset.startOffset());

PosTag posTag = pos.value();
if (posTag.hasCategory(LexicalCategory.Noun)
  || posTag.hasCategory(LexicalCategory.Adjective)) {
  nounNo++;
if (!hasGoodDeterminer && posTag.hasPos(Pos.Determiner)
  && langDeterminerSet.contains(token.getSpan().toLowerCase())) {
  hasGoodDeterminer = true;

/**
 * Checks if the POS annotation of the parsed {@link Token} has the
 * {@link LexicalCategory#Verb} category.
 * @param token the token
 * @param language the language (not used by this implementation)
 * @return <code>true</code> if the token is annotated as verb.
 * <code>false</code> otherwise or if the token has no POS annotation
 */
private boolean isVerb(Token token, String language) {
  Value<PosTag> pos = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
  if(pos == null){
    return false;
  }
  return pos.value().hasCategory(LexicalCategory.Verb);
}

/**
 * Writes the {@link NlpAnnotations#POS_ANNOTATION} of the annotated element
 * to the parsed RDF graph by using the SsoOntology vocabulary and the
 * parsed segmentUri as subject. Nothing is written if the element has no
 * POS annotation.
 * @param graph the graph
 * @param annotated the annotated element (e.g. a {@link Token})
 * @param segmentUri the URI of the resource representing the parsed 
 * annotated element in the graph
 */
public static void writePos(Graph graph, Annotated annotated, IRI segmentUri) {
  Value<PosTag> posAnnotation = annotated.getAnnotation(NlpAnnotations.POS_ANNOTATION);
  if(posAnnotation == null){
    return; //no POS annotation present
  }
  PosTag tag = posAnnotation.value();
  //the olia links are only available for mapped tags
  if(tag.isMapped()){
    for(Pos pos : tag.getPos()){
      graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(),
        pos.getUri()));
    }
    for(LexicalCategory category : tag.getCategories()){
      graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(),
        category.getUri()));
    }
  }
  //string tag and probability are written for mapped and unmapped tags
  graph.add(new TripleImpl(segmentUri, SsoOntology.posTag.getUri(),
    lf.createTypedLiteral(tag.getTag())));
  graph.add(new TripleImpl(segmentUri, ENHANCER_CONFIDENCE,
    lf.createTypedLiteral(posAnnotation.probability())));
}

if((!disjoint(tpc.getLinkedLexicalCategories(), posTag.getCategories())) ||
    (!disjoint(tpc.getLinkedPos(), posTag.getPosHierarchy())) ||
    tpc.getLinkedPosTags().contains(posTag.getTag())){
  if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY ||
      posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){
for(Value<PosTag> posAnnotation : posAnnotations){
  PosTag posTag = posAnnotation.value();
  if(posTag.isMapped()){
    if((!Collections.disjoint(tpc.getMatchedLexicalCategories(), posTag.getCategories())) ||
        (!Collections.disjoint(tpc.getMatchedPos(), posTag.getPosHierarchy())) ||
        tpc.getMatchedPosTags().contains(posTag.getTag())){
      if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY ||
          posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){
if((!disjoint(ProcessingState.SUB_SENTENCE_START_POS,posTag.getPosHierarchy()))){
  if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY ||
      posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){
for(Value<MorphoFeatures> morphoAnnotation : morphoAnnotations){
  for(PosTag posTag : morphoAnnotation.value().getPosList()){
    if(!disjoint(selectedPosTag.getCategories(),posTag.getCategories())){
      mf = morphoAnnotation.value();
      break selectMorphoFeature; //stop after finding the first one

  new PlainLiteralImpl(getLemma(), lang)));
for(PosTag pos: getPosList()){
  if(pos.isMapped()){
    for(LexicalCategory cat : pos.getCategories()){
      result.add(new TripleImpl(textAnnotation, RDF_TYPE, cat.getUri()));

/**
 * Checks if the parsed Token should be considered for counting distances
 * to negations and nouns: that is if its POS annotation has at least one
 * of the countable lexical categories.
 * @param token the token
 * @param language the language (not used by this implementation)
 * @return <code>true</code> if the token is countable. <code>false</code>
 * otherwise or if the token has no POS annotation
 */
private boolean isCountable(Token token, String language){
  Value<PosTag> pos = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
  if(pos == null){
    return false;
  }
  return !Collections.disjoint(countableLexCats, pos.value().getCategories());
}

    + "does not provide POS tags for each token!");
} else {
  posList.add(posValue.value().getTag());

Javadoc

A POS (part-of-speech) tag typically assigned by a POS-Tagger (an NLP component) to a Token by using the POS#POS_ANNOTATION

The only required field is #getTag() - the string tag assigned by the POS Tagger.

PosTags can be mapped to a LexicalCategory and be part of a TagSet. NOTE that the TagSet is set by the TagSet#addTag(PosTag) method.

Most used methods

getCategories
The LexicalCategory of this tag
getTag
isMapped
Returns true if this PosTag is mapped to a LexicalCategory or a Pos type as defined by the Olia Onto
<init>
Creates a PosTag that is assigned to a LexicalCategory
getPos
Getter for the Pos mapped to this PosTag
getPosHierarchy
hasCategory
Checks if this PosTag is mapped to the parsed LexicalCategory
hasPos
Checks if the PosTag is of the parsed Postag. This also considers the transitive hierarchy of the Po

Popular in Java

Making http post requests using okhttp
scheduleAtFixedRate (ScheduledExecutorService)
findViewById (Activity)
setScale (BigDecimal)
BufferedReader (java.io)
Wraps an existing Reader and buffers the input. Expensive interaction with the underlying reader is
Enumeration (java.util)
A legacy iteration interface. New code should use Iterator instead. Iterator replaces the enumeration
Stack (java.util)
Stack is a Last-In/First-Out(LIFO) data structure which represents a stack of objects. It enables u
StringUtils (org.apache.commons.lang)
Operations on java.lang.String that are null safe. * IsEmpty/IsBlank - checks if a String contains
Modifier (javassist)
The Modifier class provides static methods and constants to decode class and member access modifiers
Reference (javax.naming)
CodeWhisperer alternatives

How to use PosTag in org.apache.stanbol.enhancer.nlp.pos

Best Java code snippets using org.apache.stanbol.enhancer.nlp.pos.PosTag (Showing top 20 results out of 315)

How to use
PosTag
in
org.apache.stanbol.enhancer.nlp.pos