@Override public ObjectNode serialize(ObjectMapper mapper, PosTag value){ ObjectNode jPosTag = mapper.createObjectNode(); jPosTag.put("tag", value.getTag()); if(value.getPos().size() == 1){ jPosTag.put("pos",value.getPos().iterator().next().ordinal()); } else if(!value.getPos().isEmpty()){ ArrayNode jPos = mapper.createArrayNode(); for(Pos pos : value.getPos()){ jPos.add(pos.ordinal()); if(!value.getCategories().isEmpty()){ categories.addAll(value.getCategories()); for(Pos pos : value.getPos()){ categories.removeAll(pos.categories());
/**
 * Resolves the {@link PosTag} for a string tag. The tag is first looked up
 * in the parsed model, then in the parsed map of ad-hoc tags. If neither
 * knows the tag, a new {@link PosTag} is created from the string, stored in
 * the ad-hoc map and the event is logged on info level.
 * @param model the tag set queried first
 * @param adhocTags the map holding tags created for strings unknown to the
 * model. New entries are added to this map by this method
 * @param tag the string tag to resolve
 * @param language the language (only used for the log message)
 * @return the tag of the model, the cached ad-hoc tag or the newly created one
 */
private PosTag getPosTag(TagSet<PosTag> model, Map<String,PosTag> adhocTags, String tag, String language) {
    PosTag mapped = model.getTag(tag);
    if (mapped != null) {
        return mapped;
    }
    PosTag adhoc = adhocTags.get(tag);
    if (adhoc == null) {
        // first time this tag is seen: create it and remember it for later calls
        adhoc = new PosTag(tag);
        adhocTags.put(tag, adhoc);
        log.info("Encountered umapped POS tag '{}' for langauge '{}'",tag,language);
    }
    return adhoc;
}
/** * Checks if a posTag matches against this TokenTypeDefinition * @param posTag the posTag to check * @return <code>true</code> in case of a match. Otherwise <code>false</code> * @throws NullPointerException if the parsed posTag is <code>null</code> */ public boolean matches(PosTag posTag){ //check against incldues categories, posTags and tags boolean matches = (!Collections.disjoint(posTag.getCategories(), categories)) || (!Collections.disjoint(posTag.getPosHierarchy(), posTags)) || tags.contains(posTag.getTag()); //if there is a match we need still to check for excluded POS tags return matches ? Collections.disjoint(posTag.getPosHierarchy(),excludedPosTags) : false; }
/**
 * Builds the string used for trace level logging of Tokens part of a chunk.
 * For every POS annotation of the token one entry is created: the string
 * tag if the annotation has no lexical category, the single category if
 * there is exactly one and the string representation of the whole category
 * set otherwise.
 * @param token the token to describe
 * @return the string representation of the list of collected entries
 */
private String logPosCategories(Token token){
    List<Value<PosTag>> annotations = token.getAnnotations(POS_ANNOTATION);
    List<String> names = new ArrayList<String>(annotations.size());
    for (Value<PosTag> annotation : annotations) {
        PosTag posTag = annotation.value();
        Set<LexicalCategory> categories = posTag.getCategories();
        final String name;
        if (categories.isEmpty()) {
            // unmapped: fall back to the plain string tag
            name = posTag.getTag();
        } else if (categories.size() == 1) {
            name = categories.iterator().next().toString();
        } else {
            name = categories.toString();
        }
        names.add(name);
    }
    return names.toString();
}
/** * Checks if the parsed {@link Token} represents an negation * @param token the word * @param index the index of the token relative to the sentence | section * @param language the language * @return <code>true</code> if the {@link Token} represents a negation. * Otherwise <code>false</code> */ private boolean isNoun(Token token, boolean firstTokenInSentence, String language) { String word = token.getSpan(); if(!firstTokenInSentence && !word.isEmpty() && Character.isUpperCase(word.charAt(0))){ return true; //assume all upper case tokens are Nouns } Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION); if(posAnnotation != null && (posAnnotation.value().hasCategory(LexicalCategory.Noun) || posAnnotation.value().getPosHierarchy().contains(Pos.CardinalNumber))){ return true; } return false; } /**
/** * Writes the {@link NlpAnnotations#POS_ANNOTATION} as NIF 1.0 to the parsed * RDF graph by using the parsed segmentUri as subject * @param graph the graph * @param annotated the annotated element (e.g. a {@link Token}) * @param segmentUri the URI of the resource representing the parsed * annotated element in the graph */ public static void writePos(Graph graph, Annotated annotated, IRI segmentUri) { Value<PosTag> posTag = annotated.getAnnotation(NlpAnnotations.POS_ANNOTATION); if(posTag != null){ if(posTag.value().isMapped()){ for(Pos pos : posTag.value().getPos()){ graph.add(new TripleImpl(segmentUri, Nif20.oliaCategory.getUri(), pos.getUri())); } for(LexicalCategory cat : posTag.value().getCategories()){ graph.add(new TripleImpl(segmentUri, Nif20.oliaCategory.getUri(), cat.getUri())); } } graph.add(new TripleImpl(segmentUri, Nif20.posTag.getUri(), lf.createTypedLiteral(posTag.value().getTag()))); //set the oliaConf //remove existing conf values (e.g. for a single word phrase) setOliaConf(graph, segmentUri, posTag); } } /**
if((!disjoint(tpc.getLinkedLexicalCategories(), posTag.getCategories())) || (!disjoint(tpc.getLinkedPos(), posTag.getPosHierarchy())) || tpc.getLinkedPosTags().contains(posTag.getTag())){ if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY || posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){ for(Value<PosTag> posAnnotation : posAnnotations){ PosTag posTag = posAnnotation.value(); if(posTag.isMapped()){ if((!Collections.disjoint(tpc.getMatchedLexicalCategories(), posTag.getCategories())) || (!Collections.disjoint(tpc.getMatchedPos(), posTag.getPosHierarchy())) || tpc.getMatchedPosTags().contains(posTag.getTag())){ if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY || posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){ if((!disjoint(ProcessingState.SUB_SENTENCE_START_POS,posTag.getPosHierarchy()))){ if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY || posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){ for(Value<MorphoFeatures> morphoAnnotation : morphoAnnotations){ for(PosTag posTag : morphoAnnotation.value().getPosList()){ if(!disjoint(selectedPosTag.getCategories(),posTag.getCategories())){ mf = morphoAnnotation.value(); break selectMorphoFeature; //stop after finding the first one
if(posAnno.value().isMapped()){ for(LexicalCategory cat :posAnno.value().getCategories()){ if(!tokenLexCats.containsKey(cat)){ //do not override with lover prob tokenLexCats.put(cat, posAnno.probability()); Set<LexicalCategory> mfCats = EnumSet.noneOf(LexicalCategory.class); for(PosTag mfPos : mf.getPosList()){ mfCats.addAll(mfPos.getCategories());
/**
 * Returns the lexical categories of the parsed {@link PosTag} by delegating
 * to {@link PosTag#getCategories()}.
 * @param posTag the POS tag
 * @return the value returned by <code>posTag.getCategories()</code>
 * @throws NullPointerException if the parsed posTag is <code>null</code>
 */
@Override public Set<LexicalCategory> getCategories(PosTag posTag) { return posTag.getCategories(); }
if(tag.probability() == Value.UNKNOWN_PROBABILITY || tag.probability() >= MIN_POS_CONF || !Collections.disjoint(tag.value().getCategories(),PREF_LEX_CAT)){ posTag = tag.value(); break; posTag = tags.get(0).value(); if(posTag.hasCategory(LexicalCategory.Noun)){ if(posTag.hasCategory(LexicalCategory.Verb)){ setVerb(token);
while( j < posSequences.length && !done){ String p = posSequences[j].getOutcomes().get(i); done = j > 0 && p.equals(actPos[0].getTag()); if(!done){ actPos[j] = getPosTag(posModel,adhocTags,p,language);
/**
 * Checks if the parsed {@link Token} marks a section border. This is the
 * case if the token has a {@link NlpAnnotations#POS_ANNOTATION} and the pos
 * hierarchy of the annotated tag shares at least one element with
 * <code>sectionBorderPosTags</code>.
 * @param token the token to check
 * @param language the language (not used by the current implementation)
 * @return <code>true</code> for section borders. <code>false</code> for all
 * other tokens, including tokens without a POS annotation
 */
private boolean isSectionBorder(Token token, String language) {
    Value<PosTag> annotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    if (annotation == null) {
        return false;
    }
    return !Collections.disjoint(sectionBorderPosTags,
            annotation.value().getPosHierarchy());
}
posTag = adhocTags.get(posAttr.getPartOfSpeech()); if(posTag == null){ posTag = new PosTag(posAttr.getPartOfSpeech()); adhocTags.put(posAttr.getPartOfSpeech(), posTag); log.warn(" ... missing PosTag mapping for {}",posAttr.getPartOfSpeech()); sentStartOffset = offset.startOffset(); if(posTag.hasPos(Pos.Point)) { Sentence sent = at.addSentence(sentStartOffset, offset.startOffset());
PosTag posTag = pos.value(); if (posTag.hasCategory(LexicalCategory.Noun) || posTag.hasCategory(LexicalCategory.Adjective)) { nounNo++; if (!hasGoodDeterminer && posTag.hasPos(Pos.Determiner) && langDeterminerSet.contains(token.getSpan().toLowerCase())) { hasGoodDeterminer = true;
/**
 * Checks if the {@link NlpAnnotations#POS_ANNOTATION} of the parsed
 * {@link Token} has the {@link LexicalCategory#Verb} category.
 * @param token the token to check
 * @param language the language (not used by the current implementation)
 * @return <code>true</code> if the token is annotated as verb.
 * <code>false</code> if not or if the token has no POS annotation
 */
private boolean isVerb(Token token, String language) {
    Value<PosTag> annotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    return annotation != null && annotation.value().hasCategory(LexicalCategory.Verb);
}
/**
 * Writes the {@link NlpAnnotations#POS_ANNOTATION} of the parsed annotated
 * element as NIF 1.0 to the parsed RDF graph by using the
 * <code>SsoOntology</code> vocabulary and the parsed segmentUri as subject.
 * Nothing is written if the element has no POS annotation.
 * @param graph the graph
 * @param annotated the annotated element (e.g. a {@link Token})
 * @param segmentUri the URI of the resource representing the parsed
 * annotated element in the graph
 */
public static void writePos(Graph graph, Annotated annotated, IRI segmentUri) {
    Value<PosTag> posAnnotation = annotated.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    if (posAnnotation == null) {
        return;
    }
    PosTag tag = posAnnotation.value();
    if (tag.isMapped()) {
        // mapped tags: link both the pos types and the lexical categories
        for (Pos pos : tag.getPos()) {
            graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(),
                    pos.getUri()));
        }
        for (LexicalCategory category : tag.getCategories()) {
            graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(),
                    category.getUri()));
        }
    }
    // string tag and probability are written for mapped and unmapped tags
    graph.add(new TripleImpl(segmentUri, SsoOntology.posTag.getUri(),
            lf.createTypedLiteral(tag.getTag())));
    graph.add(new TripleImpl(segmentUri, ENHANCER_CONFIDENCE,
            lf.createTypedLiteral(posAnnotation.probability())));
}
if((!disjoint(tpc.getLinkedLexicalCategories(), posTag.getCategories())) || (!disjoint(tpc.getLinkedPos(), posTag.getPosHierarchy())) || tpc.getLinkedPosTags().contains(posTag.getTag())){ if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY || posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){ for(Value<PosTag> posAnnotation : posAnnotations){ PosTag posTag = posAnnotation.value(); if(posTag.isMapped()){ if((!Collections.disjoint(tpc.getMatchedLexicalCategories(), posTag.getCategories())) || (!Collections.disjoint(tpc.getMatchedPos(), posTag.getPosHierarchy())) || tpc.getMatchedPosTags().contains(posTag.getTag())){ if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY || posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){ if((!disjoint(ProcessingState.SUB_SENTENCE_START_POS,posTag.getPosHierarchy()))){ if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY || posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){ for(Value<MorphoFeatures> morphoAnnotation : morphoAnnotations){ for(PosTag posTag : morphoAnnotation.value().getPosList()){ if(!disjoint(selectedPosTag.getCategories(),posTag.getCategories())){ mf = morphoAnnotation.value(); break selectMorphoFeature; //stop after finding the first one
new PlainLiteralImpl(getLemma(), lang))); for(PosTag pos: getPosList()){ if(pos.isMapped()){ for(LexicalCategory cat : pos.getCategories()){ result.add(new TripleImpl(textAnnotation, RDF_TYPE, cat.getUri()));
/**
 * Decides if the parsed {@link Token} is considered when counting distances
 * to negations and nouns. This is the case if the token has a
 * {@link NlpAnnotations#POS_ANNOTATION} and the annotated tag shares at
 * least one lexical category with <code>countableLexCats</code>.
 * @param token the token to check
 * @param language the language (not used by the current implementation)
 * @return <code>true</code> if the token is counted. <code>false</code> if
 * not or if the token has no POS annotation
 */
private boolean isCountable(Token token, String language){
    Value<PosTag> annotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    if (annotation == null) {
        return false;
    }
    return !Collections.disjoint(countableLexCats,
            annotation.value().getCategories());
}
+ "does not provide POS tags for each token!"); } else { posList.add(posValue.value().getTag());