/**
 * Replaces the {@link Nif20#oliaConf} value of the given segment.
 * <p>
 * All existing {@code oliaConf} triples of the segment are removed first.
 * This means that in the case of multiple Olia annotations (e.g. single
 * word phrases together with word level annotations) the last confidence
 * wins, which is still better than ending up with two confidence values.
 * No new value is written if the probability of the passed value is
 * {@link Value#UNKNOWN_PROBABILITY}.
 *
 * @param graph the graph to update
 * @param segmentUri the subject the confidence is set for
 * @param value the value providing the probability to write
 */
private static void setOliaConf(Graph graph, IRI segmentUri, Value<?> value) {
    final IRI confProperty = Nif20.oliaConf.getUri();
    // drop every confidence value currently stored for this segment
    for (Iterator<Triple> stale = graph.filter(segmentUri, confProperty, null); stale.hasNext();) {
        stale.next();
        stale.remove();
    }
    if (value.probability() == Value.UNKNOWN_PROBABILITY) {
        return; // no confidence available for this value
    }
    graph.add(new TripleImpl(segmentUri, confProperty,
            lf.createTypedLiteral(value.probability())));
}
for(Value<PosTag> pos : token.getAnnotations(POS_ANNOTATION)){ log.trace(" - {}",pos); double score = pos.probability(); if(score == Value.UNKNOWN_PROBABILITY){ score = DEFAULT_SCORE;
if (tpc.getProcessedPhraseCategories().contains(phraseAnnotation.value().getCategory()) || tpc.getProcessedPhraseTags().contains(phraseAnnotation.value().getTag())) { if (phraseAnnotation.probability() == Value.UNKNOWN_PROBABILITY || phraseAnnotation.probability() >= tpc.getMinPhraseAnnotationProbability()) { process = true; break; } else if (phraseAnnotation.probability() == Value.UNKNOWN_PROBABILITY || phraseAnnotation.probability() >= tpc.getMinExcludePhraseAnnotationProbability()) { process = false; break;
if (tpc.getProcessedPhraseCategories().contains(phraseAnnotation.value().getCategory()) || tpc.getProcessedPhraseTags().contains(phraseAnnotation.value().getTag())) { if (phraseAnnotation.probability() == Value.UNKNOWN_PROBABILITY || phraseAnnotation.probability() >= tpc.getMinPhraseAnnotationProbability()) { process = true; break; } else if (phraseAnnotation.probability() == Value.UNKNOWN_PROBABILITY || phraseAnnotation.probability() >= tpc.getMinExcludePhraseAnnotationProbability()) { process = false; break;
process = true; Value<PosTag> posTag = token.getAnnotation(NlpAnnotations.POS_ANNOTATION); if(posTag != null && posTag.probability() == Value.UNKNOWN_PROBABILITY || posTag.probability() >= (minPOSConfidence/2.0)){ cats = classifier.getCategories(posTag.value()); } else { //no POS tags or probability to low boolean state = cats.contains(LexicalCategory.Adjective) || cats.contains(LexicalCategory.Noun); ignore = !state && (value.probability() == Value.UNKNOWN_PROBABILITY || value.probability() >= minPOSConfidence); process = state && (value.probability() == Value.UNKNOWN_PROBABILITY || value.probability() >= (minPOSConfidence/2.0));
@SuppressWarnings({"rawtypes", "unchecked"}) private ObjectNode writeValue(Value<?> value) { ObjectNode jValue; Class<?> valueType = value.value().getClass(); ValueTypeSerializer vts = valueTypeSerializerRegistry.getSerializer(valueType); if(vts != null){ jValue = vts.serialize(mapper,value.value()); //TODO assert that jValue does not define "class" and "prob"! } else { //use the default binding and the "data" field jValue = mapper.createObjectNode(); jValue.put("value", mapper.valueToTree(value.value())); } jValue.put("class",valueType.getName()); if(value.probability() != Value.UNKNOWN_PROBABILITY){ jValue.put("prob", value.probability()); } return jValue; } }
@SuppressWarnings({"rawtypes", "unchecked"}) private ObjectNode writeValue(Value<?> value) { ObjectNode jValue; Class<?> valueType = value.value().getClass(); ValueTypeSerializer vts = valueTypeSerializerRegistry.getSerializer(valueType); if(vts != null){ jValue = vts.serialize(mapper,value.value()); //TODO assert that jValue does not define "class" and "prob"! } else { //use the default binding and the "data" field jValue = mapper.createObjectNode(); jValue.put("value", mapper.valueToTree(value.value())); } jValue.put("class",valueType.getName()); if(value.probability() != Value.UNKNOWN_PROBABILITY){ jValue.put("prob", value.probability()); } return jValue; } }
if(tags != null && !tags.isEmpty()){ for(Value<PosTag> tag : tags){ if(tag.probability() == Value.UNKNOWN_PROBABILITY || tag.probability() >= MIN_POS_CONF || !Collections.disjoint(tag.value().getCategories(),PREF_LEX_CAT)){ posTag = tag.value();
(!disjoint(tpc.getLinkedPos(), posTag.getPosHierarchy())) || tpc.getLinkedPosTags().contains(posTag.getTag())){ if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY || posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){ selectedPosTag = posTag; isLinkablePos = true; } else if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY || posAnnotation.probability() >= tpc.getMinExcludePosAnnotationProbability()){ (!Collections.disjoint(tpc.getMatchedPos(), posTag.getPosHierarchy())) || tpc.getMatchedPosTags().contains(posTag.getTag())){ if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY || posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){ } else if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY || posAnnotation.probability() >= tpc.getMinExcludePosAnnotationProbability()){ if(selectedPosTag == null){ //do not override existing values PosTag posTag = posAnnotation.value(); if((!disjoint(ProcessingState.SUB_SENTENCE_START_POS,posTag.getPosHierarchy()))){ if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY || posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){ isSubSentenceStart = true; } else if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY || posAnnotation.probability() >= tpc.getMinExcludePosAnnotationProbability()){ isSubSentenceStart = false;
(!disjoint(tpc.getLinkedPos(), posTag.getPosHierarchy())) || tpc.getLinkedPosTags().contains(posTag.getTag())){ if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY || posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){ selectedPosTag = posTag; isLinkablePos = true; } else if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY || posAnnotation.probability() >= tpc.getMinExcludePosAnnotationProbability()){ (!Collections.disjoint(tpc.getMatchedPos(), posTag.getPosHierarchy())) || tpc.getMatchedPosTags().contains(posTag.getTag())){ if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY || posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){ } else if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY || posAnnotation.probability() >= tpc.getMinExcludePosAnnotationProbability()){ if(selectedPosTag == null){ //do not override existing values PosTag posTag = posAnnotation.value(); if((!disjoint(ProcessingState.SUB_SENTENCE_START_POS,posTag.getPosHierarchy()))){ if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY || posAnnotation.probability() >= tpc.getMinPosAnnotationProbability()){ isSubSentenceStart = true; } else if(posAnnotation.probability() == Value.UNKNOWN_PROBABILITY || posAnnotation.probability() >= tpc.getMinExcludePosAnnotationProbability()){ isSubSentenceStart = false;
/** * Writes a {@link NlpAnnotations#PHRASE_ANNOTATION} as NIF 1.0 to the * parsed RDF graph by using the segmentUri as subject * @param graph the graph * @param annotated the annotated element (e.g. a {@link Chunk}) * @param segmentUri the URI of the resource representing the parsed * annotated element in the graph */ public static void writePhrase(Graph graph, Annotated annotated, IRI segmentUri) { Value<PhraseTag> phraseTag = annotated.getAnnotation(NlpAnnotations.PHRASE_ANNOTATION); if(phraseTag != null){ IRI phraseTypeUri = LEXICAL_TYPE_TO_PHRASE_TYPE.get(phraseTag.value().getCategory()); if(phraseTypeUri != null){ //add the oliaLink for the Phrase graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(), phraseTypeUri)); graph.add(new TripleImpl(segmentUri, ENHANCER_CONFIDENCE, lf.createTypedLiteral(phraseTag.probability()))); } } }
/** * Writes a {@link NlpAnnotations#PHRASE_ANNOTATION} as NIF 1.0 to the * parsed RDF graph by using the segmentUri as subject * @param graph the graph * @param annotated the annotated element (e.g. a {@link Chunk}) * @param segmentUri the URI of the resource representing the parsed * annotated element in the graph */ public static void writePhrase(Graph graph, Annotated annotated, IRI segmentUri) { Value<PhraseTag> phraseTag = annotated.getAnnotation(NlpAnnotations.PHRASE_ANNOTATION); if(phraseTag != null){ IRI phraseTypeUri = LEXICAL_TYPE_TO_PHRASE_TYPE.get(phraseTag.value().getCategory()); if(phraseTypeUri != null){ //add the oliaLink for the Phrase graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(), phraseTypeUri)); graph.add(new TripleImpl(segmentUri, ENHANCER_CONFIDENCE, lf.createTypedLiteral(phraseTag.probability()))); } } }
for(LexicalCategory cat :posAnno.value().getCategories()){ if(!tokenLexCats.containsKey(cat)){ //do not override with lover prob tokenLexCats.put(cat, posAnno.probability());
metadata.add(new TripleImpl(ta,DC_TYPE,nerAnno.value().getType())); if(nerAnno.probability() >= 0) { metadata.add(new TripleImpl(ta, ENHANCER_CONFIDENCE, lf.createTypedLiteral(nerAnno.probability())));
/**
 * Writes the {@link NlpAnnotations#POS_ANNOTATION} as NIF 1.0 to the passed
 * RDF graph by using the passed segmentUri as subject.
 * <p>
 * For mapped POS tags an oliaLink is added for every {@link Pos} and every
 * {@link LexicalCategory} of the tag. The string tag itself is always
 * written. The confidence is only written if the probability of the
 * annotation is known, i.e. not {@link Value#UNKNOWN_PROBABILITY}.
 *
 * @param graph the graph
 * @param annotated the annotated element (e.g. a {@link Token})
 * @param segmentUri the URI of the resource representing the passed
 * annotated element in the graph
 */
public static void writePos(Graph graph, Annotated annotated, IRI segmentUri) {
    Value<PosTag> posTag = annotated.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    if (posTag == null) {
        return; // no POS annotation present
    }
    if (posTag.value().isMapped()) {
        for (Pos pos : posTag.value().getPos()) {
            graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(),
                    pos.getUri()));
        }
        for (LexicalCategory cat : posTag.value().getCategories()) {
            graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(),
                    cat.getUri()));
        }
    }
    graph.add(new TripleImpl(segmentUri, SsoOntology.posTag.getUri(),
            lf.createTypedLiteral(posTag.value().getTag())));
    //do not write the "unknown" sentinel as if it were a real confidence
    if (posTag.probability() != Value.UNKNOWN_PROBABILITY) {
        graph.add(new TripleImpl(segmentUri, ENHANCER_CONFIDENCE,
                lf.createTypedLiteral(posTag.probability())));
    }
}
/**
 * Writes the {@link NlpAnnotations#POS_ANNOTATION} as NIF 1.0 to the passed
 * RDF graph by using the passed segmentUri as subject.
 * <p>
 * For mapped POS tags an oliaLink is added for every {@link Pos} and every
 * {@link LexicalCategory} of the tag. The string tag itself is always
 * written. The confidence is only written if the probability of the
 * annotation is known, i.e. not {@link Value#UNKNOWN_PROBABILITY}.
 *
 * @param graph the graph
 * @param annotated the annotated element (e.g. a {@link Token})
 * @param segmentUri the URI of the resource representing the passed
 * annotated element in the graph
 */
public static void writePos(Graph graph, Annotated annotated, IRI segmentUri) {
    Value<PosTag> posTag = annotated.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    if (posTag == null) {
        return; // no POS annotation present
    }
    if (posTag.value().isMapped()) {
        for (Pos pos : posTag.value().getPos()) {
            graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(),
                    pos.getUri()));
        }
        for (LexicalCategory cat : posTag.value().getCategories()) {
            graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(),
                    cat.getUri()));
        }
    }
    graph.add(new TripleImpl(segmentUri, SsoOntology.posTag.getUri(),
            lf.createTypedLiteral(posTag.value().getTag())));
    //do not write the "unknown" sentinel as if it were a real confidence
    if (posTag.probability() != Value.UNKNOWN_PROBABILITY) {
        graph.add(new TripleImpl(segmentUri, ENHANCER_CONFIDENCE,
                lf.createTypedLiteral(posTag.probability())));
    }
}