/**
 * Checks if the parsed {@link Token} marks a section border.
 * @param token the token to check
 * @param language the language of the text (currently unused)
 * @return <code>true</code> if the token's POS hierarchy contains any of the
 * configured {@link #sectionBorderPosTags}. Otherwise <code>false</code>
 */
private boolean isSectionBorder(Token token, String language) {
    Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    // tokens without a POS annotation are never section borders
    return posAnnotation != null
            && !Collections.disjoint(sectionBorderPosTags, posAnnotation.value().getPosHierarchy());
}
/**
 * Checks if the parsed {@link Token} is a coordinating conjunction
 * (e.g. "and", "or").
 * NOTE(review): the method name misspells "Conjunction"; renaming would
 * break callers, so the name is kept as-is.
 * @param token the token to check
 * @param language the language of the text (currently unused)
 * @return <code>true</code> if the POS hierarchy contains
 * {@link Pos#CoordinatingConjunction}. Otherwise <code>false</code>
 */
private boolean isCoordinatingConjuction(Token token, String language) {
    Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    return posAnnotation != null
            && posAnnotation.value().getPosHierarchy().contains(Pos.CoordinatingConjunction);
}
/**
 * Used for trace level logging of Tokens that are part of a chunk.
 * Builds a printable label for every POS annotation of the token: the
 * lexical categories if present, otherwise the raw tag string.
 * @param token the token to describe
 * @return a list-style string of the category/tag labels
 */
private String logPosCategories(Token token) {
    List<Value<PosTag>> posAnnotations = token.getAnnotations(POS_ANNOTATION);
    List<String> labels = new ArrayList<String>(posAnnotations.size());
    for (Value<PosTag> annotation : posAnnotations) {
        Set<LexicalCategory> categories = annotation.value().getCategories();
        String label;
        if (categories.isEmpty()) {
            // unmapped tag set - fall back to the raw tag string
            label = annotation.value().getTag();
        } else if (categories.size() == 1) {
            label = categories.iterator().next().toString();
        } else {
            label = categories.toString();
        }
        labels.add(label);
    }
    return labels.toString();
}
/**
 * Checks if the parsed {@link Token} is a pronoun.
 * @param token the token to check
 * @param language the language of the text (currently unused)
 * @return <code>true</code> if the POS hierarchy contains
 * {@link Pos#Pronoun}. Otherwise <code>false</code>
 */
private boolean isPronoun(Token token, String language) {
    Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    return posAnnotation != null
            && posAnnotation.value().getPosHierarchy().contains(Pos.Pronoun);
}
/**
 * Checks if the parsed {@link Token} represents a negation
 * @param token the word
 * @param language the language
 * @return <code>true</code> if the {@link Token} represents a negation.
 * Otherwise <code>false</code>
 */
private boolean isNegation(Token token, String language) {
    Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    // negation if any entry of the POS hierarchy is a configured negative tag
    return posAnnotation != null
            && !Collections.disjoint(negativePosTags, posAnnotation.value().getPosHierarchy());
}
/**
 * If the current Token should be considered for counting distances to
 * negations and nouns
 * @param token the token to check
 * @param language the language (currently unused)
 * @return <code>true</code> if the token's lexical categories intersect the
 * configured {@link #countableLexCats}. Otherwise <code>false</code>
 */
private boolean isCountable(Token token, String language) {
    Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    return posAnnotation != null
            && !Collections.disjoint(countableLexCats, posAnnotation.value().getCategories());
}
/** * Checks if the parsed {@link Token} represents an negation * @param token the word * @param index the index of the token relative to the sentence | section * @param language the language * @return <code>true</code> if the {@link Token} represents a negation. * Otherwise <code>false</code> */ private boolean isNoun(Token token, boolean firstTokenInSentence, String language) { String word = token.getSpan(); if(!firstTokenInSentence && !word.isEmpty() && Character.isUpperCase(word.charAt(0))){ return true; //assume all upper case tokens are Nouns } Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION); if(posAnnotation != null && (posAnnotation.value().hasCategory(LexicalCategory.Noun) || posAnnotation.value().getPosHierarchy().contains(Pos.CardinalNumber))){ return true; } return false; } /**
/**
 * Checks if the parsed {@link Token} is a verb.
 * @param token the token to check
 * @param language the language of the text (currently unused)
 * @return <code>true</code> if the token has the lexical category
 * {@link LexicalCategory#Verb}. Otherwise <code>false</code>
 */
private boolean isVerb(Token token, String language) {
    Value<PosTag> posAnnotation = token.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    return posAnnotation != null
            && posAnnotation.value().hasCategory(LexicalCategory.Verb);
}
/**
 * Serializes an annotation {@link Value} to a Jackson {@link ObjectNode}.
 * Custom serializers registered for the concrete value type take precedence;
 * otherwise the default Jackson binding is written under the "value" field.
 * The concrete class name is always written under "class" and the
 * probability under "prob" (only when known).
 * @param value the annotation value to serialize
 * @return the JSON representation of the parsed value
 */
@SuppressWarnings({"rawtypes", "unchecked"})
private ObjectNode writeValue(Value<?> value) {
    ObjectNode jValue;
    Class<?> valueType = value.value().getClass();
    ValueTypeSerializer vts = valueTypeSerializerRegistry.getSerializer(valueType);
    if(vts != null){
        jValue = vts.serialize(mapper,value.value());
        //TODO assert that jValue does not define "class" and "prob"!
    } else { //use the default binding and the "data" field
        jValue = mapper.createObjectNode();
        jValue.put("value", mapper.valueToTree(value.value()));
    }
    jValue.put("class",valueType.getName());
    // UNKNOWN_PROBABILITY is a sentinel - only write "prob" when known
    if(value.probability() != Value.UNKNOWN_PROBABILITY){
        jValue.put("prob", value.probability());
    }
    return jValue;
}
}
/**
 * Serializes an annotation {@link Value} to a Jackson {@link ObjectNode}.
 * Custom serializers registered for the concrete value type take precedence;
 * otherwise the default Jackson binding is written under the "value" field.
 * The concrete class name is always written under "class" and the
 * probability under "prob" (only when known).
 * @param value the annotation value to serialize
 * @return the JSON representation of the parsed value
 */
@SuppressWarnings({"rawtypes", "unchecked"})
private ObjectNode writeValue(Value<?> value) {
    ObjectNode jValue;
    Class<?> valueType = value.value().getClass();
    ValueTypeSerializer vts = valueTypeSerializerRegistry.getSerializer(valueType);
    if(vts != null){
        jValue = vts.serialize(mapper,value.value());
        //TODO assert that jValue does not define "class" and "prob"!
    } else { //use the default binding and the "data" field
        jValue = mapper.createObjectNode();
        jValue.put("value", mapper.valueToTree(value.value()));
    }
    jValue.put("class",valueType.getName());
    // UNKNOWN_PROBABILITY is a sentinel - only write "prob" when known
    if(value.probability() != Value.UNKNOWN_PROBABILITY){
        jValue.put("prob", value.probability());
    }
    return jValue;
}
}
/**
 * Accepts {@link Chunk}s with a NER annotation matching one of the
 * configured Named Entity types (or any type when the wildcard is active).
 * As a side effect the [start,end] span and the matched type strings of
 * accepted chunks are collected in {@code nePhrasesTypes}.
 */
@Override
public boolean evaluate(Object o) {
    if(o instanceof Chunk){
        Chunk chunk = (Chunk)o;
        Value<NerTag> nerValue = chunk.getAnnotation(NER_ANNOTATION);
        if(nerValue != null){
            NerTag nerTag = nerValue.value();
            // the type URI of the NER tag (may be null for untyped tags)
            String nerType = nerTag.getType() != null ?
                    nerTag.getType().getUnicodeString() : null;
            if( wildcardType || neTypes.contains(nerTag.getTag()) ||
                    (nerType != null && neTypes.contains(nerType))){
                int[] span = new int[]{chunk.getStart(), chunk.getEnd()};
                // NOTE(review): int[] keys use identity-based equals/hashCode;
                // lookups only work if nePhrasesTypes compares keys with a
                // custom Comparator (e.g. a TreeMap) - verify its declaration
                Set<String> types = nePhrasesTypes.get(span);
                if(types == null){
                    types = new HashSet<String>(4);
                    nePhrasesTypes.put(span, types);
                }
                types.add(nerType); // may add null for untyped NER tags
                types.add(nerTag.getTag());
                return true;
            }
        }
    }
    return false;
}
}
/** * Writes a {@link NlpAnnotations#PHRASE_ANNOTATION} as NIF 1.0 to the * parsed RDF graph by using the segmentUri as subject * @param graph the graph * @param annotated the annotated element (e.g. a {@link Chunk}) * @param segmentUri the URI of the resource representing the parsed * annotated element in the graph */ public static void writePhrase(Graph graph, Annotated annotated, IRI segmentUri) { Value<PhraseTag> phraseTag = annotated.getAnnotation(NlpAnnotations.PHRASE_ANNOTATION); if(phraseTag != null){ IRI phraseTypeUri = LEXICAL_TYPE_TO_PHRASE_TYPE.get(phraseTag.value().getCategory()); if(phraseTypeUri != null){ //add the oliaLink for the Phrase graph.add(new TripleImpl(segmentUri, Nif20.oliaCategory.getUri(), phraseTypeUri)); setOliaConf(graph, segmentUri, phraseTag); } } }
Boolean process = null; for (Value<PhraseTag> phraseAnnotation : chunk.getAnnotations(PHRASE_ANNOTATION)) { if (tpc.getProcessedPhraseCategories().contains(phraseAnnotation.value().getCategory()) || tpc.getProcessedPhraseTags().contains(phraseAnnotation.value().getTag())) { if (phraseAnnotation.probability() == Value.UNKNOWN_PROBABILITY || phraseAnnotation.probability() >= tpc.getMinPhraseAnnotationProbability()) {
Boolean process = null; for (Value<PhraseTag> phraseAnnotation : chunk.getAnnotations(PHRASE_ANNOTATION)) { if (tpc.getProcessedPhraseCategories().contains(phraseAnnotation.value().getCategory()) || tpc.getProcessedPhraseTags().contains(phraseAnnotation.value().getTag())) { if (phraseAnnotation.probability() == Value.UNKNOWN_PROBABILITY || phraseAnnotation.probability() >= tpc.getMinPhraseAnnotationProbability()) {
/**
 * Writes the {@link NlpAnnotations#POS_ANNOTATION} (using the {@code Nif20}
 * vocabulary) to the parsed RDF graph by using the parsed segmentUri as
 * subject
 * @param graph the graph
 * @param annotated the annotated element (e.g. a {@link Token})
 * @param segmentUri the URI of the resource representing the parsed
 * annotated element in the graph
 */
public static void writePos(Graph graph, Annotated annotated, IRI segmentUri) {
    Value<PosTag> posTag = annotated.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    if(posTag != null){
        if(posTag.value().isMapped()){
            // olia categories are only available for mapped tag sets
            for(Pos pos : posTag.value().getPos()){
                graph.add(new TripleImpl(segmentUri, Nif20.oliaCategory.getUri(), pos.getUri()));
            }
            for(LexicalCategory cat : posTag.value().getCategories()){
                graph.add(new TripleImpl(segmentUri, Nif20.oliaCategory.getUri(), cat.getUri()));
            }
        }
        // the raw string tag is always written, even for unmapped tag sets
        graph.add(new TripleImpl(segmentUri, Nif20.posTag.getUri(),
            lf.createTypedLiteral(posTag.value().getTag())));
        //set the oliaConf
        //remove existing conf values (e.g. for a single word phrase)
        setOliaConf(graph, segmentUri, posTag);
    }
}
/**
/**
 * Writes the {@link NlpAnnotations#POS_ANNOTATION} as NIF 1.0 to the parsed
 * RDF graph by using the parsed segmentUri as subject
 * @param graph the graph
 * @param annotated the annotated element (e.g. a {@link Token})
 * @param segmentUri the URI of the resource representing the parsed
 * annotated element in the graph
 */
public static void writePos(Graph graph, Annotated annotated, IRI segmentUri) {
    Value<PosTag> posTag = annotated.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    if(posTag != null){
        if(posTag.value().isMapped()){
            // olia links are only available for mapped tag sets
            for(Pos pos : posTag.value().getPos()){
                graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(), pos.getUri()));
            }
            for(LexicalCategory cat : posTag.value().getCategories()){
                graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(), cat.getUri()));
            }
        }
        // the raw string tag is always written, even for unmapped tag sets
        graph.add(new TripleImpl(segmentUri, SsoOntology.posTag.getUri(),
            lf.createTypedLiteral(posTag.value().getTag())));
        // only write a confidence when the probability is known; writing the
        // UNKNOWN_PROBABILITY sentinel would produce an invalid confidence
        // value (consistent with the sentinel check used by writeValue)
        if(posTag.probability() != Value.UNKNOWN_PROBABILITY){
            graph.add(new TripleImpl(segmentUri, ENHANCER_CONFIDENCE,
                lf.createTypedLiteral(posTag.probability())));
        }
    }
}
/**
 * Writes the {@link NlpAnnotations#POS_ANNOTATION} as NIF 1.0 to the parsed
 * RDF graph by using the parsed segmentUri as subject
 * @param graph the graph
 * @param annotated the annotated element (e.g. a {@link Token})
 * @param segmentUri the URI of the resource representing the parsed
 * annotated element in the graph
 */
public static void writePos(Graph graph, Annotated annotated, IRI segmentUri) {
    Value<PosTag> posTag = annotated.getAnnotation(NlpAnnotations.POS_ANNOTATION);
    if(posTag != null){
        if(posTag.value().isMapped()){
            // olia links are only available for mapped tag sets
            for(Pos pos : posTag.value().getPos()){
                graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(), pos.getUri()));
            }
            for(LexicalCategory cat : posTag.value().getCategories()){
                graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(), cat.getUri()));
            }
        }
        // the raw string tag is always written, even for unmapped tag sets
        graph.add(new TripleImpl(segmentUri, SsoOntology.posTag.getUri(),
            lf.createTypedLiteral(posTag.value().getTag())));
        // only write a confidence when the probability is known; writing the
        // UNKNOWN_PROBABILITY sentinel would produce an invalid confidence
        // value (consistent with the sentinel check used by writeValue)
        if(posTag.probability() != Value.UNKNOWN_PROBABILITY){
            graph.add(new TripleImpl(segmentUri, ENHANCER_CONFIDENCE,
                lf.createTypedLiteral(posTag.probability())));
        }
    }
}
/** * Writes a {@link NlpAnnotations#PHRASE_ANNOTATION} as NIF 1.0 to the * parsed RDF graph by using the segmentUri as subject * @param graph the graph * @param annotated the annotated element (e.g. a {@link Chunk}) * @param segmentUri the URI of the resource representing the parsed * annotated element in the graph */ public static void writePhrase(Graph graph, Annotated annotated, IRI segmentUri) { Value<PhraseTag> phraseTag = annotated.getAnnotation(NlpAnnotations.PHRASE_ANNOTATION); if(phraseTag != null){ IRI phraseTypeUri = LEXICAL_TYPE_TO_PHRASE_TYPE.get(phraseTag.value().getCategory()); if(phraseTypeUri != null){ //add the oliaLink for the Phrase graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(), phraseTypeUri)); graph.add(new TripleImpl(segmentUri, ENHANCER_CONFIDENCE, lf.createTypedLiteral(phraseTag.probability()))); } } }
/** * Writes a {@link NlpAnnotations#PHRASE_ANNOTATION} as NIF 1.0 to the * parsed RDF graph by using the segmentUri as subject * @param graph the graph * @param annotated the annotated element (e.g. a {@link Chunk}) * @param segmentUri the URI of the resource representing the parsed * annotated element in the graph */ public static void writePhrase(Graph graph, Annotated annotated, IRI segmentUri) { Value<PhraseTag> phraseTag = annotated.getAnnotation(NlpAnnotations.PHRASE_ANNOTATION); if(phraseTag != null){ IRI phraseTypeUri = LEXICAL_TYPE_TO_PHRASE_TYPE.get(phraseTag.value().getCategory()); if(phraseTypeUri != null){ //add the oliaLink for the Phrase graph.add(new TripleImpl(segmentUri, SsoOntology.oliaLink.getUri(), phraseTypeUri)); graph.add(new TripleImpl(segmentUri, ENHANCER_CONFIDENCE, lf.createTypedLiteral(phraseTag.probability()))); } } }
/**
 * Gets an Entity from the configured {@link Site} based on the NER text and type.
 * Queries by rdfs:label (exact, language-aware) and rdf:type, limited to a
 * single result.
 * @param ner the named-entity {@link Span}; assumed to carry a
 * {@link NlpAnnotations#NER_ANNOTATION} with a non-null type - a missing
 * annotation or type causes a NullPointerException here (TODO confirm the
 * caller guarantees this)
 * @param language the language used for the label constraint
 * @return the matching Entity, or <code>null</code> if none was found
 * @throws EngineException on query failures
 */
private Entity lookupEntity(Span ner, String language) throws EngineException {
    Site site = getReferencedSite();
    // no referenced site -> fall back to the Entityhub's own query factory
    FieldQueryFactory queryFactory = site == null ?
            entityHub.getQueryFactory() : site.getQueryFactory();
    FieldQuery query = queryFactory.createFieldQuery();
    Constraint labelConstraint;
    String namedEntityLabel = ner.getSpan();
    labelConstraint = new TextConstraint(namedEntityLabel, false, language, null);
    query.setConstraint(RDFS_LABEL.getUnicodeString(), labelConstraint);
    query.setConstraint(RDF_TYPE.getUnicodeString(),
        new ReferenceConstraint(ner.getAnnotation(NlpAnnotations.NER_ANNOTATION).value().getType()
        .getUnicodeString()));
    query.setLimit(1);
    QueryResultList<Entity> results = site == null ? // if site is NULL
        entityHub.findEntities(query) : // use the Entityhub
            site.findEntities(query); // else the referenced site
    if (results.isEmpty()) return null;
    // We set the limit to 1 so if it found anything it should contain just 1 entry
    return results.iterator().next();
}