@Override public int compareTo(Span o) { if(context != null && o.getContext() != null && !context.equals(o.getContext())){ log.warn("Comparing Spans with different Context. This is not an " + "intended usage of this class as start|end|type parameters " + "do not have a natural oder over different texts."); log.info("This will sort Spans based on start|end|type parameters "+ "regardless that the might be over different texts!"); //TODO consider throwing an IllegalStateExcetion! } //Compare Integers ASC (used here three times) // (x < y) ? -1 : ((x == y) ? 0 : 1); int start = (span[0] < o.getStart()) ? -1 : ((span[0] == o.getStart()) ? 0 : 1); if(start == 0){ //sort end in DESC order int end = (span[1] < o.getEnd()) ? 1 : ((span[1] == o.getEnd()) ? 0 : -1); //if start AND end is the same compare based on the span type //Natural order of span types is defined by the Enum.ordinal() int o1 = getType().ordinal(); int o2 = o.getType().ordinal(); return end != 0 ? end : (o1 < o2) ? -1 : ((o1 == o2) ? 0 : 1); } else { return start; } }
String currentTokenString = currentToken.getSpan().toLowerCase(); return new PlaceAdjectival(currentToken.getStart(), currentToken.getEnd(), langPlaceAdjectivalsMap.get(currentTokenString)); String previousTokenString = previousToken.getSpan().toLowerCase(); concatTokens = new StringBuilder(); concatTokens.append(previousTokenString); return new PlaceAdjectival(previousToken.getStart(), currentToken.getEnd(), langPlaceAdjectivalsMap.get(concatTokensString)); String nextTokenString = nextToken.getSpan().toLowerCase(); concatTokens = new StringBuilder(); concatTokens.append(currentTokenString); return new PlaceAdjectival(currentToken.getStart(), nextToken.getEnd(), langPlaceAdjectivalsMap.get(concatTokensString));
/**
 * Writes the Sentences and Chunks of the parsed {@link AnalysedText},
 * together with their phrase annotations, to the trace level log.
 * @param at the analysed text to log
 */
private void logChunks(AnalysedText at){
    Iterator<Span> spans = at.getEnclosed(
        EnumSet.of(SpanTypeEnum.Sentence, SpanTypeEnum.Chunk));
    while(spans.hasNext()){
        Span current = spans.next();
        //Chunks additionally log the covered text
        if(current.getType() == SpanTypeEnum.Chunk){
            log.trace(" > {} {}", current, current.getSpan());
        } else {
            log.trace(" > {}", current);
        }
        for(Value<PhraseTag> phrase : current.getAnnotations(PHRASE_ANNOTATION)){
            log.trace(" - {}", phrase);
        }
    }
}
String nounPhraseText = nounPhrase.getChunk().getSpan().toLowerCase(); int classStart = 0; int classEnd = 0; && (matchedClass == null || label.split("\\s").length > matchedClass.split("\\s").length)) { matchedClass = label; classStart = nounPhrase.getChunk().getStart() + nounPhraseText.indexOf(label); classEnd = classStart + label.length(); IRI nerType = ner.getAnnotation(NlpAnnotations.NER_ANNOTATION).value().getType(); if ((npNer.getStart() >= classStart && npNer.getStart() <= classEnd) || (npNer.getEnd() >= classStart && npNer.getEnd() <= classEnd)) continue; IRI npNerType = npNer.getAnnotation(NlpAnnotations.NER_ANNOTATION).value().getType(); Set<String> rulesOntologyAttr = new HashSet<String>(); .getAnnotation(NlpAnnotations.NER_ANNOTATION).value().getType());
/**
 * Serializes the parsed {@link CorefFeature} to a JSON object holding
 * the representative state and the list of mention spans (type, start
 * and end offsets).
 * @param mapper the Jackson mapper used to create the JSON nodes
 * @param coref the coreference feature to serialize
 * @return the created JSON object
 */
@Override
public ObjectNode serialize(ObjectMapper mapper, CorefFeature coref) {
    ObjectNode node = mapper.createObjectNode();
    node.put(IS_REPRESENTATIVE_TAG, coref.isRepresentative());
    ArrayNode mentionArray = mapper.createArrayNode();
    for(Span mention : coref.getMentions()) {
        ObjectNode mentionNode = mapper.createObjectNode();
        mentionNode.put(MENTION_TYPE_TAG, mention.getType().toString());
        mentionNode.put(MENTION_START_TAG, mention.getStart());
        mentionNode.put(MENTION_END_TAG, mention.getEnd());
        mentionArray.add(mentionNode);
    }
    node.put(MENTIONS_TAG, mentionArray);
    return node;
}
/**
 * Serializes the parsed {@link Span} to a Jackson {@link ObjectNode}
 * holding the span type, start/end offsets and all annotations. Keys
 * with a single value are written directly; keys with multiple values
 * are written as a JSON array.
 * @param span the span to serialize
 * @return the created JSON node
 * @throws IOException if serializing an annotation value fails
 */
private ObjectNode writeSpan(Span span) throws IOException {
    log.trace("write {}",span); //fixed typo "wirte"
    ObjectNode jSpan = mapper.createObjectNode();
    jSpan.put("type", span.getType().name());
    jSpan.put("start", span.getStart());
    jSpan.put("end", span.getEnd());
    for(String key : span.getKeys()){
        List<Value<?>> values = span.getValues(key);
        if(values.size() == 1){ //single value - write directly
            jSpan.put(key, writeValue(values.get(0)));
        } else { //multiple values - write as an array
            //NOTE: putArray(key) already attaches the created array to
            //jSpan under the parsed key, so the former redundant
            //jSpan.put(key, jValues) call (a deprecated overload) was removed
            ArrayNode jValues = jSpan.putArray(key);
            for(Value<?> value : values){
                jValues.add(writeValue(value));
            }
        }
    }
    log.trace(" ... {}",jSpan);
    return jSpan;
}
while(tokenIt.hasNext()){ Span span = tokenIt.next(); switch (span.getType()) { case Token: Token word = (Token)span; Integer wordIndex = sentimentTokens.size(); Value<Double> sentimentAnnotation = span.getAnnotation(SENTIMENT_ANNOTATION); boolean addToList = false; Sentiment sentiment = null;
/**
 * Accepts only objects whose {@link Span#getType() span type} is
 * contained in the configured <code>types</code> set.
 * NOTE(review): the parsed Object is cast to Span without an
 * instanceof check - assumes the filtered collection only contains
 * Spans; confirm against the caller.
 */
@Override
public boolean evaluate(Object span) {
    return types.contains(((Span)span).getType());
}
});
Value<PosTag> pos = token.getAnnotation(NlpAnnotations.POS_ANNOTATION); && langDeterminerSet.contains(token.getSpan().toLowerCase())) { hasGoodDeterminer = true;
int nounPhraseSentenceNo = nounPhrase.getSentenceNo(); if (nounPhrase.getChunk().getStart() > ner.getStart() && (maxDistance != Constants.MAX_DISTANCE_NO_CONSTRAINT && nounPhraseSentenceNo > nerSentenceNo && nounPhraseSentenceNo - nerSentenceNo <= maxDistance)) { Span chunk = nounPhrase.getChunk(); chunk.addAnnotation(COREF_ANNOTATION, Value.value(new CorefFeature(false, coreferencedNer))); corefs.add(chunk); ner.addAnnotation(COREF_ANNOTATION, Value.value(new CorefFeature(true, corefs)));
Span chunk = chunks.next(); Value<NerTag> ner = chunk.getAnnotation(NlpAnnotations.NER_ANNOTATION); if (ner != null) { sectionNers.add(chunk); Value<PhraseTag> phrase = chunk.getAnnotation(NlpAnnotations.PHRASE_ANNOTATION); if (phrase != null && phrase.value().getCategory() == LexicalCategory.Noun) { sectionNounPhrases.add(new NounPhrase(chunk, sentenceCnt));
Object value; if(parser != null){ value = parser.parse(jValue, span.getContext()); } else { JsonNode valueNode = jValue.path("value"); span.addValue(key, Value.value(value)); } else { span.addValue(key, Value.value(value,jProb.getDoubleValue()));
/**
 * Writes the Sentences and Tokens of the parsed {@link AnalysedText},
 * together with their POS annotations, to the trace level log.
 * @param at the analysed text to log
 */
private void logAnnotations(AnalysedText at){
    Iterator<Span> spans = at.getEnclosed(
        EnumSet.of(SpanTypeEnum.Sentence, SpanTypeEnum.Token));
    while(spans.hasNext()){
        Span current = spans.next();
        log.trace(" > {}", current);
        for(Value<PosTag> pos : current.getAnnotations(POS_ANNOTATION)){
            log.trace(" - {}", pos);
        }
    }
}
/**
IRI segment = Nif20Helper.getNifRFC5147URI(base, span.getStart(), span.getType() == SpanTypeEnum.Text ? -1 : span.getEnd()); if(!contextOnlyUriScheme || span.getType() == SpanTypeEnum.Text){ graph.add(new TripleImpl(segment, RDF_TYPE, Nif20.RFC5147String.getUri())); if(span.getEnd() - span.getStart() < 100){ graph.add(new TripleImpl(segment, Nif20.anchorOf.getUri(), new PlainLiteralImpl(span.getSpan(),language))); } else { graph.add(new TripleImpl(segment, Nif20.head.getUri(), new PlainLiteralImpl(span.getSpan().substring(0,10),language))); lf.createTypedLiteral(span.getStart()))); graph.add(new TripleImpl(segment, Nif20.endIndex.getUri(), lf.createTypedLiteral(span.getEnd()))); String content = text.getSpan(); if(span.getType() != SpanTypeEnum.Text){ int prefixStart = Math.max(0, span.getStart() - DEFAULT_PREFIX_SUFFIX_LENGTH); graph.add(new TripleImpl(segment, Nif20.before.getUri(), new PlainLiteralImpl( content.substring(prefixStart, span.getStart()), language))); int suffixEnd = Math.min(span.getEnd() + DEFAULT_PREFIX_SUFFIX_LENGTH, text.getEnd()); graph.add(new TripleImpl(segment, Nif20.after.getUri(), new PlainLiteralImpl( content.substring(span.getEnd(), suffixEnd), language))); switch (span.getType()) { case Token: graph.add(new TripleImpl(segment, RDF_TYPE, Nif20.Word.getUri()));
/**
 * Serializes the parsed {@link CorefFeature} to a JSON object: the
 * representative flag plus an array with one entry per mention span
 * (type, start and end offsets).
 * @param mapper the Jackson mapper used to create the JSON nodes
 * @param coref the coreference feature to serialize
 * @return the created JSON object
 */
@Override
public ObjectNode serialize(ObjectMapper mapper, CorefFeature coref) {
    ObjectNode result = mapper.createObjectNode();
    result.put(IS_REPRESENTATIVE_TAG, coref.isRepresentative());
    ArrayNode jMentions = mapper.createArrayNode();
    for(Span mention : coref.getMentions()) {
        ObjectNode jMention = mapper.createObjectNode();
        jMention.put(MENTION_TYPE_TAG, mention.getType().toString());
        jMention.put(MENTION_START_TAG, mention.getStart());
        jMention.put(MENTION_END_TAG, mention.getEnd());
        jMentions.add(jMention);
    }
    result.put(MENTIONS_TAG, jMentions);
    return result;
}
/**
 * Serializes the parsed {@link Span} to a Jackson {@link ObjectNode}
 * holding the span type, start/end offsets and all annotations. Keys
 * with a single value are written directly; keys with multiple values
 * are written as a JSON array.
 * @param span the span to serialize
 * @return the created JSON node
 * @throws IOException if serializing an annotation value fails
 */
private ObjectNode writeSpan(Span span) throws IOException {
    log.trace("write {}",span); //fixed typo "wirte"
    ObjectNode jSpan = mapper.createObjectNode();
    jSpan.put("type", span.getType().name());
    jSpan.put("start", span.getStart());
    jSpan.put("end", span.getEnd());
    for(String key : span.getKeys()){
        List<Value<?>> values = span.getValues(key);
        if(values.size() == 1){ //single value - write directly
            jSpan.put(key, writeValue(values.get(0)));
        } else { //multiple values - write as an array
            //NOTE: putArray(key) already attaches the created array to
            //jSpan under the parsed key, so the former redundant
            //jSpan.put(key, jValues) call (a deprecated overload) was removed
            ArrayNode jValues = jSpan.putArray(key);
            for(Value<?> value : values){
                jValues.add(writeValue(value));
            }
        }
    }
    log.trace(" ... {}",jSpan);
    return jSpan;
}
/**
 * Creates a span of the parsed type with start/end offsets that are
 * interpreted relative to an other span (if one is parsed).
 * @param analysedText the context set for the created span
 * @param type the type of the created span
 * @param relativeTo if not <code>null</code> the parsed start/end
 *     offsets are taken relative to the start of this span; otherwise
 *     they are absolute offsets
 * @param start the (possibly relative) start offset
 * @param end the (possibly relative) end offset
 * @throws IllegalArgumentException if the created span would extend
 *     beyond the end of the <code>relativeTo</code> span
 */
protected SpanImpl(AnalysedTextImpl analysedText, SpanTypeEnum type, Span relativeTo,int start,int end) {
    this(type,
        relativeTo == null ? start : relativeTo.getStart()+start,
        relativeTo == null ? end : relativeTo.getStart()+end);
    setContext(analysedText);
    //check that Spans that are created relative to an other do not cross
    //the borders of that span
    //NOTE(review): only the end border is validated here; negative start
    //offsets are not checked - confirm callers guarantee start >= 0
    if(relativeTo != null && relativeTo.getEnd() < getEnd()){
        throw new IllegalArgumentException("Illegal span ["+start+','+end
            + "] for "+type+" relative to "+relativeTo+" : Span of the "
            + " contained Token MUST NOT extend the others!");
    }
}
switch (span.getType()) { case Sentence: if(sentence != null){ Value<Double> sentiment = span.getAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION); if(sentiment != null && sentiment.value() != null){ metadata.add(new TripleImpl(current, SENTIMENT_PROPERTY,
/**
 * Accepts only objects whose {@link Span#getType() span type} is
 * contained in the configured <code>types</code> set.
 * NOTE(review): the parsed Object is cast to Span without an
 * instanceof check - assumes the filtered collection only contains
 * Spans; confirm against the caller.
 */
@Override
public boolean evaluate(Object span) {
    return types.contains(((Span)span).getType());
}
});