@Override
public void reduce(TagLL[] head) {
    //(1) reduce Tags based on named entity phrases.
    for(TagLL tag = head[0]; tag != null; tag = tag.getNextTag()) {
        int start = tag.getStartOffset();
        int end = tag.getEndOffset();
        Chunk nePhrase = nePhrases.isEmpty() ? null : nePhrases.get(0);
        while(nePhrase != null && nePhrase.getEnd() <= start){
            nePhrases.remove(0);
            nePhrase = nePhrases.isEmpty() ? null : nePhrases.get(0);
        }
        if(nePhrase == null || !(start <= nePhrase.getStart() && end >= nePhrase.getEnd())){
            //does not cover any named entity phrase
            tag.removeLL(); //remove the tag from the cluster
            if(log.isTraceEnabled()){
                log.trace(" > reduce tag {} - does not cover {}", tag, nePhrase);
            }
        } else if(log.isTraceEnabled()) { //the current Tag covers a named entity phrase
            log.trace(" > keep tag {} for {}", tag, nePhrase);
        }
    }
}
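The reduce() method above walks the tag cluster together with the list of named entity phrase chunks (assumed to be sorted by end offset) and drops every tag that does not fully cover the next phrase. Below is a minimal, self-contained sketch of the same covering check over plain integer spans; the Span record, CoverFilter class and sample offsets are hypothetical stand-ins, not the Stanbol TagLL/Chunk types used above.

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;

// Hypothetical span type standing in for TagLL/Chunk offsets.
record Span(int start, int end) {}

class CoverFilter {
    /**
     * Keeps only candidate spans that fully cover the next phrase span,
     * mirroring the reduce() logic above. Candidates and phrases are both
     * expected in text order; consumed phrases are not revisited.
     */
    static List<Span> reduce(List<Span> candidates, List<Span> phrases) {
        List<Span> kept = new ArrayList<>();
        Deque<Span> remaining = new ArrayDeque<>(phrases);
        for (Span tag : candidates) {
            // drop phrases that end before the current tag starts
            while (!remaining.isEmpty() && remaining.peekFirst().end() <= tag.start()) {
                remaining.pollFirst();
            }
            Span phrase = remaining.peekFirst();
            if (phrase != null && tag.start() <= phrase.start() && tag.end() >= phrase.end()) {
                kept.add(tag); // the tag covers a named entity phrase
            } // otherwise the tag is discarded, like tag.removeLL() above
        }
        return kept;
    }

    public static void main(String[] args) {
        List<Span> phrases = List.of(new Span(5, 10), new Span(20, 25));
        List<Span> tags = List.of(new Span(4, 12), new Span(13, 18), new Span(21, 24));
        System.out.println(reduce(tags, phrases)); // prints only [Span[start=4, end=12]]
    }
}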
// ...
        offset.startOffset(), offset.endOffset(), termAtt});
// ...
while((neChunk == null || neChunk.getEnd() < offset.startOffset())
        && neChunks.hasNext()){
    neChunk = neChunks.next();
    nePhrases.add(neChunk);
}
// ...
log.debug("lookup percentage: {}", lookupCount*100/(float)incrementCount);
return false;
// ...
} else if(offset.endOffset() > neChunk.getStart() || offset.startOffset() < neChunk.getEnd()){
    log.trace("lookup: token [{},{}]: {} | named Entity [{},{}]:{}", new Object[]{
            offset.startOffset(), offset.endOffset(), termAtt,
            neChunk.getStart(), neChunk.getEnd(), neChunk.getSpan()});
Chunk chunk = chunkFactory.createChunk(current.get(0), lastConsumedToken);
chunk.addAnnotation(PHRASE_ANNOTATION, Value.value(phraseTag));
if(log.isDebugEnabled()){
    log.debug(" << add {} phrase {} '{}'", new Object[]{
            phraseType.getPhraseType().name(), chunk, chunk.getSpan()});
}
@Override
public boolean evaluate(Object o) {
    if(o instanceof Chunk){
        Chunk chunk = (Chunk)o;
        Value<NerTag> nerValue = chunk.getAnnotation(NER_ANNOTATION);
        if(nerValue != null){
            NerTag nerTag = nerValue.value();
            String nerType = nerTag.getType() != null
                    ? nerTag.getType().getUnicodeString() : null;
            if(wildcardType || neTypes.contains(nerTag.getTag())
                    || (nerType != null && neTypes.contains(nerType))){
                int[] span = new int[]{chunk.getStart(), chunk.getEnd()};
                Set<String> types = nePhrasesTypes.get(span);
                if(types == null){
                    types = new HashSet<String>(4);
                    nePhrasesTypes.put(span, types);
                }
                types.add(nerType);
                types.add(nerTag.getTag());
                return true;
            }
        }
    }
    return false;
}
}
chunk.addAnnotation(NER_ANNOTATION, Value.value(nerTag, confidence));
log.debug(">> Chunk: (type:{}, startPos: {}) text: '{}'", new Object []{ chunkData.chunk.getType(), chunkData.startToken, chunkData.chunk.getSpan() }); tokenData.morpho != null ? ("(lemma: "+tokenData.morpho.getLemma()+") ") : "", tokenData.token.getAnnotations(POS_ANNOTATION), tokenData.inChunk != null ? tokenData.inChunk.chunk.getSpan() : "none"}); if(log.isDebugEnabled()){ log.debug(" << end Chunk {} '{}' @pos: {}", new Object[]{ activeChunk.chunk, activeChunk.chunk.getSpan(), activeChunk.endToken});
this.chunk = chunk;
Boolean process = null;
for (Value<PhraseTag> phraseAnnotation : chunk.getAnnotations(PHRASE_ANNOTATION)) {
    if (tpc.getProcessedPhraseCategories().contains(phraseAnnotation.value().getCategory())
            || tpc.getProcessedPhraseTags().contains(phraseAnnotation.value().getTag())) {
        isNamedEntity = chunk.getAnnotation(NlpAnnotations.NER_ANNOTATION) != null;
        if(process == null && isNamedEntity
                && tpc.getProcessedPhraseCategories().contains(LexicalCategory.Noun)){
/**
 * Getter for the end character position of the text
 * @return the end character position
 */
public int getEndChar(){
    return chunk.getEnd();
}
/**
 * Getter for the start character position
 * @return the start character position of the selected text span.
 */
public int getStartChar(){
    return chunk.getStart();
}
        tokenList.get(i-chunkTokenCount).getStart(),
        tokenList.get(i-1).getEnd());
chunk.addAnnotation(PHRASE_ANNOTATION,
        new Value<PhraseTag>(tag, chunkProps/(double)chunkTokenCount));
// ...
        tokenList.get(i-chunkTokenCount).getStart(),
        tokenList.get(i-1).getEnd());
chunk.addAnnotation(PHRASE_ANNOTATION,
        new Value<PhraseTag>(tag, chunkProps/(double)chunkTokenCount));
        new Object[]{token.index, token.getTokenText(), token.getTokenLemma(),
                token.isLinkable, token.isMatchable,
                token.inChunk != null
                        ? (token.inChunk.chunk + " " + token.inChunk.chunk.getSpan()) : "none"});
log.debug(">> Chunk: (type:{}, startPos: {}) text: '{}'", new Object []{ chunkData.chunk.getType(), chunkData.startToken, chunkData.chunk.getSpan() }); tokenData.morpho != null ? ("(lemma: "+tokenData.morpho.getLemma()+") ") : "", tokenData.token.getAnnotations(POS_ANNOTATION), tokenData.inChunk != null ? tokenData.inChunk.chunk.getSpan() : "none"}); if(log.isDebugEnabled()){ log.debug(" << end Chunk {} '{}' @pos: {}", new Object[]{ activeChunk.chunk, activeChunk.chunk.getSpan(), activeChunk.endToken});
this.chunk = chunk; Boolean process = null; for (Value<PhraseTag> phraseAnnotation : chunk.getAnnotations(PHRASE_ANNOTATION)) { if (tpc.getProcessedPhraseCategories().contains(phraseAnnotation.value().getCategory()) || tpc.getProcessedPhraseTags().contains(phraseAnnotation.value().getTag())) { isNamedEntity = chunk.getAnnotation(NlpAnnotations.NER_ANNOTATION) != null; if(process == null && isNamedEntity && tpc.getProcessedPhraseCategories().contains(LexicalCategory.Noun)){
/** * Getter for the end character position of the text * @return the end character position */ public int getEndChar(){ return chunk.getEnd(); } /**
/** * Getter for the start character position * @return the start character position of the selected text span. */ public int getStartChar(){ return chunk.getStart(); } /**
chunk.addAnnotation(NlpAnnotations.NER_ANNOTATION, Value.value(ner.tag));
if(log.isTraceEnabled()){
    log.trace(" ... checking match with chunk {}: {}", cd.chunk, cd.chunk.getSpan());
}