org.apache.stanbol.enhancer.nlp.model.Section.getSpan java code examples

/**
 * Builds a single-line description of the form
 * <code>[index,token] chunk: ...| sentence: ...</code>.
 * <code>none</code> is written for a missing chunk or a missing section; the
 * text of the section is cut after 45 characters and marked with
 * <code> ...</code> if it is longer than that.
 */
@Override
public String toString() {
  final StringBuilder out = new StringBuilder();
  out.append('[').append(token.index).append(',').append(token.token);
  out.append("] chunk: ");
  if(token.inChunk != null){
    out.append(token.inChunk.chunk);
  } else {
    out.append("none");
  }
  out.append("| sentence: ");
  if(section == null){
    //no section available: nothing more to describe
    return out.append("none").toString();
  }
  if(section.getSpan().length() <= 45){
    out.append(section.getSpan());
  } else {
    //only show the first 45 chars of longer sections
    out.append(section.getSpan().substring(0, 45)).append(" ...");
  }
  return out.toString();
}

/**
 * Builds a single-line description of the form
 * <code>[index,token] chunk: ...| sentence: ...</code>.
 * <code>none</code> is written for a missing chunk or a missing section; the
 * text of the section is cut after 45 characters and marked with
 * <code> ...</code> if it is longer than that.
 */
@Override
public String toString() {
  final StringBuilder out = new StringBuilder();
  out.append('[').append(token.index).append(',').append(token.token);
  out.append("] chunk: ");
  if(token.inChunk != null){
    out.append(token.inChunk.chunk);
  } else {
    out.append("none");
  }
  out.append("| sentence: ");
  if(section == null){
    //no section available: nothing more to describe
    return out.append("none").toString();
  }
  if(section.getSpan().length() <= 45){
    out.append(section.getSpan());
  } else {
    //only show the first 45 chars of longer sections
    out.append(section.getSpan().substring(0, 45)).append(" ...");
  }
  return out.toString();
}

/**
 * Tokenizes the text of the parsed section and adds every detected token to
 * that section.
 * <p>
 * The text of the section is read once, parsed to the tokenizer returned by
 * <code>getTokenizer(langauge)</code> and for each span reported by the
 * tokenizer <code>section.addToken(start, end)</code> is called with the
 * start/end offsets of that span.
 *
 * @param section the section to tokenize
 * @param langauge the language parsed to <code>getTokenizer(..)</code>
 * @return the tokens added to the section, in the order the tokenizer
 * reported their spans
 */
private List<Token> tokenize(Section section,String langauge) {
  Tokenizer tokenizer = getTokenizer(langauge);
  //read the text once and reuse it for the tokenizer
  String text = section.getSpan();
  opennlp.tools.util.Span[] tokenSpans = tokenizer.tokenizePos(text);
  //the exact number of tokens is known here, so no estimate is needed
  List<Token> tokens = new ArrayList<Token>(tokenSpans.length);
  for(opennlp.tools.util.Span tokenSpan : tokenSpans){
    Token token = section.addToken(tokenSpan.getStart(), tokenSpan.getEnd());
    log.trace(" > add {}",token);
    tokens.add(token);
  }
  return tokens;
}

/**
 * Getter for the text of the <code>section</code> covered by
 * <code>tokenCount</code> consecutive entries of <code>tokens</code>,
 * beginning with the entry at index <code>start</code>.
 * Given the Tokens
 * <pre>
 *    [This, is, an, Example]
 * </pre>
 * the parameters <code>0</code> and <code>3</code> will return
 * <pre>
 *     This is an
 * </pre>
 * The start offset of the section is subtracted from the token offsets to
 * get indexes within the text of the section.
 * @param start the index within <code>tokens</code> of the first token to
 * be included
 * @param tokenCount the number of tokens to be included, counted from
 * <code>start</code>
 * @return the text from the start of the token at <code>start</code> to the
 * end of the token at <code>start+tokenCount-1</code>.
 */
public String getTokenText(int start, int tokenCount){
  final int sectionStart = section.getStart();
  final String sectionText = section.getSpan();
  final int from = tokens.get(start).token.getStart() - sectionStart;
  //index of the last token that is part of the returned text
  final int lastIndex = start + (tokenCount - 1);
  final int to = tokens.get(lastIndex).token.getEnd() - sectionStart;
  return sectionText.substring(from, to);
}

/**
 * Getter for the text of the <code>section</code> covered by
 * <code>tokenCount</code> consecutive entries of <code>tokens</code>,
 * beginning with the entry at index <code>start</code>.
 * Given the Tokens
 * <pre>
 *    [This, is, an, Example]
 * </pre>
 * the parameters <code>0</code> and <code>3</code> will return
 * <pre>
 *     This is an
 * </pre>
 * The start offset of the section is subtracted from the token offsets to
 * get indexes within the text of the section.
 * @param start the index within <code>tokens</code> of the first token to
 * be included
 * @param tokenCount the number of tokens to be included, counted from
 * <code>start</code>
 * @return the text from the start of the token at <code>start</code> to the
 * end of the token at <code>start+tokenCount-1</code>.
 */
public String getTokenText(int start, int tokenCount){
  final int sectionStart = section.getStart();
  final String sectionText = section.getSpan();
  final int from = tokens.get(start).token.getStart() - sectionStart;
  //index of the last token that is part of the returned text
  final int lastIndex = start + (tokenCount - 1);
  final int to = tokens.get(lastIndex).token.getEnd() - sectionStart;
  return sectionText.substring(from, to);
}

/**
 * Creates an occurrence spanning from the start of the first to the end of
 * the last parsed token.
 * <p>
 * The text of the parsed sentence is used as context. If that text is longer
 * than <code>MAX_CONTEXT_LENGTH</code> the context is instead cut out of the
 * text of <code>start.getContext()</code>, reaching
 * <code>CONTEXT_TOKEN_COUNT</code> positions before the start and after the
 * end of the occurrence (limited to the bounds of that text).
 *
 * @param sentence the section providing the default context
 * @param start the first token of the occurrence
 * @param end the last token of the occurrence
 */
private Occurrence(Section sentence,Token start,Token end){
  this.start = start.getStart();
  this.end = end.getEnd();
  String contextText = sentence.getSpan();
  if(contextText.length() > MAX_CONTEXT_LENGTH){
    //sentence is too long: use a window around the occurrence instead
    final String fullText = start.getContext().getSpan();
    final int from = Math.max(0, this.start-CONTEXT_TOKEN_COUNT);
    //NOTE(review): substring(..) already treats the end index as exclusive,
    //so the trailing -1 looks like it drops one char - confirm intent
    final int to = Math.min(this.end+CONTEXT_TOKEN_COUNT, start.getContext().getEnd())-1;
    contextText = fullText.substring(from, to);
  }
  this.context = contextText;
}
/**

/**
 * Creates an occurrence spanning from the start of the first to the end of
 * the last parsed token.
 * <p>
 * The text of the parsed sentence is used as context. If that text is longer
 * than <code>MAX_CONTEXT_LENGTH</code> the context is instead cut out of the
 * text of <code>start.getContext()</code>, reaching
 * <code>CONTEXT_TOKEN_COUNT</code> positions before the start and after the
 * end of the occurrence (limited to the bounds of that text).
 *
 * @param sentence the section providing the default context
 * @param start the first token of the occurrence
 * @param end the last token of the occurrence
 */
private Occurrence(Section sentence,Token start,Token end){
  this.start = start.getStart();
  this.end = end.getEnd();
  String contextText = sentence.getSpan();
  if(contextText.length() > MAX_CONTEXT_LENGTH){
    //sentence is too long: use a window around the occurrence instead
    final String fullText = start.getContext().getSpan();
    final int from = Math.max(0, this.start-CONTEXT_TOKEN_COUNT);
    //NOTE(review): substring(..) already treats the end index as exclusive,
    //so the trailing -1 looks like it drops one char - confirm intent
    final int to = Math.min(this.end+CONTEXT_TOKEN_COUNT, start.getContext().getEnd())-1;
    contextText = fullText.substring(from, to);
  }
  this.context = contextText;
}
/**

Section section = sections.next();
opennlp.tools.util.Span[] tokenSpans = tokenizer.tokenizePos(section.getSpan());
for(int i=0;i<tokenSpans.length;i++){
  Token token = section.addToken(tokenSpans[i].getStart(), tokenSpans[i].getEnd());

String sentence = sentences.get(i).getSpan();

throw new EngineException("Missing POS value for Token '"
  + token.getSpan()+"' of ContentItem "+ci.getUri()
  + "(Sentence: '"+sentence.getSpan()+"'). This may "
  + "indicate that a POS tagging Engine is missing in "
  + "the EnhancementChain or that the used POS tagging "

if(span.getStart() >= span.getEnd()){ //save guard against empty spans
  log.warn("Detected Empty Span {} in section {}: '{}'",
    new Object[]{span,section, section.getSpan()});

if(span.getStart() >= span.getEnd()){ //save guard against empty spans
  log.warn("Detected Empty Span {} in section {}: '{}'",
    new Object[]{span,section, section.getSpan()});

Popular methods of Section

getTokens
The Tokens covered by this Sentence. Returned Iterators MUST NOT throw ConcurrentModificationExcepti
getEnclosed
Iterates over all enclosed Spans within the parsed window. Only Spans with one of the parsed types are
getEnd
getStart
addToken
Adds a Token relative to this Sentence
getType

Popular in Java

Finding current android device location
getSystemService (Context)
onCreateOptionsMenu (Activity)
orElseThrow (Optional)
Return the contained value, if present, otherwise throw an exception to be created by the provided s
Socket (java.net)
Provides a client-side TCP socket.
URLConnection (java.net)
A connection to a URL for reading or writing. For HTTP connections, see HttpURLConnection for docume
ByteBuffer (java.nio)
A buffer for bytes. A byte buffer can be created in either one of the following ways: * #allocate
Menu (java.awt)
Filter (javax.servlet)
A filter is an object that performs filtering tasks on either the request to a resource (a servlet o
BasicDataSource (org.apache.commons.dbcp)
Basic implementation of javax.sql.DataSource that is configured via JavaBeans properties. This is no
Top 12 Jupyter Notebook extensions

How to use the getSpan method in org.apache.stanbol.enhancer.nlp.model.Section

Best Java code snippets using org.apache.stanbol.enhancer.nlp.model.Section.getSpan (Showing top 12 results out of 315)

How to use
getSpan
method
in
org.apache.stanbol.enhancer.nlp.model.Section