org.apache.ctakes.typesystem.type.syntax.WordToken java code examples

/**
 * Returns the covered text of the last {@link WordToken} that lies inside the given annotation.
 * If the annotation covers no word tokens at all, the annotation's own covered text is returned.
 *
 * @param systemView view whose WordToken annotations are searched
 * @param annotation annotation whose span is examined
 * @return text of the final covered word token, or the annotation's text when there is none
 */
public static String getLastWord(JCas systemView, Annotation annotation) {
 List<WordToken> covered = JCasUtil.selectCovered(systemView, WordToken.class, annotation);
 int lastIndex = covered.size() - 1;
 // lastIndex is -1 when no word token is covered by the annotation
 return lastIndex < 0
     ? annotation.getCoveredText()
     : covered.get(lastIndex).getCoveredText();
}

/**
 * Creates a WordToken with the given begin and end offsets, then calls {@code readObject()}.
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public WordToken(JCas jcas, int begin, int end) {
 super(jcas);
 setBegin(begin);
 setEnd(end);
 // custom-initialization hook; invoked last, after both offsets have been set
 readObject();
}

/**
 * Returns the text to use for the wrapped annotation.
 * For a {@link WordToken} whose canonicalForm is neither null nor empty, that canonical
 * (stemmed) form is returned; in every other case the annotation's covered text is returned.
 * @see SetupAuiFirstWord
 */
public String getText() {
  // anything that is not a word token has no canonical form to offer
  if (!(iv_jcasAnnotObj instanceof WordToken)) {
    return iv_jcasAnnotObj.getCoveredText();
  }
  String canonical = ((WordToken) iv_jcasAnnotObj).getCanonicalForm();
  boolean hasCanonical = canonical != null && canonical.length() > 0;
  return hasCanonical ? canonical : iv_jcasAnnotObj.getCoveredText();
}

/**
* Null-safe lookup of the text that represents a word token.
* Some pipelines do not run LVG, so a token may have no canonical form; this method
* falls back step by step instead of letting a null escape to the caller.
*
* @param wordToken of interest
* @return the canonical form when it is non-null and non-empty, otherwise the covered text,
*         otherwise {@link #MISSING_WORDTOKEN_TEXT} when the covered text is null.
*/
static public String getCanonicalForm( final WordToken wordToken ) {
 final String canonical = wordToken.getCanonicalForm();
 if ( canonical == null || canonical.isEmpty() ) {
   // no usable canonical form: fall back to the covered text, then to the placeholder
   final String covered = wordToken.getCoveredText();
   return covered != null ? covered : MISSING_WORDTOKEN_TEXT;
 }
 return canonical;
}

// Scan the word tokens for one that lies within the span of n.
// NOTE(review): this excerpt is truncated; the closing braces are not shown.
while(wtIter.hasNext()) {
  WordToken wt = (WordToken) wtIter.next();
  // tokens whose lower-cased text appears in ignoreWords are skipped
  if(ignoreWords.contains(wt.getCoveredText().toLowerCase())) continue;	
    // n starts at or before the token and ends at or after it: flag and stop scanning
    if(n.getBegin()<=wt.getBegin() && n.getEnd()>=wt.getEnd()) {
      isNE = true;
      break;

// Create a WordToken with the given begin/end offsets and copy token attributes onto it.
WordToken wta = new WordToken(jcas);
wta.setBegin(begin);
wta.setEnd(end);
// cap starts at -1
// NOTE(review): the switch body that presumably derives cap from t.getCaps() is elided in this excerpt
int cap = -1;
switch (t.getCaps())
  break;
wta.setCapitalization(cap);
wta.setNumPosition(numPos);
// hand the new word token back through bta
bta = wta;
break;

sent.addToIndexes();
// Create and index one WordToken per entry of tokenArrays[i].
// NOTE(review): this excerpt is truncated; the closing braces of the else branch and the loop are not shown.
for (int j=0; j < tokenArrays[i].length; j++) {
  WordToken tok = new WordToken(jcas);
  LineAndTokenPosition word = new LineAndTokenPosition();
  // line numbers passed to the converter are i+1
  // NOTE(review): only the line is set on `word` here — confirm whether the token position is set in elided code
  word.setLine(i+1);
  LineTokenToCharacterOffsetConverter.BeginAndEndCharacterOffsetPair tPos = converter.convert(word);
  if (tPos == null) {
    // converter produced no offsets: fall back to begin 0, end 1
    tok.setBegin(0);
    tok.setEnd(1);
  } else {
    tok.setBegin(tPos.getBegin());
    // the stored end is one past the end offset reported by the converter
    tok.setEnd(tPos.getEnd() + 1);
  tok.setTokenNumber(tokNum);
  tokNum++;    
  tok.addToIndexes();

// Walk the word tokens and add a Markable to drList, weList or ptList depending on the token text.
// NOTE(review): this excerpt is truncated; the closing braces are not shown.
for(int i = 0; i < words.size(); i++){
 WordToken word = words.get(i);
 String text = word.getCoveredText();
 // tokens whose part-of-speech tag starts with "PRP"
 if(word.getPartOfSpeech().startsWith("PRP")){
  if(text.equalsIgnoreCase("I") || text.equalsIgnoreCase("me") || text.equalsIgnoreCase("my")){
   // "I" / "me" / "my" go to drList
   Markable drMention = new Markable(jcas, word.getBegin(), word.getEnd());
   addToList(jcas, drList, drMention);
  }else if(text.equalsIgnoreCase("we") || text.equalsIgnoreCase("us") || text.equalsIgnoreCase("our")){
   // "we" / "us" / "our" go to weList
   Markable weMention = new Markable(jcas, word.getBegin(), word.getEnd());
   addToList(jcas, weList, weMention);
  }else if(text.equalsIgnoreCase("it")){
   // "it" goes to ptList
   Markable ptMention = new Markable(jcas, word.getBegin(), word.getEnd());
   addToList(jcas, ptList, ptMention);
 }else if(text.equalsIgnoreCase("mrs.") || text.equalsIgnoreCase("mr.") || text.equalsIgnoreCase("ms.")){
  // a title: the mention spans from the title to the end of the following token
  // NOTE(review): words.get(i+1) is not bounds-checked here; a title as the last token would throw — confirm against the full source
  Markable ptMention = new Markable(jcas, word.getBegin(), words.get(i+1).getEnd());
  addToList(jcas, ptList, ptMention);
 }else if(text.equalsIgnoreCase("patient") || text.equalsIgnoreCase("pt")){
  // "patient" / "pt" go to ptList
  Markable ptMention = new Markable(jcas, word.getBegin(), word.getEnd());
  addToList(jcas, ptList, ptMention);

 /**
  * Builds a Markable that starts at {@code drToken} and ends at the end of the dependency
  * node reached by repeatedly following heads whose part-of-speech tag is "NNP".
  * <p>
  * The walk stops at the first head that is missing, has id 0, or is not tagged "NNP".
  * If no dependency node exists for the token, or the node reached ends before the token
  * begins, the Markable simply covers the token itself.
  *
  * @param jcas    JCas in which the Markable is created
  * @param drToken word token that starts the mention
  * @return a new Markable from the token's begin to the computed end offset
  */
 private static Markable getDoctorMarkable(JCas jcas, WordToken drToken){
  ConllDependencyNode nnpHead = DependencyUtility.getDependencyNode(jcas, drToken);
  // Explicit null checks replace the former catch of NullPointerException;
  // "NNP".equals(...) also tolerates a head without a part-of-speech tag.
  while(nnpHead != null){
    ConllDependencyNode parent = nnpHead.getHead();
    if(parent == null || parent.getId() == 0 || !"NNP".equals(parent.getPostag())){
      break;
    }
    nnpHead = parent;
  }

  int start = drToken.getBegin();
  // without a dependency node there is nothing to extend to: use the token's own end
  int end = (nnpHead == null) ? drToken.getEnd() : nnpHead.getEnd();
  if(end < start){
    end = drToken.getEnd();
  }

  return new Markable(jcas, start, end);
 }
}

/**
 * Creates a WordToken in the given JCas without setting offsets, then calls {@code readObject()}.
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 */
public WordToken(JCas jcas) {
 super(jcas);
 // custom-initialization hook
 readObject();   
}

/**
 * Processes every WordToken that lies entirely inside the given range: sets its canonical
 * form and, when {@code postLemmas} is enabled, its lemma.
 * The word used is the token's spell-correction suggestion when one is present, otherwise
 * the token's substring of {@code text}. Words found in the exclusion set are left untouched.
 *
 * @param jcas       JCas whose WordToken index is iterated
 * @param text       text from which token substrings are taken
 * @param rangeBegin lowest begin offset a token may have to be processed
 * @param rangeEnd   highest end offset a token may have to be processed
 * @throws AnalysisEngineProcessException declared for the helpers invoked here
 */
protected void annotateRange(JCas jcas, String text, int rangeBegin,
    int rangeEnd)
    throws AnalysisEngineProcessException {
  Iterator<?> tokens = jcas.getJFSIndexRepository()
      .getAnnotationIndex(WordToken.type)
      .iterator();
  while (tokens.hasNext()) {
    WordToken token = (WordToken) tokens.next();
    boolean outsideRange = token.getBegin() < rangeBegin || token.getEnd() > rangeEnd;
    if (outsideRange) {
      continue;
    }
    String word = text.substring(token.getBegin(), token.getEnd());
    // a non-empty spell-correction suggestion takes precedence over the original word
    String suggestion = token.getSuggestion();
    boolean hasSuggestion = suggestion != null && !suggestion.isEmpty();
    String lookup = hasSuggestion ? suggestion : word;
    // excluded words receive neither a canonical form nor a lemma
    if (exclusionSet.contains(lookup)) {
      continue;
    }
    setCanonicalForm(token, lookup);
    if (postLemmas) {
      setLemma(token, lookup, jcas);
    }
  }
}

// When the old system token's class name is WordToken's, create a WordToken in goldView with the same offsets.
// NOTE(review): braces are unbalanced in this excerpt (no "{" after the first condition) — the snippet appears truncated.
if (oldSystemTokenClass.equals(WordToken.class.getName()))
 newGoldToken = new WordToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd());
} else if (oldSystemTokenClass.equals(ContractionToken.class.getName()))

// Scan the word tokens for one that lies within the span of n.
// NOTE(review): this excerpt is truncated; the closing braces are not shown.
while(wtIter.hasNext()) {
  WordToken wt = (WordToken) wtIter.next();
  // tokens whose lower-cased text appears in ignoreWords are skipped
  if(ignoreWords.contains(wt.getCoveredText().toLowerCase())) continue;	
    // n starts at or before the token and ends at or after it: flag and stop scanning
    if(n.getBegin()<=wt.getBegin() && n.getEnd()>=wt.getEnd()) {
      isNE = true;
      break;

// Create a WordToken with the given begin/end offsets and copy token attributes onto it.
WordToken wta = new WordToken(jcas);
wta.setBegin(begin);
wta.setEnd(end);
// cap starts at -1
// NOTE(review): the switch body that presumably derives cap from t.getCaps() is elided in this excerpt
int cap = -1;
switch (t.getCaps())
  break;
wta.setCapitalization(cap);
wta.setNumPosition(numPos);
// hand the new word token back through bta
bta = wta;
break;

sent.addToIndexes();
// Create and index one WordToken per entry of tokenArrays[i].
// NOTE(review): this excerpt is truncated; the closing braces of the else branch and the loop are not shown.
for (int j=0; j < tokenArrays[i].length; j++) {
  WordToken tok = new WordToken(jcas);
  LineAndTokenPosition word = new LineAndTokenPosition();
  // line numbers passed to the converter are i+1
  // NOTE(review): only the line is set on `word` here — confirm whether the token position is set in elided code
  word.setLine(i+1);
  LineTokenToCharacterOffsetConverter.BeginAndEndCharacterOffsetPair tPos = converter.convert(word);
  if (tPos == null) {
    // converter produced no offsets: fall back to begin 0, end 1
    tok.setBegin(0);
    tok.setEnd(1);
  } else {
    tok.setBegin(tPos.getBegin());
    // the stored end is one past the end offset reported by the converter
    tok.setEnd(tPos.getEnd() + 1);
  tok.setTokenNumber(tokNum);
  tokNum++;    
  tok.addToIndexes();

// Walk the word tokens and add a Markable to drList, weList or ptList depending on the token text.
// NOTE(review): this excerpt is truncated; the closing braces are not shown.
for(int i = 0; i < words.size(); i++){
 WordToken word = words.get(i);
 String text = word.getCoveredText();
 // tokens whose part-of-speech tag starts with "PRP"
 if(word.getPartOfSpeech().startsWith("PRP")){
  if(text.equalsIgnoreCase("I") || text.equalsIgnoreCase("me") || text.equalsIgnoreCase("my")){
   // "I" / "me" / "my" go to drList
   Markable drMention = new Markable(jcas, word.getBegin(), word.getEnd());
   addToList(jcas, drList, drMention);
  }else if(text.equalsIgnoreCase("we") || text.equalsIgnoreCase("us") || text.equalsIgnoreCase("our")){
   // "we" / "us" / "our" go to weList
   Markable weMention = new Markable(jcas, word.getBegin(), word.getEnd());
   addToList(jcas, weList, weMention);
  }else if(text.equalsIgnoreCase("it")){
   // "it" goes to ptList
   Markable ptMention = new Markable(jcas, word.getBegin(), word.getEnd());
   addToList(jcas, ptList, ptMention);
 }else if(text.equalsIgnoreCase("mrs.") || text.equalsIgnoreCase("mr.") || text.equalsIgnoreCase("ms.")){
  // a title: the mention spans from the title to the end of the following token
  // NOTE(review): words.get(i+1) is not bounds-checked here; a title as the last token would throw — confirm against the full source
  Markable ptMention = new Markable(jcas, word.getBegin(), words.get(i+1).getEnd());
  addToList(jcas, ptList, ptMention);
 }else if(text.equalsIgnoreCase("patient") || text.equalsIgnoreCase("pt")){
  // "patient" / "pt" go to ptList
  Markable ptMention = new Markable(jcas, word.getBegin(), word.getEnd());
  addToList(jcas, ptList, ptMention);

/**
* Null-safe lookup of the text that represents a word token.
* Some pipelines do not run LVG, so a token may have no canonical form; this method
* falls back step by step instead of letting a null escape to the caller.
*
* @param wordToken of interest
* @return the canonical form when it is non-null and non-empty, otherwise the covered text,
*         otherwise {@link #MISSING_WORDTOKEN_TEXT} when the covered text is null.
*/
static public String getCanonicalForm( final WordToken wordToken ) {
 final String canonical = wordToken.getCanonicalForm();
 if ( canonical == null || canonical.isEmpty() ) {
   // no usable canonical form: fall back to the covered text, then to the placeholder
   final String covered = wordToken.getCoveredText();
   return covered != null ? covered : MISSING_WORDTOKEN_TEXT;
 }
 return canonical;
}

 /**
  * Builds a Markable that starts at {@code drToken} and ends at the end of the dependency
  * node reached by repeatedly following heads whose part-of-speech tag is "NNP".
  * <p>
  * The walk stops at the first head that is missing, has id 0, or is not tagged "NNP".
  * If no dependency node exists for the token, or the node reached ends before the token
  * begins, the Markable simply covers the token itself.
  *
  * @param jcas    JCas in which the Markable is created
  * @param drToken word token that starts the mention
  * @return a new Markable from the token's begin to the computed end offset
  */
 private static Markable getDoctorMarkable(JCas jcas, WordToken drToken){
  ConllDependencyNode nnpHead = DependencyUtility.getDependencyNode(jcas, drToken);
  // Explicit null checks replace the former catch of NullPointerException;
  // "NNP".equals(...) also tolerates a head without a part-of-speech tag.
  while(nnpHead != null){
    ConllDependencyNode parent = nnpHead.getHead();
    if(parent == null || parent.getId() == 0 || !"NNP".equals(parent.getPostag())){
      break;
    }
    nnpHead = parent;
  }

  int start = drToken.getBegin();
  // without a dependency node there is nothing to extend to: use the token's own end
  int end = (nnpHead == null) ? drToken.getEnd() : nnpHead.getEnd();
  if(end < start){
    end = drToken.getEnd();
  }

  return new Markable(jcas, start, end);
 }
}

/**
 * Creates a WordToken in the given JCas without setting offsets, then calls {@code readObject()}.
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 */
public WordToken(JCas jcas) {
 super(jcas);
 // custom-initialization hook
 readObject();   
}

/**
 * Processes every WordToken that lies entirely inside the given range: sets its canonical
 * form and, when {@code postLemmas} is enabled, its lemma.
 * The word used is the token's spell-correction suggestion when one is present, otherwise
 * the token's substring of {@code text}. Words found in the exclusion set are left untouched.
 *
 * @param jcas       JCas whose WordToken index is iterated
 * @param text       text from which token substrings are taken
 * @param rangeBegin lowest begin offset a token may have to be processed
 * @param rangeEnd   highest end offset a token may have to be processed
 * @throws AnalysisEngineProcessException declared for the helpers invoked here
 */
protected void annotateRange(JCas jcas, String text, int rangeBegin,
    int rangeEnd)
    throws AnalysisEngineProcessException {
  Iterator<?> tokens = jcas.getJFSIndexRepository()
      .getAnnotationIndex(WordToken.type)
      .iterator();
  while (tokens.hasNext()) {
    WordToken token = (WordToken) tokens.next();
    boolean outsideRange = token.getBegin() < rangeBegin || token.getEnd() > rangeEnd;
    if (outsideRange) {
      continue;
    }
    String word = text.substring(token.getBegin(), token.getEnd());
    // a non-empty spell-correction suggestion takes precedence over the original word
    String suggestion = token.getSuggestion();
    boolean hasSuggestion = suggestion != null && !suggestion.isEmpty();
    String lookup = hasSuggestion ? suggestion : word;
    // excluded words receive neither a canonical form nor a lemma
    if (exclusionSet.contains(lookup)) {
      continue;
    }
    setCanonicalForm(token, lookup);
    if (postLemmas) {
      setLemma(token, lookup, jcas);
    }
  }
}

Javadoc

Differentiates a token as being a word rather than a punctuation, symbol, newline, contraction, or number. Equivalent to cTAKES: edu.mayo.bmi.uima.core.type.WordToken Updated by JCasGen Mon Apr 17 15:42:06 EDT 2017 XML source: /private/tmp/murali/ctakes/4.0.0/target/checkout/ctakes-smoking-status/target/jcasgen/typesystem.xml

Most used methods

getCoveredText
setBegin
setEnd
getCanonicalForm
getter for canonicalForm - gets
readObject
Write your own initialization here
getBegin
getEnd
<init>
addToIndexes
getCapitalization
getter for capitalization - gets
getNumPosition
getter for numPosition - gets
getPartOfSpeech

Popular in Java

Making http requests using okhttp
getContentResolver (Context)
getSharedPreferences (Context)
setContentView (Activity)
GregorianCalendar (java.util)
GregorianCalendar is a concrete subclass of Calendar and provides the standard calendar used by most
Properties (java.util)
A Properties object is a Hashtable where the keys and values must be Strings. Each property can have
TimerTask (java.util)
The TimerTask class represents a task to run at a specified time. The task may be run once or repeat
Stream (java.util.stream)
A sequence of elements supporting sequential and parallel aggregate operations. The following exampl
Options (org.apache.commons.cli)
Main entry-point into the library. Options represents a collection of Option objects, which describ
BufferedImage (java.awt.image)
The BufferedImage subclass describes an java.awt.Image with an accessible buffer of image data. All
Best plugins for Eclipse

How to use WordToken in org.apache.ctakes.typesystem.type.syntax

Best Java code snippets using org.apache.ctakes.typesystem.type.syntax.WordToken (Showing top 20 results out of 315)

How to use
WordToken
in
org.apache.ctakes.typesystem.type.syntax