/**
 * Returns the text of the final word inside an annotation.
 * <p>
 * The {@link WordToken}s covered by {@code annotation} in {@code systemView} are looked up; the covered
 * text of the last element of that list is returned. When the annotation covers no WordToken at all,
 * the annotation's own covered text is returned instead.
 *
 * @param systemView view whose WordToken annotations are searched
 * @param annotation annotation whose last word is wanted
 * @return covered text of the last covered WordToken, or of the annotation itself if none is covered
 */
public static String getLastWord(JCas systemView, Annotation annotation) {
    final List<WordToken> coveredWords = JCasUtil.selectCovered(systemView, WordToken.class, annotation);
    if (!coveredWords.isEmpty()) {
        final int lastIndex = coveredWords.size() - 1;
        return coveredWords.get(lastIndex).getCoveredText();
    }
    // No word tokens inside the span: fall back to the whole annotation text.
    return annotation.getCoveredText();
}
/**
 * Creates a WordToken in the given JCas spanning the offsets {@code begin} to {@code end}.
 * After the superclass constructor runs, the begin and end offsets are set and {@code readObject()}
 * is invoked. No {@code addToIndexes()} call is made in this constructor body.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public WordToken(JCas jcas, int begin, int end) { super(jcas); setBegin(begin); setEnd(end); readObject(); }
/**
 * Returns the text to use for the wrapped annotation.
 * <p>
 * When the wrapped annotation is a {@link WordToken} whose canonical form is neither null nor empty,
 * that canonical (stemmed) form is returned. In every other case the covered text of the wrapped
 * annotation is returned.
 *
 * @return the canonical form when available, otherwise the covered text
 * @see SetupAuiFirstWord
 */
public String getText() {
    if (!(iv_jcasAnnotObj instanceof WordToken)) {
        return iv_jcasAnnotObj.getCoveredText();
    }
    final String stemmed = ((WordToken) iv_jcasAnnotObj).getCanonicalForm();
    final boolean hasStem = stemmed != null && stemmed.length() > 0;
    return hasStem ? stemmed : iv_jcasAnnotObj.getCoveredText();
}
/**
 * Null-safe accessor for the text of a word token.
 * <p>
 * Pipelines that do not run LVG leave the canonical form unset, so callers cannot rely on it.
 * The lookup order is: canonical form (if non-null and non-empty), then covered text (if non-null),
 * then {@link #MISSING_WORDTOKEN_TEXT}.
 *
 * @param wordToken of interest
 * @return the word token's canonical form, else its covered text, else {@link #MISSING_WORDTOKEN_TEXT}
 */
static public String getCanonicalForm( final WordToken wordToken ) {
   final String canonical = wordToken.getCanonicalForm();
   final boolean hasCanonical = canonical != null && !canonical.isEmpty();
   if ( hasCanonical ) {
      return canonical;
   }
   final String covered = wordToken.getCoveredText();
   return covered == null ? MISSING_WORDTOKEN_TEXT : covered;
}
while(wtIter.hasNext()) { WordToken wt = (WordToken) wtIter.next(); if(ignoreWords.contains(wt.getCoveredText().toLowerCase())) continue; if(n.getBegin()<=wt.getBegin() && n.getEnd()>=wt.getEnd()) { isNE = true; break;
WordToken wta = new WordToken(jcas); wta.setBegin(begin); wta.setEnd(end); int cap = -1; switch (t.getCaps()) break; wta.setCapitalization(cap); wta.setNumPosition(numPos); bta = wta; break;
sent.addToIndexes(); for (int j=0; j < tokenArrays[i].length; j++) { WordToken tok = new WordToken(jcas); LineAndTokenPosition word = new LineAndTokenPosition(); word.setLine(i+1); LineTokenToCharacterOffsetConverter.BeginAndEndCharacterOffsetPair tPos = converter.convert(word); if (tPos == null) { tok.setBegin(0); tok.setEnd(1); } else { tok.setBegin(tPos.getBegin()); tok.setEnd(tPos.getEnd() + 1); tok.setTokenNumber(tokNum); tokNum++; tok.addToIndexes();
for(int i = 0; i < words.size(); i++){ WordToken word = words.get(i); String text = word.getCoveredText(); if(word.getPartOfSpeech().startsWith("PRP")){ if(text.equalsIgnoreCase("I") || text.equalsIgnoreCase("me") || text.equalsIgnoreCase("my")){ Markable drMention = new Markable(jcas, word.getBegin(), word.getEnd()); addToList(jcas, drList, drMention); }else if(text.equalsIgnoreCase("we") || text.equalsIgnoreCase("us") || text.equalsIgnoreCase("our")){ Markable weMention = new Markable(jcas, word.getBegin(), word.getEnd()); addToList(jcas, weList, weMention); }else if(text.equalsIgnoreCase("it")){ Markable ptMention = new Markable(jcas, word.getBegin(), word.getEnd()); addToList(jcas, ptList, ptMention); }else if(text.equalsIgnoreCase("mrs.") || text.equalsIgnoreCase("mr.") || text.equalsIgnoreCase("ms.")){ Markable ptMention = new Markable(jcas, word.getBegin(), words.get(i+1).getEnd()); addToList(jcas, ptList, ptMention); }else if(text.equalsIgnoreCase("patient") || text.equalsIgnoreCase("pt")){ Markable ptMention = new Markable(jcas, word.getBegin(), word.getEnd()); addToList(jcas, ptList, ptMention);
/**
 * Builds a {@link Markable} for a doctor mention that starts at the given token.
 * <p>
 * Starting from the dependency node of {@code drToken}, the chain of heads is followed for as long as
 * the head exists, its id is not 0, and its part-of-speech tag is {@code "NNP"}. The markable spans
 * from the begin of {@code drToken} to the end of the last node reached. If no dependency node is
 * found for the token, or the node reached ends before the token begins, the token's own end offset
 * is used instead.
 *
 * @param jcas    CAS in which the markable is created
 * @param drToken token that starts the doctor mention
 * @return a new Markable covering the token and, where present, the NNP heads above it
 */
private static Markable getDoctorMarkable(JCas jcas, WordToken drToken){
    ConllDependencyNode nnpHead = DependencyUtility.getDependencyNode(jcas, drToken);
    while(nnpHead != null){
        ConllDependencyNode head = nnpHead.getHead();
        // Constant-first equals tolerates a missing POS tag without relying on a caught NPE.
        if(head == null || head.getId() == 0 || !"NNP".equals(head.getPostag())){
            break;
        }
        nnpHead = head;
    }
    int start = drToken.getBegin();
    // With no dependency node there is nothing to extend to; keep the token's own span.
    int end = (nnpHead == null) ? drToken.getEnd() : nnpHead.getEnd();
    if(end < start){
        end = drToken.getEnd();
    }
    return new Markable(jcas, start, end);
}
}
/**
 * Creates a WordToken in the given JCas without setting any offsets.
 * After the superclass constructor runs, {@code readObject()} is invoked; begin and end are left
 * for the caller to set.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 */
public WordToken(JCas jcas) { super(jcas); readObject(); }
/** * A utility method that annotates a given range. */ protected void annotateRange(JCas jcas, String text, int rangeBegin, int rangeEnd) throws AnalysisEngineProcessException { JFSIndexRepository indexes = jcas.getJFSIndexRepository(); Iterator<?> wordItr = indexes.getAnnotationIndex(WordToken.type) .iterator(); while (wordItr.hasNext()) { WordToken wordAnnotation = (WordToken) wordItr.next(); if (wordAnnotation.getBegin() >= rangeBegin && wordAnnotation.getEnd() <= rangeEnd) { String word = text.substring(wordAnnotation.getBegin(), wordAnnotation.getEnd()); // if the original word was misspelled, use the spell correction String suggestion = wordAnnotation.getSuggestion(); if ((suggestion != null) && (suggestion.length() > 0)) { word = suggestion; } // skip past words that are part of the exclusion set if (exclusionSet.contains(word)) continue; setCanonicalForm(wordAnnotation, word); if (postLemmas) setLemma(wordAnnotation, word, jcas); } } }
if (oldSystemTokenClass.equals(WordToken.class.getName())) newGoldToken = new WordToken(goldView, oldSystemToken.getBegin(), oldSystemToken.getEnd()); } else if (oldSystemTokenClass.equals(ContractionToken.class.getName()))
while(wtIter.hasNext()) { WordToken wt = (WordToken) wtIter.next(); if(ignoreWords.contains(wt.getCoveredText().toLowerCase())) continue; if(n.getBegin()<=wt.getBegin() && n.getEnd()>=wt.getEnd()) { isNE = true; break;
WordToken wta = new WordToken(jcas); wta.setBegin(begin); wta.setEnd(end); int cap = -1; switch (t.getCaps()) break; wta.setCapitalization(cap); wta.setNumPosition(numPos); bta = wta; break;
sent.addToIndexes(); for (int j=0; j < tokenArrays[i].length; j++) { WordToken tok = new WordToken(jcas); LineAndTokenPosition word = new LineAndTokenPosition(); word.setLine(i+1); LineTokenToCharacterOffsetConverter.BeginAndEndCharacterOffsetPair tPos = converter.convert(word); if (tPos == null) { tok.setBegin(0); tok.setEnd(1); } else { tok.setBegin(tPos.getBegin()); tok.setEnd(tPos.getEnd() + 1); tok.setTokenNumber(tokNum); tokNum++; tok.addToIndexes();
for(int i = 0; i < words.size(); i++){ WordToken word = words.get(i); String text = word.getCoveredText(); if(word.getPartOfSpeech().startsWith("PRP")){ if(text.equalsIgnoreCase("I") || text.equalsIgnoreCase("me") || text.equalsIgnoreCase("my")){ Markable drMention = new Markable(jcas, word.getBegin(), word.getEnd()); addToList(jcas, drList, drMention); }else if(text.equalsIgnoreCase("we") || text.equalsIgnoreCase("us") || text.equalsIgnoreCase("our")){ Markable weMention = new Markable(jcas, word.getBegin(), word.getEnd()); addToList(jcas, weList, weMention); }else if(text.equalsIgnoreCase("it")){ Markable ptMention = new Markable(jcas, word.getBegin(), word.getEnd()); addToList(jcas, ptList, ptMention); }else if(text.equalsIgnoreCase("mrs.") || text.equalsIgnoreCase("mr.") || text.equalsIgnoreCase("ms.")){ Markable ptMention = new Markable(jcas, word.getBegin(), words.get(i+1).getEnd()); addToList(jcas, ptList, ptMention); }else if(text.equalsIgnoreCase("patient") || text.equalsIgnoreCase("pt")){ Markable ptMention = new Markable(jcas, word.getBegin(), word.getEnd()); addToList(jcas, ptList, ptMention);
/**
 * Null-safe accessor for the text of a word token.
 * <p>
 * Pipelines that do not run LVG leave the canonical form unset, so callers cannot rely on it.
 * The lookup order is: canonical form (if non-null and non-empty), then covered text (if non-null),
 * then {@link #MISSING_WORDTOKEN_TEXT}.
 *
 * @param wordToken of interest
 * @return the word token's canonical form, else its covered text, else {@link #MISSING_WORDTOKEN_TEXT}
 */
static public String getCanonicalForm( final WordToken wordToken ) {
   final String canonical = wordToken.getCanonicalForm();
   final boolean hasCanonical = canonical != null && !canonical.isEmpty();
   if ( hasCanonical ) {
      return canonical;
   }
   final String covered = wordToken.getCoveredText();
   return covered == null ? MISSING_WORDTOKEN_TEXT : covered;
}
/**
 * Builds a {@link Markable} for a doctor mention that starts at the given token.
 * <p>
 * Starting from the dependency node of {@code drToken}, the chain of heads is followed for as long as
 * the head exists, its id is not 0, and its part-of-speech tag is {@code "NNP"}. The markable spans
 * from the begin of {@code drToken} to the end of the last node reached. If no dependency node is
 * found for the token, or the node reached ends before the token begins, the token's own end offset
 * is used instead.
 *
 * @param jcas    CAS in which the markable is created
 * @param drToken token that starts the doctor mention
 * @return a new Markable covering the token and, where present, the NNP heads above it
 */
private static Markable getDoctorMarkable(JCas jcas, WordToken drToken){
    ConllDependencyNode nnpHead = DependencyUtility.getDependencyNode(jcas, drToken);
    while(nnpHead != null){
        ConllDependencyNode head = nnpHead.getHead();
        // Constant-first equals tolerates a missing POS tag without relying on a caught NPE.
        if(head == null || head.getId() == 0 || !"NNP".equals(head.getPostag())){
            break;
        }
        nnpHead = head;
    }
    int start = drToken.getBegin();
    // With no dependency node there is nothing to extend to; keep the token's own span.
    int end = (nnpHead == null) ? drToken.getEnd() : nnpHead.getEnd();
    if(end < start){
        end = drToken.getEnd();
    }
    return new Markable(jcas, start, end);
}
}
/**
 * Creates a WordToken in the given JCas without setting any offsets.
 * After the superclass constructor runs, {@code readObject()} is invoked; begin and end are left
 * for the caller to set.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 */
public WordToken(JCas jcas) { super(jcas); readObject(); }
/** * A utility method that annotates a given range. */ protected void annotateRange(JCas jcas, String text, int rangeBegin, int rangeEnd) throws AnalysisEngineProcessException { JFSIndexRepository indexes = jcas.getJFSIndexRepository(); Iterator<?> wordItr = indexes.getAnnotationIndex(WordToken.type) .iterator(); while (wordItr.hasNext()) { WordToken wordAnnotation = (WordToken) wordItr.next(); if (wordAnnotation.getBegin() >= rangeBegin && wordAnnotation.getEnd() <= rangeEnd) { String word = text.substring(wordAnnotation.getBegin(), wordAnnotation.getEnd()); // if the original word was misspelled, use the spell correction String suggestion = wordAnnotation.getSuggestion(); if ((suggestion != null) && (suggestion.length() > 0)) { word = suggestion; } // skip past words that are part of the exclusion set if (exclusionSet.contains(word)) continue; setCanonicalForm(wordAnnotation, word); if (postLemmas) setLemma(wordAnnotation, word, jcas); } } }