/**
 * Given an annotation, retrieve its last word.
 *
 * @param systemView the JCas view to search for covered word tokens
 * @param annotation the annotation whose last word is wanted
 * @return the covered text of the last {@code WordToken} inside the annotation,
 *         or the annotation's own covered text when it covers no word tokens
 */
public static String getLastWord(JCas systemView, Annotation annotation) {
    List<WordToken> tokens = JCasUtil.selectCovered(systemView, WordToken.class, annotation);
    if (tokens.isEmpty()) {
        // No word tokens under this annotation; fall back to its raw text.
        return annotation.getCoveredText();
    }
    return tokens.get(tokens.size() - 1).getCoveredText();
}
/**
 * Given an annotation, retrieve its last word.
 */
public static String getLastWord(JCas systemView, Annotation annotation) {
    List<WordToken> coveredTokens = JCasUtil.selectCovered(systemView, WordToken.class, annotation);
    int tokenCount = coveredTokens.size();
    if (tokenCount == 0) {
        // Nothing tokenized under the annotation: use the annotation text itself.
        return annotation.getCoveredText();
    }
    WordToken finalToken = coveredTokens.get(tokenCount - 1);
    return finalToken.getCoveredText();
}
/**
 * In some pipelines LVG is not run, hence a canonical form does not exist.
 * In order to prevent NPEs, this method checks for null values of canonical form and covered text.
 *
 * @param wordToken of interest
 * @return The first non-null of the word token's canonical form, covered text or {@link #MISSING_WORDTOKEN_TEXT}.
 */
static public String getCanonicalForm( final WordToken wordToken ) {
    final String canonical = wordToken.getCanonicalForm();
    final boolean hasCanonical = canonical != null && !canonical.isEmpty();
    if ( hasCanonical ) {
        return canonical;
    }
    // LVG did not produce a canonical form; fall back to the surface text.
    final String surfaceText = wordToken.getCoveredText();
    return ( surfaceText != null ) ? surfaceText : MISSING_WORDTOKEN_TEXT;
}
/**
 * In some pipelines LVG is not run, hence a canonical form does not exist.
 * In order to prevent NPEs, this method checks for null values of canonical form and covered text
 *
 * @param wordToken of interest
 * @return The first non-null of the word token's canonical form, covered text or {@link #MISSING_WORDTOKEN_TEXT}.
 */
static public String getCanonicalForm( final WordToken wordToken ) {
    final String canonicalForm = wordToken.getCanonicalForm();
    // Prefer the LVG canonical form when it is present and non-empty.
    if ( canonicalForm != null && !canonicalForm.isEmpty() ) {
        return canonicalForm;
    }
    // No canonical form: fall back to the token's surface text.
    final String coveredText = wordToken.getCoveredText();
    if ( coveredText == null ) {
        // Last-resort sentinel so callers never receive null.
        return MISSING_WORDTOKEN_TEXT;
    }
    return coveredText;
}
/**
 * Return the number of words in the span, excluding the conjunctions "and" and "or".
 *
 * @param jcas  CAS holding the annotations
 * @param begin span start offset (inclusive)
 * @param end   span end offset
 * @return count of WordTokens in the span whose lower-cased text is not "and"/"or"
 */
public static int getNumOfWordTokensInSpan(JCas jcas, int begin, int end) {
    // Parameterized instead of raw types; conjunctions that do not count as words.
    Set<String> ignoreWords = new HashSet<>();
    ignoreWords.add("and");
    ignoreWords.add("or");
    Iterator<?> wtIter = FSUtil.getAnnotationsInSpanIterator(
            jcas, WordToken.type, begin, end);
    int cnt = 0;
    while (wtIter.hasNext()) {
        WordToken wt = (WordToken) wtIter.next();
        if (ignoreWords.contains(wt.getCoveredText().toLowerCase())) {
            continue;
        }
        cnt++;
    }
    return cnt;
}
/**
 * Return the number of words in the span, excluding the conjunctions "and" and "or".
 *
 * @param jcas  CAS holding the annotations
 * @param begin span start offset (inclusive)
 * @param end   span end offset
 * @return count of WordTokens in the span whose lower-cased text is not "and"/"or"
 */
public static int getNumOfWordTokensInSpan(JCas jcas, int begin, int end) {
    // Parameterized instead of raw types; conjunctions that do not count as words.
    Set<String> ignoreWords = new HashSet<>();
    ignoreWords.add("and");
    ignoreWords.add("or");
    Iterator<?> wtIter = FSUtil.getAnnotationsIteratorInSpan(
            jcas, WordToken.type, begin, end);
    int cnt = 0;
    while (wtIter.hasNext()) {
        WordToken wt = (WordToken) wtIter.next();
        if (ignoreWords.contains(wt.getCoveredText().toLowerCase())) {
            continue;
        }
        cnt++;
    }
    return cnt;
}
/**
 * This is to count contradiction words -- if appears do not negate
 * eg) Tobacco: no quit in 1980 -- "quit" is contradiction words. So do not negate
 *
 * @param jcas CAS whose WordToken index is scanned
 * @return number of normalized sub-tokens that appear in {@code conWords}
 */
private int getNegConCount(JCas jcas) {
    int conCnt = 0;
    Iterator<?> wordTokenItr = jcas.getJFSIndexRepository().getAnnotationIndex(
            WordToken.type).iterator();
    while (wordTokenItr.hasNext()) {
        WordToken token = (WordToken) wordTokenItr.next();
        String tok = token.getCoveredText();
        if (tok == null) {
            continue;
        }
        // Normalize: lower-case and turn non-word characters into spaces.
        tok = tok.toLowerCase().replaceAll("[\\W]", " ").trim();
        // Split on runs of whitespace; the original single-char split ("\\s")
        // produced empty "" sub-tokens between consecutive spaces, which would
        // be spuriously counted if conWords ever contained "".
        String[] toks = tok.split("\\s+");
        for (String subToken : toks) {
            if (conWords.contains(subToken)) {
                conCnt++;
            }
        }
    }
    return conCnt;
}

private String apiMacroHome = "\\$main_root";
/**
 * This is to count contradiction words -- if appears do not negate
 * eg) Tobacco: no quit in 1980 -- "quit" is contradiction words. So do not negate
 */
private int getNegConCount(JCas jcas) {
    int contradictionCount = 0;
    Iterator<?> tokenIterator =
            jcas.getJFSIndexRepository().getAnnotationIndex(WordToken.type).iterator();
    while (tokenIterator.hasNext()) {
        WordToken wordToken = (WordToken) tokenIterator.next();
        String text = wordToken.getCoveredText();
        if (text == null) {
            continue;
        }
        // Lower-case, replace non-word characters with spaces, then split.
        String normalized = text.toLowerCase().replaceAll("[\\W]", " ").trim();
        String[] pieces = normalized.split("\\s");
        for (int idx = 0; idx < pieces.length; idx++) {
            if (conWords.contains(pieces[idx])) {
                contradictionCount++;
            }
        }
    }
    return contradictionCount;
}

private String apiMacroHome = "\\$main_root";
for(WordToken wordToken : wordsBetweenArgs) { List<Double> wordVector; if(wordVectors.containsKey(wordToken.getCoveredText().toLowerCase())) { wordVector = wordVectors.get(wordToken.getCoveredText().toLowerCase()); } else { wordVector = wordVectors.get("oov");
for(WordToken wordToken : wordsBetweenArgs) { List<Double> wordVector; if(wordVectors.containsKey(wordToken.getCoveredText().toLowerCase())) { wordVector = wordVectors.get(wordToken.getCoveredText().toLowerCase()); } else { wordVector = wordVectors.get("oov");
while(wtIter.hasNext()) { WordToken wt = (WordToken) wtIter.next(); if(ignoreWords.contains(wt.getCoveredText().toLowerCase())) continue;
while(wtIter.hasNext()) { WordToken wt = (WordToken) wtIter.next(); if(ignoreWords.contains(wt.getCoveredText().toLowerCase())) continue;
/**
 * Iterates through all of the WordTokenAnnotation's, gets the covered text for each annotation
 * and increments the frequency count for that text.
 *
 * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS)
 */
@Override
public void processCas( CAS cas ) throws ResourceProcessException {
    try {
        JCas jcas = cas.getJCas();
        JFSIndexRepository indexes = jcas.getJFSIndexRepository();
        Iterator<?> tokenItr = indexes.getAnnotationIndex(WordToken.type).iterator();
        while (tokenItr.hasNext()) {
            WordToken token = (WordToken) tokenItr.next();
            String text = token.getCoveredText();
            // Single-element int[] acts as a mutable counter; computeIfAbsent
            // replaces the containsKey/put/get triple lookup with one lookup.
            wordFreqs.computeIfAbsent(text, t -> new int[1])[0]++;
        }
    } catch (Exception exception) {
        // Wrap and preserve the original cause for the CPM framework.
        throw new ResourceProcessException(exception);
    }
}
/**
 * Iterates through all of the WordTokenAnnotation's, gets the covered text for each annotation
 * and increments the frequency count for that text.
 *
 * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(org.apache.uima.cas.CAS)
 */
@Override
public void processCas( CAS cas ) throws ResourceProcessException {
    try {
        JCas jcas = cas.getJCas();
        JFSIndexRepository indexRepository = jcas.getJFSIndexRepository();
        Iterator<?> wordTokenIterator =
                indexRepository.getAnnotationIndex(WordToken.type).iterator();
        while (wordTokenIterator.hasNext()) {
            WordToken wordToken = (WordToken) wordTokenIterator.next();
            String coveredText = wordToken.getCoveredText();
            // Lazily create the single-element counter array on first sight.
            if (!wordFreqs.containsKey(coveredText)) {
                wordFreqs.put(coveredText, new int[1]);
            }
            wordFreqs.get(coveredText)[0]++;
        }
    } catch (Exception exception) {
        // Preserve the cause when rethrowing to the CPM framework.
        throw new ResourceProcessException(exception);
    }
}
.filter( w -> valueWords.contains( w.getCoveredText().toUpperCase() ) ) .findFirst() .orElse( null );
while (tokenItr.hasNext()) { WordToken token = (WordToken) tokenItr.next(); String strToken = token.getCoveredText();
while (tokenItr.hasNext()) { WordToken token = (WordToken) tokenItr.next(); String strToken = token.getCoveredText();
while (iter.hasNext()) { WordToken t = (WordToken)iter.next(); String s = t.getCoveredText(); if (//s.equalsIgnoreCase("it") || s.equalsIgnoreCase("its") ||
while (iter.hasNext()) { WordToken t = (WordToken)iter.next(); String s = t.getCoveredText(); if (//s.equalsIgnoreCase("it") || s.equalsIgnoreCase("its") ||
for(int i = 0; i < words.size(); i++){ WordToken word = words.get(i); String text = word.getCoveredText(); if(word.getPartOfSpeech().startsWith("PRP")){ if(text.equalsIgnoreCase("I") || text.equalsIgnoreCase("me") || text.equalsIgnoreCase("my")){