/**
 * Adapter that exposes a token as a single character.
 *
 * @param bta token whose covered text supplies the character; assumes the
 *            covered text is non-empty — {@code charAt(0)} throws
 *            StringIndexOutOfBoundsException otherwise (TODO confirm the
 *            caller guarantees a non-empty token)
 */
public CharacterTokenAdapter(BaseToken bta) {
    super(bta);
    // Only the first character of the token's text is retained.
    iv_char = bta.getCoveredText().charAt(0);
}
/**
 * Adapter that exposes a token as a single character.
 * NOTE(review): byte-identical duplicate of the constructor above — likely a
 * copy from another file; verify whether both are needed.
 *
 * @param bta token whose covered text supplies the character; assumes the
 *            covered text is non-empty — {@code charAt(0)} throws
 *            StringIndexOutOfBoundsException otherwise
 */
public CharacterTokenAdapter(BaseToken bta) {
    super(bta);
    // Only the first character of the token's text is retained.
    iv_char = bta.getCoveredText().charAt(0);
}
/**
 * Collects the lower-cased text of every token contained in the character
 * span [begin, end], excluding stopwords.
 *
 * @param begin character offset where the span starts
 * @param end   character offset where the span ends
 * @return lower-cased, stopword-filtered token strings, in document order
 */
public ArrayList<String> contentWords (int begin, int end) {
    ArrayList<String> result = new ArrayList<String>();
    for (BaseToken token : containedTokens(begin, end)) {
        String lowered = token.getCoveredText().toLowerCase();
        if (stopwords.contains(lowered)) {
            continue;  // skip stopwords
        }
        result.add(lowered);
    }
    return result;
}
/**
 * Returns the set of distinct lower-cased token strings covered by the
 * given annotation. No stopword filtering is applied.
 *
 * @param a1 annotation whose covered tokens are collected
 * @return distinct lower-cased token texts
 */
public static Set<String> contentWords(Annotation a1){
    Set<String> tokenTexts = new HashSet<>();
    JCasUtil.selectCovered(BaseToken.class, a1)
            .forEach(token -> tokenTexts.add(token.getCoveredText().toLowerCase()));
    return tokenTexts;
}
/**
 * Collects the lower-cased text of every token contained in the character
 * span [begin, end], excluding stopwords.
 *
 * @param begin character offset where the span starts
 * @param end   character offset where the span ends
 * @return lower-cased, stopword-filtered token strings, in document order
 */
public ArrayList<String> contentWords (int begin, int end) {
    // Diamond operator replaces the redundant explicit type argument.
    ArrayList<String> ret = new ArrayList<>();
    ArrayList<BaseToken> l = containedTokens(begin, end);
    for (BaseToken t : l) {
        // NOTE(review): toLowerCase() is locale-sensitive; if the stopword
        // list is plain ASCII English, toLowerCase(Locale.ROOT) would be
        // safer — confirm before changing, as it alters behavior in e.g.
        // the Turkish locale.
        String s = t.getCoveredText().toLowerCase();
        if (!stopwords.contains(s)) {
            ret.add(s);
        }
    }
    return ret;
}
/**
 * Returns the set of distinct lower-cased token strings covered by the
 * given annotation. Unlike the span-based {@code contentWords(int, int)}
 * variant elsewhere in this source, this one does NOT filter stopwords.
 *
 * @param a1 annotation whose covered tokens are collected
 * @return distinct lower-cased token texts
 */
public static Set<String> contentWords(Annotation a1){
    Set<String> words = new HashSet<>();
    for(BaseToken tok : JCasUtil.selectCovered(BaseToken.class, a1)){
        words.add(tok.getCoveredText().toLowerCase());
    }
    return words;
}
/**
 * Writes the text of every Markable to {@code out}, one markable per line:
 * tokens are separated by single spaces and embedded newlines are
 * flattened to spaces. Markables with no covered tokens produce no output.
 *
 * @param jcas CAS containing Markable and BaseToken annotations
 * @throws AnalysisEngineProcessException per the UIMA process contract
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    for (Markable m : JCasUtil.select(jcas, Markable.class)) {
        // StringBuilder instead of StringBuffer: no synchronization is
        // needed for a method-local buffer.
        StringBuilder buff = new StringBuilder();
        for (BaseToken token : JCasUtil.selectCovered(BaseToken.class, m)) {
            buff.append(token.getCoveredText().replace('\n', ' '));
            buff.append(' ');
        }
        if (buff.length() > 0) {
            // Drop the trailing separator space before printing.
            out.println(buff.substring(0, buff.length() - 1));
        }
    }
}
/**
 * Concatenates the covered text of the sentence's base tokens, appending a
 * single space after each token (including the last, so the result carries
 * a trailing space when any token is emitted).
 *
 * @param jcas CAS to search
 * @param sentence annotation whose covered base tokens are gathered
 * @return the (base) tokens of the sentence, separated by spaces
 */
static private String getSentenceTokens( final JCas jcas, final Annotation sentence ) {
    final StringBuilder sb = new StringBuilder();
    final List<BaseToken> allBaseTokens = org.apache.uima.fit.util.JCasUtil
            .selectCovered( jcas, BaseToken.class, sentence );
    for ( BaseToken baseToken : allBaseTokens ) {
        if ( baseToken instanceof NewlineToken ) {
            // mid-sentence newlines are ignored - this honors the newline behavior of the selected Sentence Detector
            continue;
        }
        sb.append( baseToken.getCoveredText() ).append( ' ' );
    }
    return sb.toString();
}
/** * @param jcas - * @param sentence - * @return the (base) tokens of the sentence, separated by spaces */ static private String getSentenceTokens( final JCas jcas, final Annotation sentence ) { final StringBuilder sb = new StringBuilder(); final List<BaseToken> allBaseTokens = org.apache.uima.fit.util.JCasUtil .selectCovered( jcas, BaseToken.class, sentence ); for ( BaseToken baseToken : allBaseTokens ) { if ( baseToken instanceof NewlineToken ) { // mid-sentence newlines are ignored - this honors the newline behavior of the selected Sentence Detector continue; } sb.append( baseToken.getCoveredText() ).append( ' ' ); } return sb.toString(); }
/**
 * Maps each base token's sentence-relative text span to its display text,
 * preserving document order (LinkedHashMap). Zero-width spans are dropped;
 * newline tokens map to a single space rather than their covered text.
 *
 * @param jcas     CAS to search
 * @param sentence sentence whose begin offset anchors the relative spans
 * @return ordered span-to-text map for the sentence's base tokens
 */
static private Map<TextSpan, String> createBaseTokenMap( final JCas jcas, final AnnotationFS sentence ) {
    final int sentenceBegin = sentence.getBegin();
    final Collection<BaseToken> baseTokens = JCasUtil.selectCovered( jcas, BaseToken.class, sentence );
    // LinkedHashMap keeps tokens in the order selectCovered returned them.
    final Map<TextSpan, String> baseItemMap = new LinkedHashMap<>();
    for ( BaseToken baseToken : baseTokens ) {
        // Span is made relative to the sentence start.
        final TextSpan textSpan = new DefaultTextSpan( baseToken, sentenceBegin );
        if ( textSpan.getWidth() == 0 ) {
            continue;
        }
        if ( baseToken instanceof NewlineToken ) {
            // Newlines are normalized to a single space.
            baseItemMap.put( textSpan, " " );
            continue;
        }
        baseItemMap.put( textSpan, baseToken.getCoveredText() );
    }
    return baseItemMap;
}
/**
 * Scans the document's tokens for single-word and two-word cue phrases.
 * Each token is lower-cased and looked up in {@code cueWords}; adjacent
 * token pairs (joined with a single space) are looked up as well.
 *
 * @param jCas CAS to scan; matches are added via {@code addCuePhrase}
 * @throws AnalysisEngineProcessException per the UIMA process contract
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    ArrayList<BaseToken> tokens = new ArrayList<>(JCasUtil.select(jCas, BaseToken.class));
    String prevKey = null;
    BaseToken prevToken = null;
    for (BaseToken token : tokens) {
        String key = token.getCoveredText().toLowerCase();
        if (cueWords.containsKey(key)) {
            addCuePhrase(jCas, key, token.getBegin(), token.getEnd());
        }
        // Two-word phrase ending at this token. The explicit null guard
        // replaces the former i > 0 index check, which forced a
        // method-level @SuppressWarnings("null") — same behavior, no
        // suppression needed.
        if (prevToken != null) {
            String twoKey = prevKey + " " + key;
            if (cueWords.containsKey(twoKey)) {
                addCuePhrase(jCas, twoKey, prevToken.getBegin(), token.getEnd());
            }
        }
        prevToken = token;
        prevKey = key;
    }
}
/**
 * Scans the document's tokens for single-word and two-word cue phrases.
 * Each token is lower-cased and looked up in {@code cueWords}; adjacent
 * token pairs (joined with a single space) are looked up as well.
 * The null suppression covers lastKey/lastToken, which are only read when
 * i > 0 and are therefore non-null at that point.
 *
 * @param jCas CAS to scan; matches are added via {@code addCuePhrase}
 * @throws AnalysisEngineProcessException per the UIMA process contract
 */
@SuppressWarnings("null")
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    ArrayList<BaseToken> tokens = new ArrayList<BaseToken>(JCasUtil.select(jCas, BaseToken.class));
    String lastKey = null;
    BaseToken lastToken = null;
    for(int i = 0; i < tokens.size(); i++){
        BaseToken token = tokens.get(i);
        String key = token.getCoveredText().toLowerCase();
        if(cueWords.containsKey(key)){
            addCuePhrase(jCas, key, token.getBegin(), token.getEnd());
        }
        if(i > 0){
            // Two-word phrase: previous token's key + this token's key.
            String twoKey = lastKey + " " + key;
            if(cueWords.containsKey(twoKey)){
                addCuePhrase(jCas, twoKey, lastToken.getBegin(), token.getEnd());
            }
        }
        lastToken = token;
        lastKey = key;
    }
}
@Override public List<Feature> extract(JCas jCas, IdentifiedAnnotation arg1, IdentifiedAnnotation arg2) throws AnalysisEngineProcessException { HashSet<String> prepositions = new HashSet<String>(Arrays.asList("about", "above", "across", "against", "amid", "around", "at", "atop", "behind", "below", "beneath", "beside", "between", "beyond", "by", "for", "from", "down", "in", "including", "inside", "into", "mid", "near", "of", "off", "on", "onto", "opposite", "out", "outside", "over", "round", "through", "throughout", "to", "under", "underneath", "with", "within", "without")); List<Feature> features = new ArrayList<Feature>(); // entity1 ... entity2 scenario if(arg1.getEnd() < arg2.getBegin()) { for(BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, arg1.getEnd(), arg2.getBegin())) { if(prepositions.contains(token.getCoveredText())) { features.add(new Feature("arg1_preposition_arg2", token.getCoveredText())); } } } // entity2 ... entity1 scenario if(arg2.getEnd() < arg1.getBegin()) { for(BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, arg2.getEnd(), arg1.getBegin())) { if(prepositions.contains(token.getCoveredText())) { features.add(new Feature("arg2_preposition_arg1", token.getCoveredText())); } } } return features; }
// Fragment (enclosing method not visible in this view): reads the first
// base token's surface form, lower-cased, and its part-of-speech tag.
// Assumes baseTokens is non-empty — get(0) throws otherwise; TODO confirm
// the caller guarantees at least one token.
String token = baseTokens.get(0).getCoveredText().toLowerCase();
String pos = baseTokens.get(0).getPartOfSpeech();
/**
 * Tags each sentence's tokens with part-of-speech labels via ClearNLP:
 * the tokens are copied into a DEPTree at 1-based positions (i+1), the
 * tagger is run over the tree, and the predicted tags are written back
 * onto the BaseToken annotations at the matching positions.
 *
 * @param jCas CAS whose sentence tokens are tagged in place
 * @throws AnalysisEngineProcessException per the UIMA process contract
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);
        DEPTree tree = new DEPTree();
        // Convert CAS data into structures usable by ClearNLP
        for (int i = 0; i < tokens.size(); i++) {
            BaseToken token = tokens.get(i);
            DEPNode node = new DEPNode(i+1, token.getCoveredText());
            tree.add(node);
        }
        // Run parser and convert output back to CAS friendly data types
        postagger.process(tree);
        for (int i = 0; i < tokens.size(); i++) {
            BaseToken token = tokens.get(i);
            DEPNode node = tree.get(i+1);
            token.setPartOfSpeech(node.pos);
        }
    }
}
} // NOTE(review): closing brace of the enclosing class (header not visible in this view)
/**
 * Tags each sentence's tokens with part-of-speech labels via ClearNLP.
 * NOTE(review): byte-identical duplicate of the tagger method above —
 * likely a copy from another file; verify whether both are needed.
 *
 * @param jCas CAS whose sentence tokens are tagged in place
 * @throws AnalysisEngineProcessException per the UIMA process contract
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        List<BaseToken> tokens = JCasUtil.selectCovered(jCas, BaseToken.class, sentence);
        DEPTree tree = new DEPTree();
        // Convert CAS data into structures usable by ClearNLP
        for (int i = 0; i < tokens.size(); i++) {
            BaseToken token = tokens.get(i);
            DEPNode node = new DEPNode(i+1, token.getCoveredText());
            tree.add(node);
        }
        // Run parser and convert output back to CAS friendly data types
        postagger.process(tree);
        for (int i = 0; i < tokens.size(); i++) {
            BaseToken token = tokens.get(i);
            DEPNode node = tree.get(i+1);
            token.setPartOfSpeech(node.pos);
        }
    }
}
} // NOTE(review): closing brace of the enclosing class (header not visible in this view)
/**
 * Produces one numeric feature per word-embedding dimension for the given
 * token. The lookup tries the exact surface form first, then its
 * lower-cased form; out-of-vocabulary tokens yield an empty list.
 *
 * @param view  CAS view (not used by the lookup itself)
 * @param token token whose text is looked up in the embedding table
 * @return one feature per vector dimension, or an empty list if OOV
 * @throws CleartkExtractorException per the ClearTK extractor contract
 */
@Override
public List<Feature> extract(JCas view, BaseToken token) throws CleartkExtractorException {
    List<Feature> features = new ArrayList<>();
    String surface = token.getCoveredText();
    String lowered = surface.toLowerCase();
    WordVector vector;
    if (words.containsKey(surface)) {
        vector = words.getVector(surface);
    } else if (words.containsKey(lowered)) {
        vector = words.getVector(lowered);
    } else {
        return features;  // OOV: no features
    }
    for (int dim = 0; dim < vector.size(); dim++) {
        features.add(new Feature(getFeatureName() + "_" + dim, vector.getValue(dim)));
    }
    return features;
}
/**
 * Produces one numeric feature per word-embedding dimension for the given
 * token. The lookup tries the exact surface form first, then its
 * lower-cased form; out-of-vocabulary tokens yield an empty list.
 *
 * @param view  CAS view (not used by the lookup itself)
 * @param token token whose text is looked up in the embedding table
 * @return one feature per vector dimension, or an empty list if OOV
 * @throws CleartkExtractorException per the ClearTK extractor contract
 */
@Override
public List<Feature> extract(JCas view, BaseToken token) throws CleartkExtractorException {
    List<Feature> feats = new ArrayList<>();
    String wordText = token.getCoveredText();
    WordVector vec = null;
    if(words.containsKey(wordText)){
        vec = words.getVector(wordText);
    }else if(words.containsKey(wordText.toLowerCase())){
        vec = words.getVector(wordText.toLowerCase());
    }else{
        // Out of vocabulary: no features.
        return feats;
    }
    for(int i = 0; i < vec.size(); i++){
        feats.add(new Feature(getFeatureName() + "_" + i, vec.getValue(i)));
    }
    return feats;
}
/**
 * Computes one embedding vector per paragraph by summing the word vectors
 * of the paragraph's in-vocabulary WordTokens (lower-cased), normalizing
 * the sum, and storing the result as a FloatArray in the CAS. All
 * paragraph vectors are collected into a single FSArray that is itself
 * added to the indexes.
 * NOTE(review): parVecs is indexed but not attached to any annotation or
 * field visible here — confirm a consumer retrieves it from the indexes.
 *
 * @param jcas CAS containing Paragraph and BaseToken annotations
 * @throws AnalysisEngineProcessException per the UIMA process contract
 */
@Override
public void process(JCas jcas) throws AnalysisEngineProcessException {
    List<Paragraph> pars = new ArrayList<>(JCasUtil.select(jcas, Paragraph.class));
    FSArray parVecs = new FSArray(jcas, pars.size());
    for(int parNum = 0; parNum < pars.size(); parNum++){
        Paragraph par = pars.get(parNum);
        // Accumulator for the sum of word vectors, one slot per dimension.
        float[] parVec = new float[words.getDimensionality()];
        List<BaseToken> tokens = JCasUtil.selectCovered(BaseToken.class, par);
        for(int i = 0; i < tokens.size(); i++){
            BaseToken token = tokens.get(i);
            // Only word tokens contribute; punctuation/newlines are skipped.
            if(token instanceof WordToken){
                String word = token.getCoveredText().toLowerCase();
                if(words.containsKey(word)){
                    WordVector wv = words.getVector(word);
                    for(int j = 0; j < parVec.length; j++){
                        parVec[j] += wv.getValue(j);
                    }
                }
            }
        }
        normalize(parVec);
        // Copy the primitive array into a CAS-managed FloatArray.
        FloatArray vec = new FloatArray(jcas, words.getDimensionality());
        vec.copyFromArray(parVec, 0, 0, parVec.length);
        vec.addToIndexes();
        parVecs.set(parNum, vec);
    }
    parVecs.addToIndexes();
}
@Override public void process(JCas jcas) throws AnalysisEngineProcessException { // Create a dummy IdentifiedAnnotation in the type system // If the BaseToken Part Of Speech is a Noun Collection<BaseToken> tokens = JCasUtil.select(jcas, BaseToken.class); for (BaseToken token : tokens) { if (saveAnnotation && token.getPartOfSpeech() != null && token.getPartOfSpeech().startsWith("N")) { IdentifiedAnnotation ann = new IdentifiedAnnotation(jcas); ann.setBegin(token.getBegin()); ann.setEnd(token.getEnd()); ann.addToIndexes(); if (printAnnotation) { LOG.info("Token:" + token.getCoveredText() + " POS:" + token.getPartOfSpeech()); } } } }