/** * @param baseToken some token * @return a part of speech text representation if the basetoken is a word token, else "" */ static private String getTokenPos( final BaseToken baseToken ) { if ( !(baseToken instanceof WordToken) ) { return ""; } // We are only interested in tokens that are -words- final String tokenPos = baseToken.getPartOfSpeech(); if ( tokenPos == null ) { return ""; } return tokenPos; }
/** * @param baseToken some token * @return a part of speech text representation if the basetoken is a word token, else "" */ static private String getTokenPos( final BaseToken baseToken ) { if ( !(baseToken instanceof WordToken) ) { return ""; } // We are only interested in tokens that are -words- final String tokenPos = baseToken.getPartOfSpeech(); if ( tokenPos == null ) { return ""; } return tokenPos; }
/**
 * Determines whether an annotation covers a single pronominal token.
 *
 * @param a1 annotation to test
 * @return true if a1 covers exactly one token whose POS tag starts with "PRP"
 *         (personal/possessive pronoun) or equals "DT" (determiner)
 */
public static boolean isPronoun(IdentifiedAnnotation a1){
  List<BaseToken> tokens = JCasUtil.selectCovered(BaseToken.class, a1);
  // A pronoun is a single token; multi-token spans cannot qualify.
  if(tokens.size() != 1){
    return false;
  }
  // Cache the POS string instead of re-querying it for every check.
  String pos = tokens.get(0).getPartOfSpeech();
  if(pos == null){
    return false;
  }
  return pos.startsWith("PRP") || pos.equals("DT");
}
/**
 * @param a1 annotation to test
 * @return true if a1 covers exactly one token tagged with a PRP-prefixed tag or "DT"
 */
public static boolean isPronoun(IdentifiedAnnotation a1){
  List<BaseToken> covered = JCasUtil.selectCovered(BaseToken.class, a1);
  if(covered.size() == 1){
    BaseToken only = covered.get(0);
    String tag = only.getPartOfSpeech();
    if(tag != null && (tag.startsWith("PRP") || tag.equals("DT"))){
      return true;
    }
  }
  return false;
}
public String calcmNumber () { // use the underlying NE, instead of the expanded markable to find the number //ArrayList<BaseToken> l = containedTokens(m.getContent().getBegin(), m.getContent().getEnd()); ArrayList<BaseToken> l = containedTokens(m.getBegin(), m.getEnd()); for (BaseToken t : l) { String pos = t.getPartOfSpeech(); if (pos.equals("NN") || pos.equals("NNP")) return "S"; else if (pos.equals("NNS") || pos.equals("NNPS")) return "P"; } return "U"; }
static boolean isPronoun (Markable m) { if (m.getContent() instanceof BaseToken) { BaseToken t = (BaseToken) m.getContent(); if (t.getPartOfSpeech().startsWith("PRP")) // TODO: since only 3rd person pronouns are added as markables, no need to check return true; } return false; }
static boolean isPronoun (Markable m) { if (m.getContent() instanceof BaseToken) { BaseToken t = (BaseToken) m.getContent(); if (t.getPartOfSpeech().startsWith("PRP")) // TODO: since only 3rd person pronouns are added as markables, no need to check return true; } return false; }
public String calcmNumber () { // use the underlying NE, instead of the expanded markable to find the number //ArrayList<BaseToken> l = containedTokens(m.getContent().getBegin(), m.getContent().getEnd()); ArrayList<BaseToken> l = containedTokens(m.getBegin(), m.getEnd()); for (BaseToken t : l) { String pos = t.getPartOfSpeech(); if (pos.equals("NN") || pos.equals("NNP")) return "S"; else if (pos.equals("NNS") || pos.equals("NNPS")) return "P"; } return "U"; }
@Override public void process(JCas jcas) throws AnalysisEngineProcessException { // Create a dummy IdentifiedAnnotation in the type system // If the BaseToken Part Of Speech is a Noun Collection<BaseToken> tokens = JCasUtil.select(jcas, BaseToken.class); for (BaseToken token : tokens) { if (saveAnnotation && token.getPartOfSpeech() != null && token.getPartOfSpeech().startsWith("N")) { IdentifiedAnnotation ann = new IdentifiedAnnotation(jcas); ann.setBegin(token.getBegin()); ann.setEnd(token.getEnd()); ann.addToIndexes(); if (printAnnotation) { LOG.info("Token:" + token.getCoveredText() + " POS:" + token.getPartOfSpeech()); } } } }
// Flag for removal any markable that is exactly one token, is POS-tagged as a
// personal/possessive pronoun (PRP*), and whose text is not "it" (case-insensitive).
if(coveredTokens.size() == 1 && coveredTokens.get(0).getPartOfSpeech() != null && coveredTokens.get(0).getPartOfSpeech().startsWith("PRP") && !markable.getCoveredText().toLowerCase().equals("it")){ toRemove.add(markable);
// Single-token markables tagged PRP* are dropped unless the text is "it";
// NOTE(review): presumably only "it" is a valid anaphor candidate here — confirm against caller.
if(coveredTokens.size() == 1 && coveredTokens.get(0).getPartOfSpeech() != null && coveredTokens.get(0).getPartOfSpeech().startsWith("PRP") && !markable.getCoveredText().toLowerCase().equals("it")){ toRemove.add(markable);
String basicNumber (Markable m) { ArrayList<BaseToken> l = containedTokens(m.getContent().getBegin(), m.getContent().getEnd()); for (BaseToken t : l) { String pos = t.getPartOfSpeech(); if (pos.equals("NN") || pos.equals("NNP")) return "S"; else if (pos.equals("NNS") || pos.equals("NNPS")) return "P"; // else if (pos.equals("PRP")) { // if (m.getCoveredText().equalsIgnoreCase("we") || m.getCoveredText().equalsIgnoreCase("they")) // return "P"; // else // return "S"; // } } return "U"; }
String basicNumber (Markable m) { ArrayList<BaseToken> l = containedTokens(m.getContent().getBegin(), m.getContent().getEnd()); for (BaseToken t : l) { String pos = t.getPartOfSpeech(); if (pos.equals("NN") || pos.equals("NNP")) return "S"; else if (pos.equals("NNS") || pos.equals("NNPS")) return "P"; // else if (pos.equals("PRP")) { // if (m.getCoveredText().equalsIgnoreCase("we") || m.getCoveredText().equalsIgnoreCase("they")) // return "P"; // else // return "S"; // } } return "U"; }
// POS tag of the first covered token; may be null if the tagger has not run.
String pos = baseTokens.get(0).getPartOfSpeech();
/** * @param baseToken - * @return annotation type, part of speech and covered text in a fhir codeable concept. */ static public CodeableConcept createPosCode( final BaseToken baseToken ) { final CodeableConcept codeableConcept = createSimpleCode( baseToken ); if ( baseToken instanceof WordToken ) { // We are only interested in tokens that are -words- final String pos = baseToken.getPartOfSpeech(); codeableConcept.addCoding( new Coding( CODING_PART_OF_SPEECH, pos, "" ) ); } return codeableConcept; }
String pos = baseTokens.get(0).getPartOfSpeech();
/**
 * Runs the ClearNLP dependency parser over every sentence in the CAS.
 * For each sentence: collects the covered BaseTokens (dropping NewlineTokens,
 * since #convert fails on empty/printless input), builds a DEPTree whose nodes
 * carry the covered text, a lemma (from the lemmatizer when useLemmatizer is
 * set, else the token's normalized form) and the POS tag, then parses the tree
 * under LOCK and converts the result back into indexed ConllDependencyNodes.
 * Method is synchronized and the parser call additionally synchronizes on LOCK —
 * NOTE(review): presumably the underlying parser is not thread-safe; confirm.
 *
 * @param jCas CAS containing Sentence and BaseToken annotations
 * @throws AnalysisEngineProcessException declared by the UIMA contract
 */
@Override public synchronized void process( JCas jCas ) throws AnalysisEngineProcessException { LOGGER.info( "Dependency parser starting with thread:" + Thread.currentThread().getName() ); for ( Sentence sentence : JCasUtil.select( jCas, Sentence.class ) ) { List<BaseToken> printableTokens = new ArrayList<>(); for ( BaseToken token : JCasUtil.selectCovered( jCas, BaseToken.class, sentence ) ) { if ( token instanceof NewlineToken ) continue; printableTokens.add( token ); } if ( printableTokens.isEmpty() ) { // If there are no printable tokens then #convert fails continue; } DEPTree tree = new DEPTree(); // Convert CAS data into structures usable by ClearNLP for ( int i = 0; i < printableTokens.size(); i++ ) { BaseToken token = printableTokens.get( i ); String lemma = useLemmatizer ? lemmatizer.getLemma( token.getCoveredText(), token.getPartOfSpeech() ) : token.getNormalizedForm(); DEPNode node = new DEPNode( i + 1, token.getCoveredText(), lemma, token.getPartOfSpeech(), new DEPFeat() ); tree.add( node ); } // Run parser and convert output back to CAS friendly data types synchronized(LOCK){ parser.process( tree ); ArrayList<ConllDependencyNode> nodes = ClearDependencyUtility.convert( jCas, tree, sentence, printableTokens ); DependencyUtility.addToIndexes( jCas, nodes ); } } LOGGER.info( "Dependency parser ending with thread:" + Thread.currentThread().getName() ); }
/**
 * Runs the ClearNLP dependency parser over every sentence in the CAS.
 * Per sentence: gathers covered BaseTokens while skipping NewlineTokens
 * (an empty token list would make #convert fail), builds a DEPTree node per
 * token — lemma comes from the lemmatizer when useLemmatizer is set, otherwise
 * the token's normalized form — parses it, and indexes the converted
 * ConllDependencyNodes.
 * NOTE(review): parser.process is called without any synchronization here —
 * confirm the parser instance is thread-safe or not shared across threads.
 *
 * @param jCas CAS containing Sentence and BaseToken annotations
 * @throws AnalysisEngineProcessException declared by the UIMA contract
 */
@Override public void process(JCas jCas) throws AnalysisEngineProcessException { for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) { List<BaseToken> printableTokens = new ArrayList<>(); for(BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, sentence)){ if(token instanceof NewlineToken) continue; printableTokens.add(token); } if ( printableTokens.isEmpty() ) { // If there are no printable tokens then #convert fails continue; } DEPTree tree = new DEPTree(); // Convert CAS data into structures usable by ClearNLP for (int i = 0; i < printableTokens.size(); i++) { BaseToken token = printableTokens.get(i); String lemma = useLemmatizer ? lemmatizer.getLemma(token.getCoveredText(), token.getPartOfSpeech()) : token.getNormalizedForm(); DEPNode node = new DEPNode(i+1, token.getCoveredText(), lemma, token.getPartOfSpeech(), new DEPFeat()); tree.add(node); } // Run parser and convert output back to CAS friendly data types parser.process(tree); ArrayList<ConllDependencyNode> nodes = ClearDependencyUtility.convert( jCas, tree, sentence, printableTokens ); DependencyUtility.addToIndexes( jCas, nodes ); } }
// If the token's POS tag is in the excluded set, mark the lookup token so it
// is not used for dictionary lookup; otherwise fall through to the else branch.
if ( isTagExcluded( bta.getPartOfSpeech() ) ) { lt.addStringAttribute( FirstTokenPermutationImpl.LT_KEY_USE_FOR_LOOKUP, FALSE_STRING ); } else {
// Attach the token's POS tag as a child node of the token's tree.
tokenTree.addChild(new SimpleTree(token.getPartOfSpeech()));