@Override public synchronized void process( JCas jCas ) throws AnalysisEngineProcessException { LOGGER.info( "Dependency parser starting with thread:" + Thread.currentThread().getName() ); for ( Sentence sentence : JCasUtil.select( jCas, Sentence.class ) ) { List<BaseToken> printableTokens = new ArrayList<>(); for ( BaseToken token : JCasUtil.selectCovered( jCas, BaseToken.class, sentence ) ) { if ( token instanceof NewlineToken ) continue; printableTokens.add( token ); } if ( printableTokens.isEmpty() ) { // If there are no printable tokens then #convert fails continue; } DEPTree tree = new DEPTree(); // Convert CAS data into structures usable by ClearNLP for ( int i = 0; i < printableTokens.size(); i++ ) { BaseToken token = printableTokens.get( i ); String lemma = useLemmatizer ? lemmatizer.getLemma( token.getCoveredText(), token.getPartOfSpeech() ) : token.getNormalizedForm(); DEPNode node = new DEPNode( i + 1, token.getCoveredText(), lemma, token.getPartOfSpeech(), new DEPFeat() ); tree.add( node ); } // Run parser and convert output back to CAS friendly data types synchronized(LOCK){ parser.process( tree ); ArrayList<ConllDependencyNode> nodes = ClearDependencyUtility.convert( jCas, tree, sentence, printableTokens ); DependencyUtility.addToIndexes( jCas, nodes ); } } LOGGER.info( "Dependency parser ending with thread:" + Thread.currentThread().getName() ); }
@Override public void process(JCas jCas) throws AnalysisEngineProcessException { for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) { List<BaseToken> printableTokens = new ArrayList<>(); for(BaseToken token : JCasUtil.selectCovered(jCas, BaseToken.class, sentence)){ if(token instanceof NewlineToken) continue; printableTokens.add(token); } if ( printableTokens.isEmpty() ) { // If there are no printable tokens then #convert fails continue; } DEPTree tree = new DEPTree(); // Convert CAS data into structures usable by ClearNLP for (int i = 0; i < printableTokens.size(); i++) { BaseToken token = printableTokens.get(i); String lemma = useLemmatizer ? lemmatizer.getLemma(token.getCoveredText(), token.getPartOfSpeech()) : token.getNormalizedForm(); DEPNode node = new DEPNode(i+1, token.getCoveredText(), lemma, token.getPartOfSpeech(), new DEPFeat()); tree.add(node); } // Run parser and convert output back to CAS friendly data types parser.process(tree); ArrayList<ConllDependencyNode> nodes = ClearDependencyUtility.convert( jCas, tree, sentence, printableTokens ); DependencyUtility.addToIndexes( jCas, nodes ); } }