/**
 * Registers a word token under its part of speech.
 *
 * <p>The token is stored in {@code partOfSpeech} under its exact part-of-speech tag, and again
 * under every entry of {@code ROOT_POS} that the tag starts with.
 *
 * @param wt the word token to register
 */
private void addPartOfSpeech(final WordToken wt) {
  final String tag = wt.getPartOfSpeech();

  // Always register under the exact tag first.
  partOfSpeech.put(tag, wt);

  // Then register under each root that is a prefix of the tag.
  for (final String root : ROOT_POS) {
    if (tag.startsWith(root)) {
      partOfSpeech.put(root, wt);
    }
  }
}
/**
 * Registers a word token under its part of speech.
 *
 * <p>The token is stored in {@code partOfSpeech} under its exact part-of-speech tag, and again
 * under every entry of {@code ROOT_POS} that the tag starts with.
 *
 * @param wt the word token to register
 */
private void addPartOfSpeech(final WordToken wt) {
  final String tag = wt.getPartOfSpeech();

  // Always register under the exact tag first.
  partOfSpeech.put(tag, wt);

  // Then register under each root that is a prefix of the tag.
  for (final String root : ROOT_POS) {
    if (tag.startsWith(root)) {
      partOfSpeech.put(root, wt);
    }
  }
}
/**
 * Checks whether the given word token satisfies the constraints of this dependency node.
 *
 * <p>A token matches when its part of speech starts with this node's type (so a node of type
 * {@code NN} also accepts {@code NNP}, {@code NNS}, etc.) and, if a content regular expression is
 * defined, the token's covered text matches that expression in its entirety. A token without a
 * part of speech never matches.
 *
 * @param wt the word token to test
 * @return true if the token matches this node
 */
public boolean matches(WordToken wt) {
  final String partOfSpeech = wt.getPartOfSpeech();
  // A token that has not been tagged cannot satisfy a part-of-speech constraint; treat it as a
  // non-match instead of failing with a NullPointerException.
  if (partOfSpeech == null || !partOfSpeech.startsWith(getType())) {
    return false;
  }

  // An empty content expression means there is no constraint on the covered text.
  final String contentRegex = getContent();
  return StringUtils.isEmpty(contentRegex) || wt.getCoveredText().matches(contentRegex);
}
/**
 * Checks whether the given word token satisfies the constraints of this dependency node.
 *
 * <p>A token matches when its part of speech starts with this node's type (so a node of type
 * {@code NN} also accepts {@code NNP}, {@code NNS}, etc.) and, if a content regular expression is
 * defined, the token's covered text matches that expression in its entirety. A token without a
 * part of speech never matches.
 *
 * @param wt the word token to test
 * @return true if the token matches this node
 */
public boolean matches(WordToken wt) {
  final String partOfSpeech = wt.getPartOfSpeech();
  // A token that has not been tagged cannot satisfy a part-of-speech constraint; treat it as a
  // non-match instead of failing with a NullPointerException.
  if (partOfSpeech == null || !partOfSpeech.startsWith(getType())) {
    return false;
  }

  // An empty content expression means there is no constraint on the covered text.
  final String contentRegex = getContent();
  return StringUtils.isEmpty(contentRegex) || wt.getCoveredText().matches(contentRegex);
}
/**
 * Prepares the fixture for each test: a fresh pattern extract and a stubbed token.
 */
@Before
public void before() {
  pe = new PatternExtract(from, to, 0, 10);

  // Stub the token so it reports the covered text "token" and the part of speech "NN".
  Mockito.when(token.getCoveredText()).thenReturn("token");
  Mockito.when(token.getPartOfSpeech()).thenReturn("NN");
}
t -> { list.add(normalize(t.getCoveredText())); list.add(t.getPartOfSpeech()); }); t -> { list.add(normalize(t.getCoveredText())); list.add(t.getPartOfSpeech()); });
new DependencyNode( Long.toString(dependent.getInternalId()), dependent.getPartOfSpeech(), dependent.getCoveredText()); DependencyTree dependencyTree = new DependencyTree(dependencyNode);
new DependencyNode( Long.toString(dependent.getInternalId()), dependent.getPartOfSpeech(), dependent.getCoveredText()); DependencyTree dependencyTree = new DependencyTree(dependencyNode);
/**
 * Converts the words of a pattern into a list of documents.
 *
 * <p>Each word becomes a {@code Document} holding its covered text under {@code text} and its
 * part of speech under {@code pos}. When the word has at least one lemma, the form of the first
 * lemma is added under {@code lemma}.
 *
 * @param pattern the pattern whose words are converted
 * @return one document per word, in the order the words appear in the pattern
 */
private List<Object> saveWords(final Pattern pattern) {
  final List<Object> documents = new ArrayList<>();
  final int wordCount = pattern.getWords().size();

  for (int index = 0; index < wordCount; index++) {
    final WordToken word = pattern.getWords(index);

    final Document entry = new Document();
    entry.put("text", word.getCoveredText());
    entry.put("pos", word.getPartOfSpeech());

    // Only the first lemma is recorded, and only when one is present.
    final boolean hasLemma = word.getLemmas() != null && word.getLemmas().size() > 0;
    if (hasLemma) {
      entry.put("lemma", word.getLemmas(0).getLemmaForm());
    }

    documents.add(entry);
  }

  return documents;
}
@Test public void test() throws Exception { String text = "This is some text. It has three sentences. The first sentence has four words."; jCas.setDocumentText(text); SimplePipeline.runPipeline(jCas, ae); assertEquals(3, JCasUtil.select(jCas, Sentence.class).size()); // 3 sentences Sentence s1 = JCasUtil.selectByIndex(jCas, Sentence.class, 0); List<WordToken> tokens = JCasUtil.selectCovered(jCas, WordToken.class, s1); assertEquals(5, tokens.size()); // 5 tokens in the first sentence assertEquals("NN", tokens.get(3).getPartOfSpeech()); // 4th token of first sentence is a noun List<PhraseChunk> phrases = JCasUtil.selectCovered(jCas, PhraseChunk.class, s1); assertEquals(3, phrases.size()); // 3 chunks in the first sentence assertEquals( "some text", phrases.get(2).getCoveredText()); // 3rd chunk of 1st sentence is "some text" }
@Test public void testWiithText() throws Exception { String text = "This is some text. It has three sentences. The first sentence has four words."; jCas.setDocumentText(text); Text t1 = new Text(jCas, 19, 43); t1.addToIndexes(); Text t2 = new Text(jCas, 43, jCas.getDocumentText().length()); t2.addToIndexes(); SimplePipeline.runPipeline(jCas, ae); assertEquals(2, JCasUtil.select(jCas, Sentence.class).size()); // 2 sentences // note due to text the first sentence annotation is the second in the text Sentence s1 = JCasUtil.selectByIndex(jCas, Sentence.class, 0); List<WordToken> tokens = JCasUtil.selectCovered(jCas, WordToken.class, s1); assertEquals(5, tokens.size()); // 5 tokens in the first sentence assertEquals("NNS", tokens.get(3).getPartOfSpeech()); // 4th token of first sentence is a noun List<PhraseChunk> phrases = JCasUtil.selectCovered(jCas, PhraseChunk.class, s1); assertEquals(3, phrases.size()); // 3 chunks in the first sentence assertEquals( "three sentences", phrases.get(2).getCoveredText()); // 3rd chunk of 1st sentence is "some text" } }