/**
 * Returns the lemma to use for a word token.
 *
 * <p>The lemma form of the first entry in the token's lemma array is returned. When the token has
 * no lemma array, the array is empty, or its first slot is unset, the token's covered text is
 * returned in lower case instead.
 *
 * @param token the word token to inspect
 * @return the first lemma form, or the lower-cased covered text as a fallback
 */
private String getLemma(final WordToken token) {
  final FSArray array = token.getLemmas();
  if (array != null && array.size() > 0) {
    final WordLemma first = (WordLemma) array.get(0);
    // An allocated array can still hold an unset (null) slot; use the fallback in that case.
    if (first != null) {
      return first.getLemmaForm();
    }
  }
  // Locale.ROOT keeps the result independent of the JVM default locale
  // (for example, a Turkish default would map 'I' to a dotless 'ı').
  return token.getCoveredText().toLowerCase(java.util.Locale.ROOT);
}
/**
 * Picks the lemma for the given word token.
 *
 * <p>If the token carries at least one lemma and the first one is set, that lemma's form is
 * returned. Otherwise the method falls back to the text covered by the token, converted to lower
 * case.
 *
 * @param token the word token whose lemma is wanted
 * @return the form of the first lemma, or the lower-cased covered text when no lemma is available
 */
private String getLemma(final WordToken token) {
  final FSArray lemmas = token.getLemmas();
  final boolean hasLemmas = lemmas != null && lemmas.size() > 0;
  // A slot of an allocated array may never have been assigned, so check the element as well.
  final WordLemma firstLemma = hasLemmas ? (WordLemma) lemmas.get(0) : null;
  if (firstLemma == null) {
    // Lower-case with Locale.ROOT so the fallback does not depend on the JVM default locale.
    return token.getCoveredText().toLowerCase(java.util.Locale.ROOT);
  }
  return firstLemma.getLemmaForm();
}
/**
 * Converts the words of a pattern into a list of documents.
 *
 * <p>Each word produces one {@code Document} with the keys {@code "text"} (the word's covered
 * text) and {@code "pos"} (its part of speech). The key {@code "lemma"} is added only when the
 * word has at least one lemma, and holds the form of the first lemma.
 *
 * @param pattern the pattern whose words are converted
 * @return one document per word, in word order; an empty list when the pattern has no word array
 */
private List<Object> saveWords(final Pattern pattern) {
  // Treat an unset word array as "no words" instead of failing with a NullPointerException,
  // mirroring the null check already applied to each word's lemma array below.
  if (pattern.getWords() == null) {
    return new ArrayList<>();
  }

  final int wordCount = pattern.getWords().size();
  final List<Object> list = new ArrayList<>(wordCount);
  for (int i = 0; i < wordCount; i++) {
    final WordToken w = pattern.getWords(i);
    final Document o =
        new Document().append("text", w.getCoveredText()).append("pos", w.getPartOfSpeech());
    if (w.getLemmas() != null && w.getLemmas().size() >= 1) {
      o.put("lemma", w.getLemmas(0).getLemmaForm());
    }
    list.add(o);
  }
  return list;
}
/**
 * Verifies that a verb token with no lemmas has the lemma form "work" as its first lemma once
 * the CAS has been processed.
 */
@Test
public void testAddsLemma() throws UIMAException, ResourceInitializationException {
  jCas.setDocumentText("Is this working?");

  // Annotate the word "working" as a verb.
  final int start = jCas.getDocumentText().indexOf("working");
  final WordToken token = new WordToken(jCas);
  token.setBegin(start);
  token.setEnd(start + "working".length());
  token.setPartOfSpeech("VERB");
  token.addToIndexes();

  processJCas("wordnet", wordnetErd);

  final List<WordToken> tokens = new ArrayList<>(JCasUtil.select(jCas, WordToken.class));
  final WordToken processed = tokens.get(0);
  assertEquals("work", processed.getLemmas(0).getLemmaForm());
}
/**
 * Verifies that a token which already has one lemma keeps it in the first slot after the CAS has
 * been processed, and that a lemma with the form "work" appears in the second slot.
 */
@Test
public void testAddsLemmaToExistingLemmas() throws UIMAException, ResourceInitializationException {
  jCas.setDocumentText("Is this working?");

  // Annotate the word "working" as a verb.
  final int start = jCas.getDocumentText().indexOf("working");
  final WordToken token = new WordToken(jCas);
  token.setBegin(start);
  token.setEnd(start + "working".length());
  token.setPartOfSpeech("VERB");

  // Give the token a single pre-existing lemma before processing.
  token.setLemmas(new FSArray(jCas, 1));
  final WordLemma existingLemma = new WordLemma(jCas);
  existingLemma.setPartOfSpeech("existing");
  existingLemma.setLemmaForm("existing");
  token.setLemmas(0, existingLemma);
  token.addToIndexes();

  processJCas("wordnet", wordnetErd);

  final List<WordToken> tokens = new ArrayList<>(JCasUtil.select(jCas, WordToken.class));
  final WordToken processed = tokens.get(0);
  assertEquals(existingLemma, processed.getLemmas(0));
  assertEquals("work", processed.getLemmas(1).getLemmaForm());
}
}