/**
 * Builds a {@link WordToken} for the token at position {@code i} of the given sentence and adds
 * it to the CAS indexes.
 *
 * <p>The token's span is taken from the sentence's start/end offsets. If the sentence carries
 * tags, the tag at {@code i} becomes the token's part of speech. If the sentence carries lemmas
 * and the lemma at {@code i} is not {@code MISSING_VALUE}, a single indexed {@link WordLemma} is
 * attached; otherwise the token gets an empty lemma array.
 *
 * @param sentence the sentence supplying offsets, optional tags and optional lemmas
 * @param i index of the token within the sentence's arrays
 * @return the newly created and indexed token
 */
private WordToken createWordToken(Sentence sentence, int i) {
  final WordToken token = new WordToken(jCas);
  token.setBegin(sentence.startOffsets()[i]);
  token.setEnd(sentence.endOffsets()[i]);

  // Look the tag up once; it is reused for the lemma below.
  final Option<String[]> posTags = sentence.tags();
  final boolean hasPos = posTags.isDefined();
  final String pos = hasPos ? posTags.get()[i] : null;
  if (hasPos) {
    token.setPartOfSpeech(pos);
  }

  final Option<String[]> lemmaForms = sentence.lemmas();
  final String form = lemmaForms.isDefined() ? lemmaForms.get()[i] : null;
  if (!lemmaForms.isDefined() || MISSING_VALUE.equals(form)) {
    // No usable lemma for this position: attach an empty array.
    token.setLemmas(new FSArray(jCas, 0));
  } else {
    token.setLemmas(new FSArray(jCas, 1));
    final WordLemma wordLemma = new WordLemma(jCas);
    wordLemma.setLemmaForm(form);
    if (hasPos) {
      wordLemma.setPartOfSpeech(pos);
    }
    wordLemma.addToIndexes();
    token.setLemmas(0, wordLemma);
  }

  token.addToIndexes();
  return token;
}
/**
 * Creates the {@link WordToken} annotation for entry {@code i} of {@code sentence}, populates it
 * and registers it in the CAS indexes.
 *
 * <p>Begin/end come from the sentence offsets. The part of speech is set only when the sentence
 * has tags. The lemma array holds one indexed {@link WordLemma} when the sentence has lemmas and
 * the entry at {@code i} differs from {@code MISSING_VALUE}; in every other case it is empty.
 *
 * @param sentence source of the offsets, tags and lemmas
 * @param i position of the token inside the sentence
 * @return the indexed token annotation
 */
private WordToken createWordToken(Sentence sentence, int i) {
  final WordToken result = new WordToken(jCas);
  result.setBegin(sentence.startOffsets()[i]);
  result.setEnd(sentence.endOffsets()[i]);

  final Option<String[]> tagOpt = sentence.tags();
  final boolean tagged = tagOpt.isDefined();
  if (tagged) {
    result.setPartOfSpeech(tagOpt.get()[i]);
  }

  final Option<String[]> lemmaOpt = sentence.lemmas();
  final boolean hasLemma = lemmaOpt.isDefined() && !MISSING_VALUE.equals(lemmaOpt.get()[i]);

  // The array is sized up front: one slot when a lemma is available, none otherwise.
  result.setLemmas(new FSArray(jCas, hasLemma ? 1 : 0));
  if (hasLemma) {
    final WordLemma entry = new WordLemma(jCas);
    entry.setLemmaForm(lemmaOpt.get()[i]);
    if (tagged) {
      entry.setPartOfSpeech(tagOpt.get()[i]);
    }
    entry.addToIndexes();
    result.setLemmas(0, entry);
  }

  result.addToIndexes();
  return result;
}
/**
 * Seeds the CAS with a single token for "working" that already carries one lemma, runs
 * {@code processJCas("wordnet", wordnetErd)}, and expects the original lemma to remain at index 0
 * with a lemma of form "work" at index 1.
 */
@Test
public void testAddsLemmaToExistingLemmas() throws UIMAException, ResourceInitializationException {
  final String word = "working";
  jCas.setDocumentText("Is this working?");

  // Token covering the word, pre-populated with a single lemma.
  final WordToken token = new WordToken(jCas);
  final int start = jCas.getDocumentText().indexOf(word);
  token.setBegin(start);
  token.setEnd(start + word.length());
  token.setPartOfSpeech("VERB");
  token.setLemmas(new FSArray(jCas, 1));

  final WordLemma existingLemma = new WordLemma(jCas);
  existingLemma.setPartOfSpeech("existing");
  existingLemma.setLemmaForm("existing");
  token.setLemmas(0, existingLemma);
  token.addToIndexes();

  processJCas("wordnet", wordnetErd);

  final List<WordToken> tokens = new ArrayList<>(JCasUtil.select(jCas, WordToken.class));
  final WordToken processed = tokens.get(0);
  assertEquals(existingLemma, processed.getLemmas(0));
  assertEquals("work", processed.getLemmas(1).getLemmaForm());
}
} // closes an enclosing scope whose header is outside this chunk