/**
 * Builds the {@code WordToken} for position {@code i} of the given sentence and indexes it.
 *
 * <p>Begin/end come from the sentence's start and end offset arrays. The part of speech is set
 * only when the sentence defines tags. If the sentence defines lemmas and the entry at {@code i}
 * is not {@code MISSING_VALUE}, the token receives a one-element lemma array holding a newly
 * indexed {@code WordLemma}; otherwise it receives an empty lemma array.
 *
 * @param sentence the sentence supplying offsets, tags and lemmas
 * @param i index of the token within the sentence's arrays
 * @return the token that was created and added to the indexes
 */
private WordToken createWordToken(Sentence sentence, int i) {
  final WordToken token = new WordToken(jCas);
  token.setBegin(sentence.startOffsets()[i]);
  token.setEnd(sentence.endOffsets()[i]);

  final Option<String[]> posTags = sentence.tags();
  final boolean tagged = posTags.isDefined();
  if (tagged) {
    token.setPartOfSpeech(posTags.get()[i]);
  }

  final Option<String[]> lemmaForms = sentence.lemmas();
  final boolean lemmaMissing =
      !lemmaForms.isDefined() || MISSING_VALUE.equals(lemmaForms.get()[i]);
  if (lemmaMissing) {
    // No usable lemma: still attach an (empty) array so the feature is never left unset.
    token.setLemmas(new FSArray(jCas, 0));
  } else {
    token.setLemmas(new FSArray(jCas, 1));
    final WordLemma wordLemma = new WordLemma(jCas);
    wordLemma.setLemmaForm(lemmaForms.get()[i]);
    if (tagged) {
      // The lemma carries the same part of speech as its token.
      wordLemma.setPartOfSpeech(posTags.get()[i]);
    }
    wordLemma.addToIndexes();
    token.setLemmas(0, wordLemma);
  }

  token.addToIndexes();
  return token;
}
/**
 * Creates and indexes a {@code WordToken} describing the {@code i}-th token of a sentence.
 *
 * <p>The token's span is read from the sentence's start/end offset arrays and its part of speech
 * from the sentence's tags, when those are defined. A lemma array is always attached: it has one
 * entry (a new, indexed {@code WordLemma}) when a lemma other than {@code MISSING_VALUE} is
 * available for position {@code i}, and is empty otherwise.
 *
 * @param sentence source of the offsets, tags and lemmas
 * @param i position of the token in the sentence's arrays
 * @return the indexed token
 */
private WordToken createWordToken(Sentence sentence, int i) {
  WordToken result = new WordToken(jCas);
  result.setBegin(sentence.startOffsets()[i]);
  result.setEnd(sentence.endOffsets()[i]);

  Option<String[]> tagOption = sentence.tags();
  if (tagOption.isDefined()) {
    result.setPartOfSpeech(tagOption.get()[i]);
  }

  Option<String[]> lemmaOption = sentence.lemmas();
  boolean hasLemma = lemmaOption.isDefined() && !MISSING_VALUE.equals(lemmaOption.get()[i]);

  // Size the lemma array up front: one slot when a lemma exists, none otherwise.
  result.setLemmas(new FSArray(jCas, hasLemma ? 1 : 0));
  if (hasLemma) {
    WordLemma entry = new WordLemma(jCas);
    entry.setLemmaForm(lemmaOption.get()[i]);
    if (tagOption.isDefined()) {
      entry.setPartOfSpeech(tagOption.get()[i]);
    }
    entry.addToIndexes();
    result.setLemmas(0, entry);
  }

  result.addToIndexes();
  return result;
}
/**
 * Indexes a single "VERB" token covering "working" in "Is this working?", runs
 * {@code processJCas("wordnet", wordnetErd)}, and expects the first token's first lemma to have
 * the form "work".
 */
@Test
public void testAddsLemma() throws UIMAException, ResourceInitializationException {
  jCas.setDocumentText("Is this working?");

  final WordToken token = new WordToken(jCas);
  final int start = jCas.getDocumentText().indexOf("working");
  token.setBegin(start);
  token.setEnd(token.getBegin() + "working".length());
  token.setPartOfSpeech("VERB");
  token.addToIndexes();

  processJCas("wordnet", wordnetErd);

  final List<WordToken> tokens = new ArrayList<>(JCasUtil.select(jCas, WordToken.class));
  final WordToken first = tokens.get(0);
  assertEquals("work", first.getLemmas(0).getLemmaForm());
}
/**
 * Exercises {@code RelationConstraint.matches} step by step: an interaction only matches once
 * both its relationship type and sub type are set, and, when word tokens are supplied, only once
 * a token with the constrained part of speech is among them.
 */
@Test
public void testMatches() throws UIMAException {
  JCas jCas = JCasSingleton.getJCasInstance();
  RelationConstraint constraint =
      new RelationConstraint("type", "subType", "pos", "source", "target");
  Interaction interaction = new Interaction(jCas);
  List<WordToken> none = Collections.emptyList();

  // Nothing set yet.
  assertFalse(constraint.matches(interaction, none));

  // Type alone is not sufficient.
  interaction.setRelationshipType("type");
  assertFalse(constraint.matches(interaction, none));

  // Sub type is given as "subtype" while the constraint holds "subType"; a match is expected.
  interaction.setRelationSubType("subtype");
  assertTrue(constraint.matches(interaction, none));

  WordToken verbToken = new WordToken(jCas);
  verbToken.setPartOfSpeech("VERB");
  WordToken posToken = new WordToken(jCas);
  posToken.setPartOfSpeech("POS");

  // With only the "VERB" token present the constraint must not match...
  List<WordToken> supplied = new ArrayList<>();
  supplied.add(verbToken);
  assertFalse(constraint.matches(interaction, supplied));

  // ...but it must once the "POS" token (constraint holds "pos") is added.
  supplied.add(posToken);
  assertTrue(constraint.matches(interaction, supplied));
}
}
/**
 * Indexes a "VERB" token covering "working" that already holds one lemma ("existing"), runs
 * {@code processJCas("wordnet", wordnetErd)}, and expects the existing lemma to remain at
 * position 0 with a lemma of form "work" at position 1.
 */
@Test
public void testAddsLemmaToExistingLemmas()
    throws UIMAException, ResourceInitializationException {
  jCas.setDocumentText("Is this working?");

  final WordToken token = new WordToken(jCas);
  final int start = jCas.getDocumentText().indexOf("working");
  token.setBegin(start);
  token.setEnd(token.getBegin() + "working".length());
  token.setPartOfSpeech("VERB");

  // Pre-populate the token with a single lemma before processing.
  token.setLemmas(new FSArray(jCas, 1));
  final WordLemma existingLemma = new WordLemma(jCas);
  existingLemma.setPartOfSpeech("existing");
  existingLemma.setLemmaForm("existing");
  token.setLemmas(0, existingLemma);
  token.addToIndexes();

  processJCas("wordnet", wordnetErd);

  final List<WordToken> tokens = new ArrayList<>(JCasUtil.select(jCas, WordToken.class));
  final WordToken first = tokens.get(0);
  assertEquals(existingLemma, first.getLemmas(0));
  assertEquals("work", first.getLemmas(1).getLemmaForm());
}
}
// Completes the current token: sets its end offset, tags it "VBZ" and indexes it in jCas.
// offset then moves to one past end (presumably skipping a single separator character — confirm).
wordToken.setEnd(end); wordToken.setPartOfSpeech("VBZ"); wordToken.addToIndexes(jCas); offset = end + 1;
// Sets the token's end offset, marks it as "VBZ", adds it to the jCas indexes, and advances
// offset to end + 1 (presumably to step over one separator character — confirm).
wordToken.setEnd(end); wordToken.setPartOfSpeech("VBZ"); wordToken.addToIndexes(jCas); offset = end + 1;
// Fixture tokens wt0..wt5 with hard-coded offsets, part-of-speech tags and sentence order.
// NOTE(review): within these lines wt1 and wt3 never receive setBegin/setEnd, and wt5 gets no
// sentence order or addToIndexes call — presumably handled outside this span; confirm.
wt0.setBegin(0); wt0.setEnd(3); wt0.setPartOfSpeech("NNP"); wt0.setSentenceOrder(0); wt0.addToIndexes(jCas); wt1.setPartOfSpeech(","); wt1.setSentenceOrder(1); wt1.addToIndexes(jCas); wt2.setBegin(5); wt2.setEnd(7); wt2.setPartOfSpeech("IN"); wt2.setSentenceOrder(2); wt2.addToIndexes(jCas); wt3.setPartOfSpeech("JJ"); wt3.setSentenceOrder(3); wt3.addToIndexes(jCas); wt4.setBegin(12); wt4.setEnd(13); wt4.setPartOfSpeech(","); wt4.setSentenceOrder(4); wt4.addToIndexes(jCas); wt5.setBegin(14); wt5.setEnd(20); wt5.setPartOfSpeech("NNS");
// Creates tagged tokens over existing spans: attackedVerb copies the offsets of attacked ("VBZ"),
// attackNoun copies the offsets of attack ("NNS"); both are added to the indexes.
// NOTE(review): attackAdj's begin/end are read from attackAdj itself, so those two calls are
// no-ops; it presumably should copy the offsets of a separate annotation — confirm and fix.
attackedVerb.setBegin(attacked.getBegin()); attackedVerb.setEnd(attacked.getEnd()); attackedVerb.setPartOfSpeech("VBZ"); attackedVerb.addToIndexes(); attackNoun.setBegin(attack.getBegin()); attackNoun.setEnd(attack.getEnd()); attackNoun.setPartOfSpeech("NNS"); attackNoun.addToIndexes(); attackAdj.setBegin(attackAdj.getBegin()); attackAdj.setEnd(attackAdj.getEnd()); attackAdj.setPartOfSpeech("ADJ"); attackAdj.addToIndexes();
// Fixture tokens wt0..wt4 with hard-coded offsets, part-of-speech tags and sentence order, each
// added to the jCas indexes. wt3 is the "." token at [18, 19), directly after wt2.
// wt4's sentence order restarts at 0 (presumably the first token of a second sentence — confirm).
wt0.setBegin(0); wt0.setEnd(3); wt0.setPartOfSpeech("NNP"); wt0.setSentenceOrder(0); wt0.addToIndexes(jCas); wt1.setBegin(4); wt1.setEnd(11); wt1.setPartOfSpeech("VBD"); wt1.setSentenceOrder(1); wt1.addToIndexes(jCas); wt2.setBegin(12); wt2.setEnd(18); wt2.setPartOfSpeech("NNP"); wt2.setSentenceOrder(2); wt2.addToIndexes(jCas); wt3.setBegin(18); wt3.setEnd(19); wt3.setPartOfSpeech("."); wt3.setSentenceOrder(3); wt3.addToIndexes(jCas); wt4.setBegin(20); wt4.setEnd(23); wt4.setPartOfSpeech("NNP"); wt4.setSentenceOrder(0); wt4.addToIndexes(jCas);
// Tags and indexes the fixture tokens: a ("DT"), sample ("NN"), of ("IN") and text ("NN").
// sample, of and text are assigned here to variables declared outside these lines; the two ints
// passed to the WordToken constructor are presumably begin and end offsets — confirm.
a.setPartOfSpeech("DT"); a.addToIndexes(); sample = new WordToken(jCas, 2, 8); sample.setPartOfSpeech("NN"); sample.addToIndexes(); of = new WordToken(jCas, 9, 11); of.setPartOfSpeech("IN"); of.addToIndexes(); text = new WordToken(jCas, 12, 16); text.setPartOfSpeech("NN"); text.addToIndexes();
// Fixture tokens wt0..wt4 with hard-coded offsets, part-of-speech tags and consecutive sentence
// order 0..4, each added to the jCas indexes. wt3 is the "," token at [18, 19), directly after
// wt2; wt4 covers [20, 25).
wt0.setBegin(0); wt0.setEnd(3); wt0.setPartOfSpeech("NNP"); wt0.setSentenceOrder(0); wt0.addToIndexes(jCas); wt1.setBegin(4); wt1.setEnd(11); wt1.setPartOfSpeech("VBD"); wt1.setSentenceOrder(1); wt1.addToIndexes(jCas); wt2.setBegin(12); wt2.setEnd(18); wt2.setPartOfSpeech("NNP"); wt2.setSentenceOrder(2); wt2.addToIndexes(jCas); wt3.setBegin(18); wt3.setEnd(19); wt3.setPartOfSpeech(","); wt3.setSentenceOrder(3); wt3.addToIndexes(jCas); wt4.setBegin(20); wt4.setEnd(25); wt4.setPartOfSpeech("NNP"); wt4.setSentenceOrder(4); wt4.addToIndexes(jCas);
// Fixture tokens wt0..wt4 with hard-coded offsets, part-of-speech tags and consecutive sentence
// order 0..4, each added to the jCas indexes. wt3 is a "CC" token at [19, 22) placed between the
// two "NNP" tokens wt2 at [12, 18) and wt4 at [23, 28).
wt0.setBegin(0); wt0.setEnd(3); wt0.setPartOfSpeech("NNP"); wt0.setSentenceOrder(0); wt0.addToIndexes(jCas); wt1.setBegin(4); wt1.setEnd(11); wt1.setPartOfSpeech("VBD"); wt1.setSentenceOrder(1); wt1.addToIndexes(jCas); wt2.setBegin(12); wt2.setEnd(18); wt2.setPartOfSpeech("NNP"); wt2.setSentenceOrder(2); wt2.addToIndexes(jCas); wt3.setBegin(19); wt3.setEnd(22); wt3.setPartOfSpeech("CC"); wt3.setSentenceOrder(3); wt3.addToIndexes(jCas); wt4.setBegin(23); wt4.setEnd(28); wt4.setPartOfSpeech("NNP"); wt4.setSentenceOrder(4); wt4.addToIndexes(jCas);
// Fixture tokens wt0..wt5 with hard-coded offsets, part-of-speech tags and sentence order.
// NOTE(review): in the lines shown, wt1 and wt3 are tagged and indexed without setBegin/setEnd,
// and wt5 is neither given a sentence order nor indexed — presumably done elsewhere; confirm.
wt0.setBegin(0); wt0.setEnd(3); wt0.setPartOfSpeech("NNP"); wt0.setSentenceOrder(0); wt0.addToIndexes(jCas); wt1.setPartOfSpeech(","); wt1.setSentenceOrder(1); wt1.addToIndexes(jCas); wt2.setBegin(5); wt2.setEnd(7); wt2.setPartOfSpeech("IN"); wt2.setSentenceOrder(2); wt2.addToIndexes(jCas); wt3.setPartOfSpeech("JJ"); wt3.setSentenceOrder(3); wt3.addToIndexes(jCas); wt4.setBegin(12); wt4.setEnd(13); wt4.setPartOfSpeech(","); wt4.setSentenceOrder(4); wt4.addToIndexes(jCas); wt5.setBegin(14); wt5.setEnd(20); wt5.setPartOfSpeech("NNS");
// Fixture tokens wt0..wt4, each given hard-coded offsets, a part-of-speech tag and a sentence
// order from 0 to 4 before being added to the jCas indexes. The "," token wt3 at [18, 19) starts
// where wt2 ends; wt4 covers [20, 25).
wt0.setBegin(0); wt0.setEnd(3); wt0.setPartOfSpeech("NNP"); wt0.setSentenceOrder(0); wt0.addToIndexes(jCas); wt1.setBegin(4); wt1.setEnd(11); wt1.setPartOfSpeech("VBD"); wt1.setSentenceOrder(1); wt1.addToIndexes(jCas); wt2.setBegin(12); wt2.setEnd(18); wt2.setPartOfSpeech("NNP"); wt2.setSentenceOrder(2); wt2.addToIndexes(jCas); wt3.setBegin(18); wt3.setEnd(19); wt3.setPartOfSpeech(","); wt3.setSentenceOrder(3); wt3.addToIndexes(jCas); wt4.setBegin(20); wt4.setEnd(25); wt4.setPartOfSpeech("NNP"); wt4.setSentenceOrder(4); wt4.addToIndexes(jCas);
// Positions the jumps token at offset 8 with a length equal to that of the word "jumps",
// tags it "VB" and adds it to the indexes.
jumps.setBegin(8); jumps.setEnd(8 + "jumps".length()); jumps.setPartOfSpeech("VB"); jumps.addToIndexes();
// Positions the jumps token at offset 8 with a length equal to that of the word "jumps" and tags
// it "VB", then creates a WordLemma with the form "jump".
// Within these lines the lemma is not yet attached to jumps and neither object is indexed.
jumps.setBegin(8); jumps.setEnd(8 + "jumps".length()); jumps.setPartOfSpeech("VB"); final WordLemma jumpLemma = new WordLemma(jCas); jumpLemma.setLemmaForm("jump");