/**
 * Creates a WordToken in the given JCas and assigns its begin and end offsets.
 *
 * <p>The superclass constructor is invoked with the JCas first; the offsets are then set through
 * {@code setBegin} / {@code setEnd}, and finally {@code readObject()} is called.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public WordToken(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // Called last, i.e. only after both offsets have been assigned.
  readObject();
}
/**
 * Creates a WordToken in the given JCas and assigns its begin and end offsets.
 *
 * <p>The superclass constructor is invoked with the JCas first; the offsets are then set through
 * {@code setBegin} / {@code setEnd}, and finally {@code readObject()} is called.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public WordToken(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  // Called last, i.e. only after both offsets have been assigned.
  readObject();
}
/**
 * Creates and indexes one {@link WordToken} for every stretch of document text that precedes a
 * match of {@code regex}.
 *
 * <p>The first token starts at offset 0; each subsequent token starts where the previous match
 * ended and stops where the next match starts. Consequences of this scheme:
 *
 * <ul>
 *   <li>text following the last match does not produce a token;
 *   <li>a match at offset 0, or two adjacent matches, produces a zero-length token.
 * </ul>
 *
 * @param jCas the JCas whose document text is scanned and to whose indexes the tokens are added
 * @param regex the regular expression whose matches separate the tokens
 * @return the created tokens, in document order
 */
public static List<WordToken> createWordTokens(JCas jCas, String regex) {
  final List<WordToken> tokens = new ArrayList<>();
  final String text = jCas.getDocumentText();
  final Matcher separator = Pattern.compile(regex).matcher(text);

  // tokenStart trails the matcher: it always points just past the previous separator match.
  for (int tokenStart = 0; separator.find(); tokenStart = separator.end()) {
    final WordToken token = new WordToken(jCas);
    token.setBegin(tokenStart);
    token.setEnd(separator.start());
    token.addToIndexes();
    tokens.add(token);
  }

  return tokens;
}
/**
 * Creates and indexes the {@link WordToken} for position {@code i} of the given sentence.
 *
 * <p>Begin and end come from the sentence's start/end offset arrays. If the sentence has tags,
 * the tag at {@code i} becomes the token's part of speech. If the sentence has lemmas and the
 * lemma at {@code i} is not {@code MISSING_VALUE}, a single {@link WordLemma} (carrying the lemma
 * form and, when tags are present, the same part of speech) is created, indexed and attached;
 * otherwise the token receives an empty lemma array.
 *
 * @param sentence the sentence supplying offsets, tags and lemmas
 * @param i index of the token within the sentence
 * @return the newly created token, already added to the indexes
 */
private WordToken createWordToken(Sentence sentence, int i) {
  final WordToken token = new WordToken(jCas);
  token.setBegin(sentence.startOffsets()[i]);
  token.setEnd(sentence.endOffsets()[i]);

  final Option<String[]> posTags = sentence.tags();
  final boolean hasPosTags = posTags.isDefined();
  if (hasPosTags) {
    token.setPartOfSpeech(posTags.get()[i]);
  }

  final Option<String[]> lemmaForms = sentence.lemmas();
  final boolean hasLemma =
      lemmaForms.isDefined() && !MISSING_VALUE.equals(lemmaForms.get()[i]);

  if (!hasLemma) {
    // No usable lemma: attach an empty array rather than leaving the feature unset.
    token.setLemmas(new FSArray(jCas, 0));
  } else {
    token.setLemmas(new FSArray(jCas, 1));

    final WordLemma wordLemma = new WordLemma(jCas);
    wordLemma.setLemmaForm(lemmaForms.get()[i]);
    if (hasPosTags) {
      wordLemma.setPartOfSpeech(posTags.get()[i]);
    }
    wordLemma.addToIndexes();

    token.setLemmas(0, wordLemma);
  }

  token.addToIndexes();
  return token;
}
/**
 * Creates and indexes the {@link WordToken} for position {@code i} of the given sentence.
 *
 * <p>Begin and end come from the sentence's start/end offset arrays. If the sentence has tags,
 * the tag at {@code i} becomes the token's part of speech. If the sentence has lemmas and the
 * lemma at {@code i} is not {@code MISSING_VALUE}, a single {@link WordLemma} (carrying the lemma
 * form and, when tags are present, the same part of speech) is created, indexed and attached;
 * otherwise the token receives an empty lemma array.
 *
 * @param sentence the sentence supplying offsets, tags and lemmas
 * @param i index of the token within the sentence
 * @return the newly created token, already added to the indexes
 */
private WordToken createWordToken(Sentence sentence, int i) {
  final WordToken token = new WordToken(jCas);
  token.setBegin(sentence.startOffsets()[i]);
  token.setEnd(sentence.endOffsets()[i]);

  final Option<String[]> posTags = sentence.tags();
  final boolean hasPosTags = posTags.isDefined();
  if (hasPosTags) {
    token.setPartOfSpeech(posTags.get()[i]);
  }

  final Option<String[]> lemmaForms = sentence.lemmas();
  final boolean hasLemma =
      lemmaForms.isDefined() && !MISSING_VALUE.equals(lemmaForms.get()[i]);

  if (!hasLemma) {
    // No usable lemma: attach an empty array rather than leaving the feature unset.
    token.setLemmas(new FSArray(jCas, 0));
  } else {
    token.setLemmas(new FSArray(jCas, 1));

    final WordLemma wordLemma = new WordLemma(jCas);
    wordLemma.setLemmaForm(lemmaForms.get()[i]);
    if (hasPosTags) {
      wordLemma.setPartOfSpeech(posTags.get()[i]);
    }
    wordLemma.addToIndexes();

    token.setLemmas(0, wordLemma);
  }

  token.addToIndexes();
  return token;
}
/**
 * Indexes a single VERB token covering "working", runs the "wordnet" resource through
 * {@code processJCas}, and checks that the first token's first lemma has the form "work".
 */
@Test
public void testAddsLemma() throws UIMAException, ResourceInitializationException {
  final String word = "working";
  jCas.setDocumentText("Is this working?");

  final int wordStart = jCas.getDocumentText().indexOf(word);
  final WordToken token = new WordToken(jCas);
  token.setBegin(wordStart);
  token.setEnd(wordStart + word.length());
  token.setPartOfSpeech("VERB");
  token.addToIndexes();

  processJCas("wordnet", wordnetErd);

  final List<WordToken> indexed = new ArrayList<>(JCasUtil.select(jCas, WordToken.class));
  final WordToken first = indexed.get(0);
  assertEquals("work", first.getLemmas(0).getLemmaForm());
}
/**
 * Indexes a VERB token covering "working" that already carries one lemma, runs the "wordnet"
 * resource through {@code processJCas}, and checks that the pre-existing lemma is still at
 * position 0 while a lemma with form "work" has been placed at position 1.
 */
@Test
public void testAddsLemmaToExistingLemmas() throws UIMAException, ResourceInitializationException {
  final String word = "working";
  jCas.setDocumentText("Is this working?");

  final int wordStart = jCas.getDocumentText().indexOf(word);
  final WordToken token = new WordToken(jCas);
  token.setBegin(wordStart);
  token.setEnd(wordStart + word.length());
  token.setPartOfSpeech("VERB");
  token.setLemmas(new FSArray(jCas, 1));

  // Seed the token with a lemma that must survive processing unchanged.
  final WordLemma existingLemma = new WordLemma(jCas);
  existingLemma.setPartOfSpeech("existing");
  existingLemma.setLemmaForm("existing");
  token.setLemmas(0, existingLemma);
  token.addToIndexes();

  processJCas("wordnet", wordnetErd);

  final List<WordToken> indexed = new ArrayList<>(JCasUtil.select(jCas, WordToken.class));
  final WordToken first = indexed.get(0);
  assertEquals(existingLemma, first.getLemmas(0));
  assertEquals("work", first.getLemmas(1).getLemmaForm());
}
} // closes the enclosing class, whose header lies outside this view
/**
 * Sets up a Person "John" and a WordToken "he" that share one ReferenceTarget, plus a Location
 * "London", runs {@code processJCas()}, and then checks the selected {@code Entity} annotations:
 * exactly three are expected, with values "John", "John" (the second being a Person) and
 * "London", in that order.
 */
@Test
public void test() throws AnalysisEngineProcessException, ResourceInitializationException {
  final String pronoun = "he";
  final String city = "London";
  final String text = "John say that he would visit London";
  jCas.setDocumentText(text);

  final ReferenceTarget target = new ReferenceTarget(jCas);
  target.addToIndexes();

  final Person john = new Person(jCas);
  john.setBegin(0);
  john.setEnd(4);
  john.setReferent(target);
  john.setValue("John");
  john.addToIndexes();

  // The pronoun token points at the same reference target as the Person.
  final WordToken pronounToken = new WordToken(jCas);
  pronounToken.setBegin(text.indexOf(pronoun));
  pronounToken.setEnd(pronounToken.getBegin() + pronoun.length());
  pronounToken.setReferent(target);
  pronounToken.addToIndexes();

  final Location london = new Location(jCas);
  london.setBegin(text.indexOf(city));
  london.setEnd(london.getBegin() + city.length());
  london.setValue("London");
  london.addToIndexes();

  processJCas();

  final List<Entity> entities = new ArrayList<>(JCasUtil.select(jCas, Entity.class));
  assertEquals(3, entities.size());

  final Entity first = entities.get(0);
  final Entity second = entities.get(1);
  final Entity third = entities.get(2);
  assertEquals("John", first.getValue());
  assertEquals("John", second.getValue());
  assertTrue(second instanceof Person);
  assertEquals("London", third.getValue());
}
final WordToken wordToken = new WordToken(jCas); wordToken.setBegin(offset); wordToken.setEnd(end);
private void addWordTokens() { WordToken wt0 = new WordToken(jCas); wt0.setBegin(0); wt0.setEnd(3); wt0.setPartOfSpeech("NNP"); wt0.setSentenceOrder(0); wt1.setEnd(4); wt1.setPartOfSpeech(","); wt1.setSentenceOrder(1); wt2.setEnd(7); wt2.setPartOfSpeech("IN"); wt2.setSentenceOrder(2); wt3.setEnd(12); wt3.setPartOfSpeech("JJ"); wt3.setSentenceOrder(3); wt4.setEnd(13); wt4.setPartOfSpeech(","); wt4.setSentenceOrder(4); wt5.setEnd(20); wt5.setPartOfSpeech("NNS"); wt5.setSentenceOrder(5); wt6.setEnd(27); wt6.setPartOfSpeech("NNP"); wt6.setSentenceOrder(6);
final WordToken wordToken = new WordToken(jCas); wordToken.setBegin(offset); wordToken.setEnd(end);
// Sets the end offset and part-of-speech tag on three tokens and adds each to the indexes.
// NOTE(review): the last group calls attackAdj.setEnd(attackAdj.getEnd()), which writes the
// token's own end offset back to itself, whereas the two groups before it copy the end from a
// different variable (attacked, attack). This looks like a copy/paste slip — confirm which
// annotation the end offset of attackAdj is meant to come from.
attackedVerb.setEnd(attacked.getEnd()); attackedVerb.setPartOfSpeech("VBZ"); attackedVerb.addToIndexes(); attackNoun.setEnd(attack.getEnd()); attackNoun.setPartOfSpeech("NNS"); attackNoun.addToIndexes(); attackAdj.setEnd(attackAdj.getEnd()); attackAdj.setPartOfSpeech("ADJ"); attackAdj.addToIndexes();
wt0.setEnd(3); wt0.setPartOfSpeech("NNP"); wt0.setSentenceOrder(0); wt1.setEnd(11); wt1.setPartOfSpeech("VBD"); wt1.setSentenceOrder(1); wt2.setEnd(18); wt2.setPartOfSpeech("NNP"); wt2.setSentenceOrder(2); wt3.setEnd(19); wt3.setPartOfSpeech("."); wt3.setSentenceOrder(3); wt4.setEnd(23); wt4.setPartOfSpeech("NNP"); wt4.setSentenceOrder(0); wt5.setEnd(28); wt5.setPartOfSpeech("VBD"); wt5.setSentenceOrder(1); wt6.setEnd(31); wt6.setPartOfSpeech("TO"); wt6.setSentenceOrder(2); wt7.setEnd(37);
wt0.setEnd(3); wt0.setPartOfSpeech("NNP"); wt0.setSentenceOrder(0); wt1.setEnd(11); wt1.setPartOfSpeech("VBD"); wt1.setSentenceOrder(1); wt2.setEnd(18); wt2.setPartOfSpeech("NNP"); wt2.setSentenceOrder(2); wt3.setEnd(19); wt3.setPartOfSpeech(","); wt3.setSentenceOrder(3); wt4.setEnd(25); wt4.setPartOfSpeech("NNP"); wt4.setSentenceOrder(4); wt5.setEnd(29); wt5.setPartOfSpeech("CC"); wt5.setSentenceOrder(5); wt6.setEnd(33); wt6.setPartOfSpeech("NNP"); wt6.setSentenceOrder(6); wt7.setEnd(38);
private void addAnnotations() { WordToken wt0 = new WordToken(jCas); wt0.setBegin(0); wt0.setEnd(3); wt0.setPartOfSpeech("NNP"); wt0.setSentenceOrder(0); wt1.setEnd(11); wt1.setPartOfSpeech("VBD"); wt1.setSentenceOrder(1); wt2.setEnd(18); wt2.setPartOfSpeech("NNP"); wt2.setSentenceOrder(2); wt3.setEnd(22); wt3.setPartOfSpeech("CC"); wt3.setSentenceOrder(3); wt4.setEnd(28); wt4.setPartOfSpeech("NNP"); wt4.setSentenceOrder(4); wt5.setEnd(29); wt5.setPartOfSpeech("."); wt5.setSentenceOrder(5);
private void addDependencyInformation() { WordToken wt0 = new WordToken(jCas); wt0.setBegin(0); wt0.setEnd(3); wt0.setPartOfSpeech("NNP"); wt0.setSentenceOrder(0); wt1.setEnd(4); wt1.setPartOfSpeech(","); wt1.setSentenceOrder(1); wt2.setEnd(7); wt2.setPartOfSpeech("IN"); wt2.setSentenceOrder(2); wt3.setEnd(12); wt3.setPartOfSpeech("JJ"); wt3.setSentenceOrder(3); wt4.setEnd(13); wt4.setPartOfSpeech(","); wt4.setSentenceOrder(4); wt5.setEnd(20); wt5.setPartOfSpeech("NNS"); wt5.setSentenceOrder(5); wt6.setEnd(27); wt6.setPartOfSpeech("NNP"); wt6.setSentenceOrder(6);
jumps.setEnd(8 + "jumps".length()); jumps.setPartOfSpeech("VB"); jumps.addToIndexes();
jumps.setEnd(8 + "jumps".length()); jumps.setPartOfSpeech("VB"); final WordLemma jumpLemma = new WordLemma(jCas);