/**
 * Creates a WordToken in the given JCas, assigns its begin and end offsets and then invokes
 * {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public WordToken(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  readObject();
}
/**
 * Creates a WordToken in the given JCas, assigns its begin and end offsets and then invokes
 * {@code readObject()}.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public WordToken(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  readObject();
}
/**
 * Creates word tokens for the stretches of document text that lie between successive matches of
 * {@code regex}; the pattern therefore acts as the delimiter.
 *
 * <p>Each token runs from the end of the previous match (offset 0 for the first token) up to the
 * start of the current match, and is added to the indexes as it is created. Text following the
 * last match is not covered by any token. A zero-length token is produced when a match starts
 * exactly where the previous one ended, or when the first match starts at offset 0.
 *
 * @param jCas the JCas supplying the document text and receiving the tokens
 * @param regex the delimiter pattern; it is compiled on every call
 * @return the created tokens, in the order they were found
 */
public static List<WordToken> createWordTokens(JCas jCas, String regex) {
  final List<WordToken> tokens = new ArrayList<>();
  final String text = jCas.getDocumentText();
  final Matcher delimiters = Pattern.compile(regex).matcher(text);

  // tokenStart trails the matcher: it always holds the end of the previous delimiter.
  for (int tokenStart = 0; delimiters.find(); tokenStart = delimiters.end()) {
    final WordToken token = new WordToken(jCas);
    token.setBegin(tokenStart);
    token.setEnd(delimiters.start());
    token.addToIndexes();
    tokens.add(token);
  }
  return tokens;
}
/**
 * Builds a {@link WordToken} for the entry at index {@code i} of the given sentence and adds it
 * to the indexes.
 *
 * <p>The token's span comes from the sentence's start and end offsets. If the sentence has tags,
 * the tag at {@code i} becomes the token's part of speech. If the sentence has lemmas and the
 * lemma at {@code i} is not {@code MISSING_VALUE}, a single indexed {@link WordLemma} (carrying
 * the same part of speech when tags are present) is attached; otherwise the token receives an
 * empty lemma array.
 *
 * @param sentence the sentence providing offsets, tags and lemmas
 * @param i index of the word within the sentence's arrays
 * @return the newly created and indexed token
 */
private WordToken createWordToken(Sentence sentence, int i) {
  final WordToken token = new WordToken(jCas);
  token.setBegin(sentence.startOffsets()[i]);
  token.setEnd(sentence.endOffsets()[i]);

  final Option<String[]> posTags = sentence.tags();
  final boolean hasPos = posTags.isDefined();
  if (hasPos) {
    token.setPartOfSpeech(posTags.get()[i]);
  }

  final Option<String[]> lemmaForms = sentence.lemmas();
  final boolean hasLemma = lemmaForms.isDefined() && !MISSING_VALUE.equals(lemmaForms.get()[i]);
  if (!hasLemma) {
    token.setLemmas(new FSArray(jCas, 0));
  } else {
    // The array is installed before the lemma is created, then filled at slot 0.
    token.setLemmas(new FSArray(jCas, 1));
    final WordLemma lemma = new WordLemma(jCas);
    lemma.setLemmaForm(lemmaForms.get()[i]);
    if (hasPos) {
      lemma.setPartOfSpeech(posTags.get()[i]);
    }
    lemma.addToIndexes();
    token.setLemmas(0, lemma);
  }

  token.addToIndexes();
  return token;
}
/**
 * Builds a {@link WordToken} for the entry at index {@code i} of the given sentence and adds it
 * to the indexes.
 *
 * <p>The token's span comes from the sentence's start and end offsets. If the sentence has tags,
 * the tag at {@code i} becomes the token's part of speech. If the sentence has lemmas and the
 * lemma at {@code i} is not {@code MISSING_VALUE}, a single indexed {@link WordLemma} (carrying
 * the same part of speech when tags are present) is attached; otherwise the token receives an
 * empty lemma array.
 *
 * @param sentence the sentence providing offsets, tags and lemmas
 * @param i index of the word within the sentence's arrays
 * @return the newly created and indexed token
 */
private WordToken createWordToken(Sentence sentence, int i) {
  final WordToken token = new WordToken(jCas);
  token.setBegin(sentence.startOffsets()[i]);
  token.setEnd(sentence.endOffsets()[i]);

  final Option<String[]> posTags = sentence.tags();
  final boolean hasPos = posTags.isDefined();
  if (hasPos) {
    token.setPartOfSpeech(posTags.get()[i]);
  }

  final Option<String[]> lemmaForms = sentence.lemmas();
  final boolean hasLemma = lemmaForms.isDefined() && !MISSING_VALUE.equals(lemmaForms.get()[i]);
  if (!hasLemma) {
    token.setLemmas(new FSArray(jCas, 0));
  } else {
    // The array is installed before the lemma is created, then filled at slot 0.
    token.setLemmas(new FSArray(jCas, 1));
    final WordLemma lemma = new WordLemma(jCas);
    lemma.setLemmaForm(lemmaForms.get()[i]);
    if (hasPos) {
      lemma.setPartOfSpeech(posTags.get()[i]);
    }
    lemma.addToIndexes();
    token.setLemmas(0, lemma);
  }

  token.addToIndexes();
  return token;
}
/**
 * Indexes a single "VERB" token over the word "working", runs the pipeline with the "wordnet"
 * resource, and expects the first WordToken's first lemma to have the form "work".
 */
@Test
public void testAddsLemma() throws UIMAException, ResourceInitializationException {
  final String word = "working";
  jCas.setDocumentText("Is this working?");

  final WordToken token = new WordToken(jCas);
  final int start = jCas.getDocumentText().indexOf(word);
  token.setBegin(start);
  token.setEnd(token.getBegin() + word.length());
  token.setPartOfSpeech("VERB");
  token.addToIndexes();

  processJCas("wordnet", wordnetErd);

  final List<WordToken> tokens = new ArrayList<>(JCasUtil.select(jCas, WordToken.class));
  final WordToken first = tokens.get(0);
  assertEquals("work", first.getLemmas(0).getLemmaForm());
}
/**
 * Indexes a "VERB" token over "working" that already carries one lemma, runs the pipeline with
 * the "wordnet" resource, and expects the existing lemma to stay at index 0 with a new lemma of
 * form "work" at index 1.
 */
@Test
public void testAddsLemmaToExistingLemmas()
    throws UIMAException, ResourceInitializationException {
  final String word = "working";
  jCas.setDocumentText("Is this working?");

  final WordToken token = new WordToken(jCas);
  final int start = jCas.getDocumentText().indexOf(word);
  token.setBegin(start);
  token.setEnd(token.getBegin() + word.length());
  token.setPartOfSpeech("VERB");

  // Pre-populate the token with a one-element lemma array.
  token.setLemmas(new FSArray(jCas, 1));
  final WordLemma existingLemma = new WordLemma(jCas);
  existingLemma.setPartOfSpeech("existing");
  existingLemma.setLemmaForm("existing");
  token.setLemmas(0, existingLemma);
  token.addToIndexes();

  processJCas("wordnet", wordnetErd);

  final List<WordToken> tokens = new ArrayList<>(JCasUtil.select(jCas, WordToken.class));
  final WordToken first = tokens.get(0);
  assertEquals(existingLemma, first.getLemmas(0));
  assertEquals("work", first.getLemmas(1).getLemmaForm());
}
}
/**
 * Sets up a Person "John" and a WordToken "he" that share one ReferenceTarget, plus a Location
 * "London", then runs the pipeline.
 *
 * <p>Afterwards three Entity annotations are expected, in order: "John", a Person with value
 * "John", and "London".
 */
@Test
public void test() throws AnalysisEngineProcessException, ResourceInitializationException {
  final String text = "John say that he would visit London";
  final String pronoun = "he";
  final String city = "London";
  jCas.setDocumentText(text);

  final ReferenceTarget target = new ReferenceTarget(jCas);
  target.addToIndexes();

  final Person john = new Person(jCas);
  john.setBegin(0);
  john.setEnd(4);
  john.setReferent(target);
  john.setValue("John");
  john.addToIndexes();

  final WordToken heToken = new WordToken(jCas);
  heToken.setBegin(text.indexOf(pronoun));
  heToken.setEnd(heToken.getBegin() + pronoun.length());
  heToken.setReferent(target);
  heToken.addToIndexes();

  final Location london = new Location(jCas);
  london.setBegin(text.indexOf(city));
  london.setEnd(london.getBegin() + city.length());
  london.setValue("London");
  london.addToIndexes();

  processJCas();

  final List<Entity> entities = new ArrayList<>(JCasUtil.select(jCas, Entity.class));
  assertEquals(3, entities.size());

  final Entity first = entities.get(0);
  final Entity second = entities.get(1);
  final Entity third = entities.get(2);
  assertEquals("John", first.getValue());
  assertEquals("John", second.getValue());
  assertTrue(second instanceof Person);
  assertEquals("London", third.getValue());
}
// Assign the token's span: begin comes from `offset`, end comes from `end`.
wordToken.setBegin(offset); wordToken.setEnd(end);
private void addWordTokens() { WordToken wt0 = new WordToken(jCas); wt0.setBegin(0); wt0.setEnd(3); wt0.setPartOfSpeech("NNP"); wt1.setBegin(3); wt1.setEnd(4); wt1.setPartOfSpeech(","); wt2.setBegin(5); wt2.setEnd(7); wt2.setPartOfSpeech("IN"); wt3.setBegin(8); wt3.setEnd(12); wt3.setPartOfSpeech("JJ"); wt4.setBegin(12); wt4.setEnd(13); wt4.setPartOfSpeech(","); wt5.setBegin(14); wt5.setEnd(20); wt5.setPartOfSpeech("NNS"); wt6.setBegin(21); wt6.setEnd(27); wt6.setPartOfSpeech("NNP");
// Assign the token's span: begin comes from `offset`, end comes from `end`.
wordToken.setBegin(offset); wordToken.setEnd(end);
// Copy spans from the `attacked` and `attack` annotations onto the matching tokens and tag each with a part of speech.
// NOTE(review): attackAdj takes its begin/end from its own getters, which appears to leave its span unchanged;
// presumably a different source annotation was intended — confirm against the surrounding test.
attackedVerb.setBegin(attacked.getBegin()); attackedVerb.setEnd(attacked.getEnd()); attackedVerb.setPartOfSpeech("VBZ"); attackNoun.setBegin(attack.getBegin()); attackNoun.setEnd(attack.getEnd()); attackNoun.setPartOfSpeech("NNS"); attackAdj.setBegin(attackAdj.getBegin()); attackAdj.setEnd(attackAdj.getEnd()); attackAdj.setPartOfSpeech("ADJ");
// Assign hard-coded begin/end offsets and part-of-speech tags to tokens wt0..wt6, then start on wt7.
// NOTE(review): the offsets presumably line up with a fixed document text set elsewhere — confirm.
wt0.setBegin(0); wt0.setEnd(3); wt0.setPartOfSpeech("NNP"); wt1.setBegin(4); wt1.setEnd(11); wt1.setPartOfSpeech("VBD"); wt2.setBegin(12); wt2.setEnd(18); wt2.setPartOfSpeech("NNP"); wt3.setBegin(18); wt3.setEnd(19); wt3.setPartOfSpeech("."); wt4.setBegin(20); wt4.setEnd(23); wt4.setPartOfSpeech("NNP"); wt5.setBegin(24); wt5.setEnd(28); wt5.setPartOfSpeech("VBD"); wt6.setBegin(29); wt6.setEnd(31); wt6.setPartOfSpeech("TO"); wt7.setBegin(32);
// Assign hard-coded begin/end offsets and part-of-speech tags to tokens wt0..wt6, then start on wt7.
// NOTE(review): the offsets presumably line up with a fixed document text set elsewhere — confirm.
wt0.setBegin(0); wt0.setEnd(3); wt0.setPartOfSpeech("NNP"); wt1.setBegin(4); wt1.setEnd(11); wt1.setPartOfSpeech("VBD"); wt2.setBegin(12); wt2.setEnd(18); wt2.setPartOfSpeech("NNP"); wt3.setBegin(18); wt3.setEnd(19); wt3.setPartOfSpeech(","); wt4.setBegin(20); wt4.setEnd(25); wt4.setPartOfSpeech("NNP"); wt5.setBegin(26); wt5.setEnd(29); wt5.setPartOfSpeech("CC"); wt6.setBegin(30); wt6.setEnd(33); wt6.setPartOfSpeech("NNP"); wt7.setBegin(34);
private void addAnnotations() { WordToken wt0 = new WordToken(jCas); wt0.setBegin(0); wt0.setEnd(3); wt0.setPartOfSpeech("NNP"); wt1.setBegin(4); wt1.setEnd(11); wt1.setPartOfSpeech("VBD"); wt2.setBegin(12); wt2.setEnd(18); wt2.setPartOfSpeech("NNP"); wt3.setBegin(19); wt3.setEnd(22); wt3.setPartOfSpeech("CC"); wt4.setBegin(23); wt4.setEnd(28); wt4.setPartOfSpeech("NNP"); wt5.setBegin(28); wt5.setEnd(29); wt5.setPartOfSpeech(".");
private void addDependencyInformation() { WordToken wt0 = new WordToken(jCas); wt0.setBegin(0); wt0.setEnd(3); wt0.setPartOfSpeech("NNP"); wt1.setBegin(3); wt1.setEnd(4); wt1.setPartOfSpeech(","); wt2.setBegin(5); wt2.setEnd(7); wt2.setPartOfSpeech("IN"); wt3.setBegin(8); wt3.setEnd(12); wt3.setPartOfSpeech("JJ"); wt4.setBegin(12); wt4.setEnd(13); wt4.setPartOfSpeech(","); wt5.setBegin(14); wt5.setEnd(20); wt5.setPartOfSpeech("NNS"); wt6.setBegin(21); wt6.setEnd(27); wt6.setPartOfSpeech("NNP");
// Place the token at offset 8, spanning the length of the literal "jumps", and tag it "VB".
jumps.setBegin(8); jumps.setEnd(8 + "jumps".length()); jumps.setPartOfSpeech("VB");
// Place the token at offset 8, spanning the length of the literal "jumps", and tag it "VB".
jumps.setBegin(8); jumps.setEnd(8 + "jumps".length()); jumps.setPartOfSpeech("VB");