/**
 * Creates a {@link WordToken} for every maximal run of ASCII letters found in the document text
 * and adds each one to the indexes.
 *
 * @param jCas the JCas whose document text is scanned and in which the tokens are created
 */
private void createWordTokens(JCas jCas) {
  final Pattern letters = Pattern.compile("[A-Za-z]+");
  final Matcher run = letters.matcher(jCas.getDocumentText());
  for (boolean found = run.find(); found; found = run.find()) {
    // Token span is exactly the matched run: [start, end).
    final WordToken token = new WordToken(jCas, run.start(), run.end());
    token.addToIndexes();
  }
}
}
/**
 * Scans the document text for runs of the letters A-Z / a-z and indexes one {@link WordToken}
 * per run, using the match offsets as the token's begin and end.
 *
 * @param jCas the JCas providing the document text and receiving the new annotations
 */
private void createWordTokens(JCas jCas) {
  final Pattern asciiWord = Pattern.compile("[A-Za-z]+");
  for (Matcher hit = asciiWord.matcher(jCas.getDocumentText()); hit.find(); ) {
    final int from = hit.start();
    final int to = hit.end();
    new WordToken(jCas, from, to).addToIndexes();
  }
}
}
/**
 * Tokenises the document text on the pattern {@code [A-Za-z]+}: each match becomes a
 * {@link WordToken} covering the matched offsets, which is then added to the indexes.
 *
 * @param jCas the JCas to read the document text from and to annotate
 */
private void createWordTokens(JCas jCas) {
  final Pattern wordPattern = Pattern.compile("[A-Za-z]+");
  final String documentText = jCas.getDocumentText();
  final Matcher words = wordPattern.matcher(documentText);

  while (words.find()) {
    final WordToken word = new WordToken(jCas, words.start(), words.end());
    word.addToIndexes();
  }
}
}
/**
 * Adds a {@link WordToken} to the indexes for each match of {@code [a-z]+} (matched
 * case-insensitively) in the document text.
 *
 * @param jCas the JCas whose document text is scanned and in which the tokens are created
 */
private void addWordTokens(JCas jCas) {
  final Pattern wordPattern = Pattern.compile("[a-z]+", Pattern.CASE_INSENSITIVE);
  for (Matcher hit = wordPattern.matcher(jCas.getDocumentText()); hit.find(); ) {
    // One token per match, spanning the matched offsets.
    new WordToken(jCas, hit.start(), hit.end()).addToIndexes();
  }
}
}
/**
 * Creates {@link WordToken}s for the stretches of document text that lie <em>between</em>
 * matches of {@code regex}, i.e. the regex acts as a separator.
 *
 * <p>The first token starts at offset 0; every later token starts where the previous match
 * ended. A token is emitted once per match, ending where that match starts, so:
 *
 * <ul>
 *   <li>text following the last match does not produce a token;
 *   <li>a zero-length token is produced when a match starts exactly where the current token
 *       begins (for example, a match at offset 0).
 * </ul>
 *
 * @param jCas the JCas providing the document text; created tokens are added to its indexes
 * @param regex the separator pattern
 * @return the created tokens, in document order
 */
public static List<WordToken> createWordTokens(JCas jCas, String regex) {
  final List<WordToken> tokens = new ArrayList<>();
  final String text = jCas.getDocumentText();
  final Matcher separator = Pattern.compile(regex).matcher(text);

  int tokenStart = 0;
  while (separator.find()) {
    final WordToken token = new WordToken(jCas);
    token.setBegin(tokenStart);
    token.setEnd(separator.start());
    token.addToIndexes();
    tokens.add(token);

    // The next token begins right after this separator.
    tokenStart = separator.end();
  }
  return tokens;
}
/**
 * Builds a {@link WordToken} for the {@code i}-th entry of the given sentence and adds it to
 * the indexes.
 *
 * <p>Begin and end come from the sentence's start/end offset arrays. If the sentence has tags,
 * the {@code i}-th tag is set as the part of speech. If the sentence has lemmas and the
 * {@code i}-th lemma is not {@code MISSING_VALUE}, a single {@link WordLemma} (carrying the
 * lemma form and, when tags exist, the same part of speech) is indexed and attached; otherwise
 * the token receives an empty lemma array.
 *
 * @param sentence the sentence supplying offsets, tags and lemmas
 * @param i index of the token within the sentence's arrays
 * @return the indexed word token
 */
private WordToken createWordToken(Sentence sentence, int i) {
  final WordToken token = new WordToken(jCas);
  token.setBegin(sentence.startOffsets()[i]);
  token.setEnd(sentence.endOffsets()[i]);

  final Option<String[]> posTags = sentence.tags();
  final boolean hasPos = posTags.isDefined();
  if (hasPos) {
    token.setPartOfSpeech(posTags.get()[i]);
  }

  final Option<String[]> lemmaForms = sentence.lemmas();
  final boolean hasLemma = lemmaForms.isDefined() && !MISSING_VALUE.equals(lemmaForms.get()[i]);
  if (!hasLemma) {
    // No usable lemma: attach an empty array rather than leaving the feature unset.
    token.setLemmas(new FSArray(jCas, 0));
  } else {
    token.setLemmas(new FSArray(jCas, 1));
    final WordLemma wordLemma = new WordLemma(jCas);
    wordLemma.setLemmaForm(lemmaForms.get()[i]);
    if (hasPos) {
      wordLemma.setPartOfSpeech(posTags.get()[i]);
    }
    wordLemma.addToIndexes();
    token.setLemmas(0, wordLemma);
  }

  token.addToIndexes();
  return token;
}
/**
 * Creates and indexes the {@link WordToken} for position {@code i} of {@code sentence}.
 *
 * <p>The token span is read from the sentence's offset arrays. The part of speech is only set
 * when the sentence defines tags. The lemma array holds one {@link WordLemma} when the sentence
 * defines lemmas and the entry at {@code i} differs from {@code MISSING_VALUE}; in every other
 * case it is an empty array.
 *
 * @param sentence source of offsets, tags and lemmas
 * @param i position of the token in the sentence's arrays
 * @return the newly indexed token
 */
private WordToken createWordToken(Sentence sentence, int i) {
  final int begin = sentence.startOffsets()[i];
  final int end = sentence.endOffsets()[i];

  final WordToken result = new WordToken(jCas);
  result.setBegin(begin);
  result.setEnd(end);

  final Option<String[]> tagOption = sentence.tags();
  if (tagOption.isDefined()) {
    result.setPartOfSpeech(tagOption.get()[i]);
  }

  final Option<String[]> lemmaOption = sentence.lemmas();
  boolean lemmaAvailable = false;
  if (lemmaOption.isDefined()) {
    lemmaAvailable = !MISSING_VALUE.equals(lemmaOption.get()[i]);
  }

  if (lemmaAvailable) {
    result.setLemmas(new FSArray(jCas, 1));

    final WordLemma lemmaAnnotation = new WordLemma(jCas);
    lemmaAnnotation.setLemmaForm(lemmaOption.get()[i]);
    if (tagOption.isDefined()) {
      // The lemma carries the same tag as the token itself.
      lemmaAnnotation.setPartOfSpeech(tagOption.get()[i]);
    }
    lemmaAnnotation.addToIndexes();

    result.setLemmas(0, lemmaAnnotation);
  } else {
    result.setLemmas(new FSArray(jCas, 0));
  }

  result.addToIndexes();
  return result;
}
/**
 * Indexes a single VERB token covering "working", runs the "wordnet" resource through
 * {@code processJCas}, and expects the first word token's first lemma form to be "work".
 */
@Test
public void testAddsLemma() throws UIMAException, ResourceInitializationException {
  jCas.setDocumentText("Is this working?");

  final WordToken token = new WordToken(jCas);
  final int start = jCas.getDocumentText().indexOf("working");
  token.setBegin(start);
  token.setEnd(start + "working".length());
  token.setPartOfSpeech("VERB");
  token.addToIndexes();

  processJCas("wordnet", wordnetErd);

  final List<WordToken> tokens = new ArrayList<>(JCasUtil.select(jCas, WordToken.class));
  final WordToken first = tokens.get(0);
  assertEquals("work", first.getLemmas(0).getLemmaForm());
}
/**
 * Indexes a VERB token covering "working" that already holds one lemma ("existing"), runs the
 * "wordnet" resource through {@code processJCas}, and expects the original lemma to remain at
 * position 0 with a lemma whose form is "work" at position 1.
 */
@Test
public void testAddsLemmaToExistingLemmas()
    throws UIMAException, ResourceInitializationException {
  jCas.setDocumentText("Is this working?");

  final WordToken token = new WordToken(jCas);
  final int start = jCas.getDocumentText().indexOf("working");
  token.setBegin(start);
  token.setEnd(start + "working".length());
  token.setPartOfSpeech("VERB");

  // Pre-populate the token with a single lemma before processing.
  token.setLemmas(new FSArray(jCas, 1));
  final WordLemma existingLemma = new WordLemma(jCas);
  existingLemma.setPartOfSpeech("existing");
  existingLemma.setLemmaForm("existing");
  token.setLemmas(0, existingLemma);
  token.addToIndexes();

  processJCas("wordnet", wordnetErd);

  final List<WordToken> tokens = new ArrayList<>(JCasUtil.select(jCas, WordToken.class));
  final WordToken first = tokens.get(0);
  assertEquals(existingLemma, first.getLemmas(0));
  assertEquals("work", first.getLemmas(1).getLemmaForm());
}
}
// Gives tokens[0] a zero-length span (begin 0, end 0) and then adds tokens[0]..tokens[3] to the
// indexes. No span is set for tokens[1]..tokens[3] in these statements.
tokens[0].setBegin(0); tokens[0].setEnd(0); tokens[0].addToIndexes(); tokens[1].addToIndexes(); tokens[2].addToIndexes(); tokens[3].addToIndexes();
/**
 * Sets up a Person "John" and a word token "he" that share one {@link ReferenceTarget}, plus a
 * Location "London", then runs {@code processJCas()}.
 *
 * <p>Afterwards exactly three {@link Entity} annotations are expected, in order: one with value
 * "John", a {@link Person} with value "John", and one with value "London".
 */
@Test
public void test() throws AnalysisEngineProcessException, ResourceInitializationException {
  final String sentence = "John say that he would visit London";
  jCas.setDocumentText(sentence);

  final ReferenceTarget sharedTarget = new ReferenceTarget(jCas);
  sharedTarget.addToIndexes();

  final Person john = new Person(jCas);
  john.setBegin(0);
  john.setEnd(4);
  john.setReferent(sharedTarget);
  john.setValue("John");
  john.addToIndexes();

  // The pronoun points at the same reference target as the Person above.
  final WordToken pronoun = new WordToken(jCas);
  final int pronounStart = sentence.indexOf("he");
  pronoun.setBegin(pronounStart);
  pronoun.setEnd(pronounStart + "he".length());
  pronoun.setReferent(sharedTarget);
  pronoun.addToIndexes();

  final Location london = new Location(jCas);
  final int londonStart = sentence.indexOf("London");
  london.setBegin(londonStart);
  london.setEnd(londonStart + "London".length());
  london.setValue("London");
  london.addToIndexes();

  processJCas();

  final List<Entity> entities = new ArrayList<>(JCasUtil.select(jCas, Entity.class));
  assertEquals(3, entities.size());
  assertEquals("John", entities.get(0).getValue());
  assertEquals("John", entities.get(1).getValue());
  assertTrue(entities.get(1) instanceof Person);
  assertEquals("London", entities.get(2).getValue());
}
wordToken.addToIndexes(jCas); offset = end + 1; } else {
// Assigns a part-of-speech tag and a sentence order (0..6) to wt0..wt6 and adds each of them to
// the indexes of jCas. Tag sequence: NNP , IN JJ , NNS NNP, followed by "." for wt7.
// Only the part-of-speech assignment of wt7 is visible in this run of statements.
wt0.setPartOfSpeech("NNP"); wt0.setSentenceOrder(0); wt0.addToIndexes(jCas); wt1.setPartOfSpeech(","); wt1.setSentenceOrder(1); wt1.addToIndexes(jCas); wt2.setPartOfSpeech("IN"); wt2.setSentenceOrder(2); wt2.addToIndexes(jCas); wt3.setPartOfSpeech("JJ"); wt3.setSentenceOrder(3); wt3.addToIndexes(jCas); wt4.setPartOfSpeech(","); wt4.setSentenceOrder(4); wt4.addToIndexes(jCas); wt5.setPartOfSpeech("NNS"); wt5.setSentenceOrder(5); wt5.addToIndexes(jCas); wt6.setPartOfSpeech("NNP"); wt6.setSentenceOrder(6); wt6.addToIndexes(jCas); wt7.setPartOfSpeech(".");
wordToken.addToIndexes(jCas); offset = end + 1; } else {
// Sets the end offset and part of speech of three tokens and adds them to the indexes:
// attackedVerb ends where `attacked` ends (VBZ) and attackNoun ends where `attack` ends (NNS).
// NOTE(review): attackAdj.setEnd(attackAdj.getEnd()) copies the token's own end back onto itself,
// unlike the two statements before it, which copy the end from a different annotation. This looks
// like the wrong source object - confirm which annotation attackAdj should take its end from.
attackedVerb.setEnd(attacked.getEnd()); attackedVerb.setPartOfSpeech("VBZ"); attackedVerb.addToIndexes(); attackNoun.setEnd(attack.getEnd()); attackNoun.setPartOfSpeech("NNS"); attackNoun.addToIndexes(); attackAdj.setEnd(attackAdj.getEnd()); attackAdj.setPartOfSpeech("ADJ"); attackAdj.addToIndexes();
// Gives tokens[0] the span [0, 1) and then adds tokens[0]..tokens[3] to the indexes.
// No span is set for tokens[1]..tokens[3] in these statements.
tokens[0].setBegin(0); tokens[0].setEnd(1); tokens[0].addToIndexes(); tokens[1].addToIndexes(); tokens[2].addToIndexes(); tokens[3].addToIndexes();
// Indexes `a`, then creates and indexes three word tokens with fixed offsets and tags:
// sample = [2, 8) "NN", of = [9, 11) "IN", text = [12, 16) "NN".
// The targets (sample, of, text) are assigned, not declared, here - presumably fields or locals
// declared outside this run of statements; verify against the enclosing code.
a.addToIndexes(); sample = new WordToken(jCas, 2, 8); sample.setPartOfSpeech("NN"); sample.addToIndexes(); of = new WordToken(jCas, 9, 11); of.setPartOfSpeech("IN"); of.addToIndexes(); text = new WordToken(jCas, 12, 16); text.setPartOfSpeech("NN"); text.addToIndexes();
// Assigns a part-of-speech tag and a sentence order (0..5) to wt0..wt5 and adds each of them to
// the indexes of jCas. Tag sequence: NNP VBD NNP CC NNP .
wt0.setPartOfSpeech("NNP"); wt0.setSentenceOrder(0); wt0.addToIndexes(jCas); wt1.setPartOfSpeech("VBD"); wt1.setSentenceOrder(1); wt1.addToIndexes(jCas); wt2.setPartOfSpeech("NNP"); wt2.setSentenceOrder(2); wt2.addToIndexes(jCas); wt3.setPartOfSpeech("CC"); wt3.setSentenceOrder(3); wt3.addToIndexes(jCas); wt4.setPartOfSpeech("NNP"); wt4.setSentenceOrder(4); wt4.addToIndexes(jCas); wt5.setPartOfSpeech("."); wt5.setSentenceOrder(5); wt5.addToIndexes(jCas);
// Ends the `jumps` token at offset 8 plus the length of "jumps" (i.e. 13), tags it "VB" and adds
// it to the indexes. The matching begin offset is not set in these statements.
jumps.setEnd(8 + "jumps".length()); jumps.setPartOfSpeech("VB"); jumps.addToIndexes();
// Gives `jumps` a one-element lemma array holding jumpLemma, then adds the token to the indexes.
jumps.setLemmas(new FSArray(jCas, 1)); jumps.setLemmas(0, jumpLemma); jumps.addToIndexes();