/**
 * Adds one {@link WordToken} to the CAS indexes for every run of ASCII letters found in the
 * document text. Each token spans exactly the matched characters.
 *
 * @param jCas the CAS whose document text is scanned and to which the tokens are added
 */
private void createWordTokens(JCas jCas) {
  final Matcher letters = Pattern.compile("[A-Za-z]+").matcher(jCas.getDocumentText());
  while (letters.find()) {
    final WordToken token = new WordToken(jCas, letters.start(), letters.end());
    token.addToIndexes();
  }
}
}
/**
 * Scans the document text for runs of ASCII letters and indexes a {@link WordToken} over each
 * run, using the match start and end as the token offsets.
 *
 * @param jCas the CAS providing the document text and receiving the new tokens
 */
private void createWordTokens(JCas jCas) {
  final Pattern wordPattern = Pattern.compile("[A-Za-z]+");
  final Matcher wordMatcher = wordPattern.matcher(jCas.getDocumentText());
  for (boolean found = wordMatcher.find(); found; found = wordMatcher.find()) {
    new WordToken(jCas, wordMatcher.start(), wordMatcher.end()).addToIndexes();
  }
}
}
/**
 * Indexes a {@link WordToken} over every run of letters in the document text. The pattern is
 * {@code [a-z]+} compiled case-insensitively, so upper-case letters match as well.
 *
 * @param jCas the CAS whose document text is scanned and to which the tokens are added
 */
private void addWordTokens(JCas jCas) {
  final Matcher words =
      Pattern.compile("[a-z]+", Pattern.CASE_INSENSITIVE).matcher(jCas.getDocumentText());
  while (words.find()) {
    new WordToken(jCas, words.start(), words.end()).addToIndexes();
  }
}
}
/**
 * Creates and indexes a {@link WordToken} for each sequence of ASCII letters in the document
 * text; anything that is not a letter separates tokens and is left unannotated.
 *
 * @param jCas the CAS to read the document text from and to add the tokens to
 */
private void createWordTokens(JCas jCas) {
  final Matcher match = Pattern.compile("[A-Za-z]+").matcher(jCas.getDocumentText());
  while (match.find()) {
    final int begin = match.start();
    final int end = match.end();
    new WordToken(jCas, begin, end).addToIndexes();
  }
}
}
/**
 * Splits the document text on {@code regex} and indexes a {@link WordToken} over each piece that
 * precedes a match.
 *
 * <p>The regex describes the separator, not the word: every token runs from the end of the
 * previous match (or offset 0) to the start of the current match. Text following the final match
 * is not emitted as a token, and a match at the current token start yields a zero-length token.
 *
 * @param jCas the CAS providing the document text and receiving the tokens
 * @param regex the separator pattern
 * @return the created tokens, in document order
 */
public static List<WordToken> createWordTokens(JCas jCas, String regex) {
  final List<WordToken> tokens = new ArrayList<>();
  final String text = jCas.getDocumentText();
  final Matcher separator = Pattern.compile(regex).matcher(text);

  int tokenStart = 0;
  while (separator.find()) {
    final WordToken token = new WordToken(jCas);
    token.setBegin(tokenStart);
    token.setEnd(separator.start());
    token.addToIndexes();
    tokens.add(token);

    // The next token starts right after this separator.
    tokenStart = separator.end();
  }
  return tokens;
}
@Test public void testExtractWordsMissingWord() { final Set<WordToken> fromWords = graph.nearestWords(10, new WordToken(jCas)); // We include the word itself (even though its not in...?) Assert.assertEquals(1, fromWords.size()); }
/**
 * Adds a {@link WordToken} covering the given span to the CAS indexes.
 *
 * @param start begin offset of the annotation
 * @param end end offset of the annotation
 */
private void addAnnotation(final int start, final int end) {
  final Annotation token = new WordToken(jCas, start, end);
  token.addToIndexes();
}
/**
 * Builds and indexes a {@link WordToken} for the {@code i}-th token of {@code sentence}.
 *
 * <p>Offsets come from the sentence's start/end offset arrays. The part of speech is set when the
 * sentence has tags. When the sentence has lemmas and the lemma at {@code i} is not
 * {@code MISSING_VALUE}, a single indexed {@link WordLemma} is attached; otherwise the token gets
 * an empty lemma array.
 *
 * @param sentence the parsed sentence supplying offsets, tags and lemmas
 * @param i index of the token within the sentence
 * @return the token, already added to the CAS indexes
 */
private WordToken createWordToken(Sentence sentence, int i) {
  WordToken token = new WordToken(jCas);
  token.setBegin(sentence.startOffsets()[i]);
  token.setEnd(sentence.endOffsets()[i]);

  Option<String[]> posTags = sentence.tags();
  if (posTags.isDefined()) {
    token.setPartOfSpeech(posTags.get()[i]);
  }

  Option<String[]> lemmaForms = sentence.lemmas();
  boolean lemmaAvailable =
      lemmaForms.isDefined() && !MISSING_VALUE.equals(lemmaForms.get()[i]);
  if (!lemmaAvailable) {
    token.setLemmas(new FSArray(jCas, 0));
  } else {
    token.setLemmas(new FSArray(jCas, 1));

    WordLemma wordLemma = new WordLemma(jCas);
    wordLemma.setLemmaForm(lemmaForms.get()[i]);
    if (posTags.isDefined()) {
      wordLemma.setPartOfSpeech(posTags.get()[i]);
    }
    wordLemma.addToIndexes();

    token.setLemmas(0, wordLemma);
  }

  token.addToIndexes();
  return token;
}
/**
 * Creates the {@link WordToken} for position {@code i} of {@code sentence} and adds it to the CAS
 * indexes.
 *
 * <p>Begin and end are read from the sentence's offset arrays; the part-of-speech tag is copied
 * when tags are defined. If lemmas are defined and the entry at {@code i} differs from
 * {@code MISSING_VALUE}, one indexed {@link WordLemma} (carrying the same tag when available) is
 * stored on the token. In every other case the token receives a zero-length lemma array.
 *
 * @param sentence the sentence the token belongs to
 * @param i position of the token in the sentence
 * @return the indexed token
 */
private WordToken createWordToken(Sentence sentence, int i) {
  WordToken result = new WordToken(jCas);
  result.setBegin(sentence.startOffsets()[i]);
  result.setEnd(sentence.endOffsets()[i]);

  Option<String[]> tagOption = sentence.tags();
  if (tagOption.isDefined()) {
    result.setPartOfSpeech(tagOption.get()[i]);
  }

  Option<String[]> lemmaOption = sentence.lemmas();
  if (!lemmaOption.isDefined() || MISSING_VALUE.equals(lemmaOption.get()[i])) {
    // No usable lemma: keep the feature non-null but empty.
    result.setLemmas(new FSArray(jCas, 0));
  } else {
    result.setLemmas(new FSArray(jCas, 1));
    WordLemma lemmaAnnotation = new WordLemma(jCas);
    lemmaAnnotation.setLemmaForm(lemmaOption.get()[i]);
    if (tagOption.isDefined()) {
      lemmaAnnotation.setPartOfSpeech(tagOption.get()[i]);
    }
    lemmaAnnotation.addToIndexes();
    result.setLemmas(0, lemmaAnnotation);
  }

  result.addToIndexes();
  return result;
}
/**
 * Indexes a VERB token over "working", runs the "wordnet" resource through the pipeline and
 * expects the first token's first lemma to be "work".
 */
@Test
public void testAddsLemma() throws UIMAException, ResourceInitializationException {
  jCas.setDocumentText("Is this working?");

  final WordToken verb = new WordToken(jCas);
  final int verbBegin = jCas.getDocumentText().indexOf("working");
  verb.setBegin(verbBegin);
  verb.setEnd(verb.getBegin() + "working".length());
  verb.setPartOfSpeech("VERB");
  verb.addToIndexes();

  processJCas("wordnet", wordnetErd);

  final List<WordToken> processed = new ArrayList<>(JCasUtil.select(jCas, WordToken.class));
  final WordToken first = processed.get(0);
  assertEquals("work", first.getLemmas(0).getLemmaForm());
}
/**
 * A {@code WordDistance} built from a word and a preceding {@code WordDistance} reports distance
 * 1, the new word as its word, both words in order, and the preceding distance as its parent.
 */
@Test
public void testSomeDistance() throws UIMAException {
  final JCas jCas = JCasSingleton.getJCasInstance();
  final WordToken firstWord = new WordToken(jCas);
  final WordToken secondWord = new WordToken(jCas);

  final WordDistance origin = new WordDistance(firstWord);
  final WordDistance oneStep = new WordDistance(secondWord, origin);

  assertEquals(1, oneStep.getDistance());
  assertSame(secondWord, oneStep.getWord());

  // The word path lists the origin word first, then the newly added word.
  assertEquals(2, oneStep.getWords().size());
  assertSame(firstWord, oneStep.getWords().get(0));
  assertSame(secondWord, oneStep.getWords().get(1));

  assertEquals(origin, oneStep.getWordDistance());
}
/**
 * Exercises {@code RelationConstraint.matches} step by step: the interaction only matches once
 * both its relationship type and sub type are set, and a non-empty token list only matches once
 * it contains the token tagged "POS".
 */
@Test
public void testMatches() throws UIMAException {
  JCas jCas = JCasSingleton.getJCasInstance();
  RelationConstraint constraint =
      new RelationConstraint("type", "subType", "pos", "source", "target");
  Interaction interaction = new Interaction(jCas);

  // Without tokens: no match until both type and sub type are populated.
  assertFalse(constraint.matches(interaction, Collections.emptyList()));
  interaction.setRelationshipType("type");
  assertFalse(constraint.matches(interaction, Collections.emptyList()));
  interaction.setRelationSubType("subtype");
  assertTrue(constraint.matches(interaction, Collections.emptyList()));

  WordToken verbToken = new WordToken(jCas);
  verbToken.setPartOfSpeech("VERB");
  WordToken posToken = new WordToken(jCas);
  posToken.setPartOfSpeech("POS");

  // With tokens: a lone VERB token fails, adding the POS token succeeds.
  List<WordToken> candidates = new ArrayList<>();
  candidates.add(verbToken);
  assertFalse(constraint.matches(interaction, candidates));
  candidates.add(posToken);
  assertTrue(constraint.matches(interaction, candidates));
}
}
/** A mention built on the leading "He" is expected to be enhanced to gender {@code Gender.M}. */
@Test
public void testEnhancePronoun() {
  jCas.setDocumentText("He went to London");

  // Offsets 0-2 cover the first two characters of the text, i.e. "He".
  WordToken pronoun = new WordToken(jCas, 0, 2);
  Mention mention = new Mention(pronoun);

  genderEnhancer.enhance(mention);

  assertEquals(Gender.M, mention.getGender());
}
}
/**
 * A {@code WordDistance} built from a single word reports distance 0, that word as its word, and
 * a one-element word list containing it.
 */
@Test
public void testNoDistance() throws UIMAException {
  final JCas jCas = JCasSingleton.getJCasInstance();
  final WordToken onlyWord = new WordToken(jCas);

  final WordDistance distance = new WordDistance(onlyWord);

  assertEquals(0, distance.getDistance());
  assertSame(onlyWord, distance.getWord());
  assertEquals(1, distance.getWords().size());
  assertEquals(onlyWord, distance.getWords().get(0));
}
/**
 * Starts with a VERB token over "working" that already carries one lemma, runs the "wordnet"
 * resource through the pipeline, and expects the existing lemma to stay at index 0 with a new
 * "work" lemma at index 1.
 */
@Test
public void testAddsLemmaToExistingLemmas()
    throws UIMAException, ResourceInitializationException {
  jCas.setDocumentText("Is this working?");

  final WordToken verb = new WordToken(jCas);
  final int verbBegin = jCas.getDocumentText().indexOf("working");
  verb.setBegin(verbBegin);
  verb.setEnd(verb.getBegin() + "working".length());
  verb.setPartOfSpeech("VERB");
  verb.setLemmas(new FSArray(jCas, 1));

  // Pre-populate the single lemma slot before the annotator runs.
  final WordLemma existingLemma = new WordLemma(jCas);
  existingLemma.setPartOfSpeech("existing");
  existingLemma.setLemmaForm("existing");
  verb.setLemmas(0, existingLemma);
  verb.addToIndexes();

  processJCas("wordnet", wordnetErd);

  final List<WordToken> processed = new ArrayList<>(JCasUtil.select(jCas, WordToken.class));
  final WordToken first = processed.get(0);
  assertEquals(existingLemma, first.getLemmas(0));
  assertEquals("work", first.getLemmas(1).getLemmaForm());
}
}
/**
 * Indexes a {@code Pattern} annotation holding a single {@link WordToken} and runs the
 * {@code Patterns} engine over the CAS. The test makes no assertions; it passes when the
 * pipeline completes without throwing.
 */
@Test
public void testPattern() throws UIMAException {
  final WordToken onlyWord = new WordToken(jCas);

  final Pattern pattern = new Pattern(jCas);
  pattern.setWords(new FSArray(jCas, 1));
  pattern.setWords(0, onlyWord);
  pattern.addToIndexes();

  SimplePipeline.runPipeline(jCas, AnalysisEngineFactory.createEngine(Patterns.class));
}
threefour.addToIndexes(); tokens[0] = new WordToken(jCas); tokens[0].setBegin(0); tokens[0].setEnd(0); tokens[0].addToIndexes(); tokens[1] = new WordToken(jCas); tokens[1].setBegin(2); tokens[1].setEnd(2); tokens[1].addToIndexes(); tokens[2] = new WordToken(jCas); tokens[2].setBegin(4); tokens[2].setEnd(4); tokens[2].addToIndexes(); tokens[3] = new WordToken(jCas); tokens[3].setBegin(6); tokens[3].setEnd(text.length());
/**
 * Sets up a Person "John" and a WordToken "he" sharing one {@code ReferenceTarget}, plus a
 * Location "London", then processes the CAS. Afterwards three entities are expected, in order:
 * "John", a second "John" that is a {@code Person}, and "London".
 */
@Test
public void test() throws AnalysisEngineProcessException, ResourceInitializationException {
  String text = "John say that he would visit London";
  jCas.setDocumentText(text);

  // Shared reference target linking the person and the pronoun.
  ReferenceTarget sharedTarget = new ReferenceTarget(jCas);
  sharedTarget.addToIndexes();

  Person john = new Person(jCas);
  john.setBegin(0);
  john.setEnd(4);
  john.setReferent(sharedTarget);
  john.setValue("John");
  john.addToIndexes();

  WordToken pronoun = new WordToken(jCas);
  pronoun.setBegin(text.indexOf("he"));
  pronoun.setEnd(pronoun.getBegin() + "he".length());
  pronoun.setReferent(sharedTarget);
  pronoun.addToIndexes();

  Location london = new Location(jCas);
  london.setBegin(text.indexOf("London"));
  london.setEnd(london.getBegin() + "London".length());
  london.setValue("London");
  london.addToIndexes();

  processJCas();

  List<Entity> entities = new ArrayList<>(JCasUtil.select(jCas, Entity.class));
  assertEquals(3, entities.size());
  assertEquals("John", entities.get(0).getValue());
  assertEquals("John", entities.get(1).getValue());
  assertTrue(entities.get(1) instanceof Person);
  assertEquals("London", entities.get(2).getValue());
}
/**
 * Checks the accessors of an {@code Edge} built from two tokens and a dependency: the stored
 * endpoints and dependency, {@code getOther} for each endpoint, and the {@code isFrom}/{@code isTo}
 * predicates for both the matching and the opposite endpoint.
 */
@Test
public void test() throws UIMAException {
  final JCas jCas = JCasSingleton.getJCasInstance();
  final WordToken source = new WordToken(jCas);
  final WordToken target = new WordToken(jCas);
  final Dependency link = new Dependency(jCas);

  final Edge edge = new Edge(source, link, target);

  // Stored components.
  assertSame(link, edge.getDependency());
  assertSame(target, edge.getTo());
  assertSame(source, edge.getFrom());

  // Asking for the other end of either endpoint.
  assertSame(source, edge.getOther(target));
  assertSame(target, edge.getOther(source));

  // Direction predicates.
  assertFalse(edge.isFrom(target));
  assertFalse(edge.isTo(source));
  assertTrue(edge.isFrom(source));
  assertTrue(edge.isTo(target));
}
}
/**
 * Checks {@code equals}, {@code hashCode} and {@code compareTo} of {@code WordDistance} across
 * instances built from different words, from a word plus a parent distance, and from a
 * {@code null} word.
 */
@Test
public void testEquals() throws UIMAException {
  final JCas jCas = JCasSingleton.getJCasInstance();
  final WordToken firstWord = new WordToken(jCas);
  final WordToken secondWord = new WordToken(jCas);

  final WordDistance first = new WordDistance(firstWord);
  final WordDistance second = new WordDistance(secondWord);
  final WordDistance secondViaFirst = new WordDistance(secondWord, first);
  final WordDistance nullWordA = new WordDistance(null);
  final WordDistance nullWordB = new WordDistance(null);

  // equals
  assertNotEquals(first, null);
  assertEquals(first, first);
  assertNotEquals(first, "Hello");
  assertNotEquals(first, second);
  assertNotEquals(first, secondViaFirst);
  assertNotEquals(secondViaFirst, second);
  assertEquals(nullWordA, nullWordB);

  // hashCode
  assertNotEquals(first.hashCode(), second.hashCode());
  assertNotEquals(first.hashCode(), nullWordA.hashCode());

  // compareTo
  assertEquals(-1, first.compareTo(secondViaFirst));
}
}