/** Renders this entry as the word's covered text, a space, then the distance value. */
@Override
public String toString() {
  final StringBuilder text = new StringBuilder();
  text.append(word.getCoveredText()).append(' ').append(distance);
  return text.toString();
}
}
/**
 * Builds and indexes a {@link WordToken} for the i-th token of the given sentence.
 *
 * <p>Offsets come from the sentence's start/end offset arrays. The part of speech is copied
 * when the sentence has tags. A lemma is attached only when lemmas are present and the i-th
 * lemma is not the MISSING_VALUE placeholder; otherwise the token gets an empty lemma array
 * (never null), which downstream consumers rely on.
 *
 * @param sentence the processed sentence carrying offsets, tags and lemmas
 * @param i the token index within the sentence
 * @return the indexed word token
 */
private WordToken createWordToken(Sentence sentence, int i) {
  WordToken wordToken = new WordToken(jCas);
  wordToken.setBegin(sentence.startOffsets()[i]);
  wordToken.setEnd(sentence.endOffsets()[i]);
  // Tags are optional — only set the POS when the tagger actually ran.
  Option<String[]> tags = sentence.tags();
  if (tags.isDefined()) {
    wordToken.setPartOfSpeech(tags.get()[i]);
  }
  Option<String[]> lemmas = sentence.lemmas();
  if (lemmas.isDefined() && !MISSING_VALUE.equals(lemmas.get()[i])) {
    // One-slot array: exactly one lemma annotation is attached per token.
    wordToken.setLemmas(new FSArray(jCas, 1));
    WordLemma lemma = new WordLemma(jCas);
    lemma.setLemmaForm(lemmas.get()[i]);
    // Mirror the token's POS onto the lemma when available.
    if (tags.isDefined()) {
      lemma.setPartOfSpeech(tags.get()[i]);
    }
    lemma.addToIndexes();
    wordToken.setLemmas(0, lemma);
  } else {
    // No usable lemma: store an empty array rather than leaving the feature null.
    wordToken.setLemmas(new FSArray(jCas, 0));
  }
  wordToken.addToIndexes();
  return wordToken;
}
/** Verifies the wordnet annotator attaches the lemma "work" to the verb token "working". */
@Test
public void testAddsLemma() throws UIMAException, ResourceInitializationException {
  jCas.setDocumentText("Is this working?");

  final WordToken working = new WordToken(jCas);
  working.setBegin(jCas.getDocumentText().indexOf("working"));
  working.setEnd(working.getBegin() + "working".length());
  working.setPartOfSpeech("VERB");
  working.addToIndexes();

  processJCas("wordnet", wordnetErd);

  final List<WordToken> tokens = new ArrayList<>(JCasUtil.select(jCas, WordToken.class));
  assertEquals("work", tokens.get(0).getLemmas(0).getLemmaForm());
}
/**
 * Constructs a WordToken spanning the given character offsets in the SofA.
 *
 * <p>NOTE: JCasGen-generated code — do not hand-edit the body; regenerate from the type
 * system descriptor instead.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public WordToken(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  readObject();
}
/**
 * Check if the given word token satisfies the constraints of this dependency node.
 *
 * <p>To match, the word token must have the same root part of speech (eg NN will also match NNP,
 * NNS etc.) and the covered text must satisfy the content regular expression, if defined.
 *
 * @param wt the word token to test; a token without a part of speech never matches
 * @return true if this matches
 */
public boolean matches(WordToken wt) {
  // Guard: tokens that were never POS-tagged previously caused an NPE here.
  final String pos = wt.getPartOfSpeech();
  if (pos == null || !pos.startsWith(getType())) {
    return false;
  }
  // Content regex is optional; an empty constraint accepts any covered text.
  return StringUtils.isEmpty(getContent()) || wt.getCoveredText().matches(getContent());
}
/**
 * Returns the lemma form of the token's first lemma annotation, falling back to the
 * lower-cased covered text when no lemma is attached.
 *
 * @param token the word token to read
 * @return the lemma form, or the lower-cased covered text if none is present
 */
private String getLemma(final WordToken token) {
  final FSArray lemmas = token.getLemmas();
  if (lemmas != null && lemmas.size() > 0) {
    return ((WordLemma) lemmas.get(0)).getLemmaForm();
  }
  return token.getCoveredText().toLowerCase();
}
attackly.addToIndexes();
// Verb token sharing the "attacked" annotation's span.
WordToken attackedVerb = new WordToken(jCas);
attackedVerb.setBegin(attacked.getBegin());
attackedVerb.setEnd(attacked.getEnd());
attackedVerb.setPartOfSpeech("VBZ");
attackedVerb.addToIndexes();
// Noun token sharing the "attack" annotation's span.
WordToken attackNoun = new WordToken(jCas);
attackNoun.setBegin(attack.getBegin());
attackNoun.setEnd(attack.getEnd());
attackNoun.setPartOfSpeech("NNS");
attackNoun.addToIndexes();
// NOTE(review): self-assignment bug — begin/end are read from the token being built
// (both default to 0), unlike the two tokens above which copy another annotation's
// offsets. Presumably this was meant to copy from an adjective annotation — confirm
// which one and fix.
WordToken attackAdj = new WordToken(jCas);
attackAdj.setBegin(attackAdj.getBegin());
attackAdj.setEnd(attackAdj.getEnd());
attackAdj.setPartOfSpeech("ADJ");
attackAdj.addToIndexes();
/**
 * Builds "John", a coreferent pronoun "he" and "London", runs the annotator, and checks
 * the pronoun was promoted to a Person entity with John's value.
 */
@Test
public void test() throws AnalysisEngineProcessException, ResourceInitializationException {
  final String documentText = "John say that he would visit London";
  jCas.setDocumentText(documentText);

  final ReferenceTarget target = new ReferenceTarget(jCas);
  target.addToIndexes();

  final Person john = new Person(jCas);
  john.setBegin(0);
  john.setEnd(4);
  john.setReferent(target);
  john.setValue("John");
  john.addToIndexes();

  // "he" shares John's referent, linking the pronoun to the person.
  final WordToken pronoun = new WordToken(jCas);
  pronoun.setBegin(documentText.indexOf("he"));
  pronoun.setEnd(pronoun.getBegin() + "he".length());
  pronoun.setReferent(target);
  pronoun.addToIndexes();

  final Location london = new Location(jCas);
  london.setBegin(documentText.indexOf("London"));
  london.setEnd(london.getBegin() + "London".length());
  london.setValue("London");
  london.addToIndexes();

  processJCas();

  final List<Entity> entities = new ArrayList<>(JCasUtil.select(jCas, Entity.class));
  assertEquals(3, entities.size());
  assertEquals("John", entities.get(0).getValue());
  assertEquals("John", entities.get(1).getValue());
  assertTrue(entities.get(1) instanceof Person);
  assertEquals("London", entities.get(2).getValue());
}
/** Indexes a WordToken over every maximal run of ASCII letters in the document text. */
private void createWordTokens(JCas jCas) {
  final Matcher matcher = Pattern.compile("[A-Za-z]+").matcher(jCas.getDocumentText());
  while (matcher.find()) {
    final WordToken token = new WordToken(jCas, matcher.start(), matcher.end());
    token.addToIndexes();
  }
}
}
/**
 * Save words.
 *
 * <p>Serialises every word of the pattern to a document of covered text, part of speech and
 * (when a lemma annotation exists) its lemma form.
 *
 * @param pattern the pattern
 * @return the DB object
 */
private List<Object> saveWords(final Pattern pattern) {
  final List<Object> words = new ArrayList<>();
  for (int index = 0; index < pattern.getWords().size(); index++) {
    final WordToken token = pattern.getWords(index);
    final Document word =
        new Document()
            .append("text", token.getCoveredText())
            .append("pos", token.getPartOfSpeech());
    // Lemma is optional — only written when at least one lemma annotation is attached.
    if (token.getLemmas() != null && token.getLemmas().size() >= 1) {
      word.put("lemma", token.getLemmas(0).getLemmaForm());
    }
    words.add(word);
  }
  return words;
}
// One POS-tagged token per word; offsets 0-1/2-8/9-11/12-16 — presumably the document
// text is "a sample of text" (TODO confirm against the fixture's setDocumentText call).
a = new WordToken(jCas, 0, 1);
a.setPartOfSpeech("DT");
a.addToIndexes();
sample = new WordToken(jCas, 2, 8);
sample.setPartOfSpeech("NN");
sample.addToIndexes();
of = new WordToken(jCas, 9, 11);
of.setPartOfSpeech("IN");
of.addToIndexes();
text = new WordToken(jCas, 12, 16);
text.setPartOfSpeech("NN");
text.addToIndexes();
/**
 * Creates and indexes a Dependency annotation of the given type between the two tokens,
 * spanning the dependent token's offsets.
 *
 * @param governor the governing (head) token
 * @param dependent the dependent token; the annotation covers its span
 * @param type the dependency relation label
 */
private void createdependency(WordToken governor, WordToken dependent, String type) {
  final Dependency dep = new Dependency(jCas);
  dep.setBegin(dependent.getBegin());
  dep.setEnd(dependent.getEnd());
  dep.setGovernor(governor);
  dep.setDependent(dependent);
  dep.setDependencyType(type);
  dep.addToIndexes();
}
@Test public void testExtractWordsMissingWord() { final Set<WordToken> fromWords = graph.nearestWords(10, new WordToken(jCas)); // We include the word itself (even though its not in...?) Assert.assertEquals(1, fromWords.size()); }
// Wrap the dependent token as a node keyed by its internal id, carrying its POS and
// covered text, then seed a single-node tree and register it so later iterations can
// attach this token's children.
DependencyNode dependencyNode =
    new DependencyNode(
        Long.toString(dependent.getInternalId()),
        dependent.getPartOfSpeech(),
        dependent.getCoveredText());
DependencyTree dependencyTree = new DependencyTree(dependencyNode);
map.put(dependent, dependencyTree);
/**
 * Exercises RelationConstraint.matches: type and subtype must both be set (subtype
 * comparison is evidently case-insensitive), and the token list must supply the
 * required part of speech.
 */
@Test
public void testMatches() throws UIMAException {
  JCas jCas = JCasSingleton.getJCasInstance();
  RelationConstraint constraint =
      new RelationConstraint("type", "subType", "pos", "source", "target");

  Interaction interaction = new Interaction(jCas);
  // Nothing set yet — cannot match.
  assertFalse(constraint.matches(interaction, Collections.emptyList()));

  interaction.setRelationshipType("type");
  assertFalse(constraint.matches(interaction, Collections.emptyList()));

  // "subtype" vs constraint "subType": passes, so the compare ignores case.
  interaction.setRelationSubType("subtype");
  assertTrue(constraint.matches(interaction, Collections.emptyList()));

  WordToken verbToken = new WordToken(jCas);
  verbToken.setPartOfSpeech("VERB");
  WordToken posToken = new WordToken(jCas);
  posToken.setPartOfSpeech("POS");

  List<WordToken> tokens = new ArrayList<>();
  tokens.add(verbToken);
  // VERB alone does not satisfy the "pos" requirement.
  assertFalse(constraint.matches(interaction, tokens));
  tokens.add(posToken);
  assertTrue(constraint.matches(interaction, tokens));
}
}
// (continuation of a stream started above this chunk) keep only the pronouns
// "his"/"he", then require every one to point at the expected referent id.
.filter(
    p ->
        p.getCoveredText().equalsIgnoreCase("his")
            || p.getCoveredText().equalsIgnoreCase("he"))
.allMatch(p -> p.getReferent().getInternalId() == referenceId);
assertTrue(allMatch);
/**
 * Indexes the token under its full part-of-speech tag, and additionally under every
 * root tag (e.g. NN for NNS/NNP) that the full tag starts with.
 *
 * @param wt the token to index by part of speech
 */
private void addPartOfSpeech(final WordToken wt) {
  final String pos = wt.getPartOfSpeech();
  partOfSpeech.put(pos, wt);
  for (final String root : ROOT_POS) {
    if (pos.startsWith(root)) {
      partOfSpeech.put(root, wt);
    }
  }
}
@Test public void testBuildCovered() { // Create a fake sub-sentence final Sentence s = new Sentence(jCas); s.setBegin(0); s.setEnd(sample.getEnd()); final DependencyGraph graph = DependencyGraph.build(jCas, s); Assert.assertNotNull(graph); graph.log(); assertEquals(2, graph.getWords().size()); assertEquals(dA, graph.getDependency(a)); assertEquals(1, graph.getEdges(a).count()); assertEquals(0, graph.getGovernors(a).size()); assertEquals(dSample, graph.getDependency(sample)); assertEquals(1, graph.getEdges(sample).count()); assertEquals(1, graph.getGovernors(sample).size()); }
/** Adds a WordToken annotation for each contiguous sequence of ASCII letters. */
private void createWordTokens(JCas jCas) {
  final Matcher letters = Pattern.compile("[A-Za-z]+").matcher(jCas.getDocumentText());
  while (letters.find()) {
    new WordToken(jCas, letters.start(), letters.end()).addToIndexes();
  }
}
}
/**
 * Check if the given word token satisfies the constraints of this dependency node.
 *
 * <p>To match, the word token must have the same root part of speech (eg NN will also match NNP,
 * NNS etc.) and the covered text must satisfy the content regular expression, if defined.
 *
 * @param wt the word token to test; a token without a part of speech never matches
 * @return true if this matches
 */
public boolean matches(WordToken wt) {
  // Guard: tokens that were never POS-tagged previously caused an NPE here.
  final String pos = wt.getPartOfSpeech();
  if (pos == null || !pos.startsWith(getType())) {
    return false;
  }
  // Content regex is optional; an empty constraint accepts any covered text.
  return StringUtils.isEmpty(getContent()) || wt.getCoveredText().matches(getContent());
}