/** Renders this entry as the word's covered text, a space, then the distance value. */
@Override
public String toString() {
  final StringBuilder text = new StringBuilder();
  text.append(word.getCoveredText()).append(' ').append(distance);
  return text.toString();
}
}
/**
 * Builds and indexes a {@link WordToken} for the i-th token of the given sentence.
 *
 * <p>Offsets come from the sentence's start/end offset arrays. The part of speech is copied
 * when the sentence has tags. A lemma is attached only when lemmas are present and the i-th
 * lemma is not the MISSING_VALUE placeholder; otherwise the token gets an empty lemma array
 * (never null), which downstream consumers rely on.
 *
 * @param sentence the processed sentence carrying offsets, tags and lemmas
 * @param i the token index within the sentence
 * @return the indexed word token
 */
private WordToken createWordToken(Sentence sentence, int i) {
  WordToken wordToken = new WordToken(jCas);
  wordToken.setBegin(sentence.startOffsets()[i]);
  wordToken.setEnd(sentence.endOffsets()[i]);
  // Tags are optional — only set the POS when the tagger actually ran.
  Option<String[]> tags = sentence.tags();
  if (tags.isDefined()) {
    wordToken.setPartOfSpeech(tags.get()[i]);
  }
  Option<String[]> lemmas = sentence.lemmas();
  if (lemmas.isDefined() && !MISSING_VALUE.equals(lemmas.get()[i])) {
    // One-slot array: exactly one lemma annotation is attached per token.
    wordToken.setLemmas(new FSArray(jCas, 1));
    WordLemma lemma = new WordLemma(jCas);
    lemma.setLemmaForm(lemmas.get()[i]);
    // Mirror the token's POS onto the lemma when available.
    if (tags.isDefined()) {
      lemma.setPartOfSpeech(tags.get()[i]);
    }
    lemma.addToIndexes();
    wordToken.setLemmas(0, lemma);
  } else {
    // No usable lemma: store an empty array rather than leaving the feature null.
    wordToken.setLemmas(new FSArray(jCas, 0));
  }
  wordToken.addToIndexes();
  return wordToken;
}
/** Verifies the wordnet annotator attaches the lemma "work" to the verb token "working". */
@Test
public void testAddsLemma() throws UIMAException, ResourceInitializationException {
  jCas.setDocumentText("Is this working?");

  final WordToken working = new WordToken(jCas);
  working.setBegin(jCas.getDocumentText().indexOf("working"));
  working.setEnd(working.getBegin() + "working".length());
  working.setPartOfSpeech("VERB");
  working.addToIndexes();

  processJCas("wordnet", wordnetErd);

  final List<WordToken> tokens = new ArrayList<>(JCasUtil.select(jCas, WordToken.class));
  assertEquals("work", tokens.get(0).getLemmas(0).getLemmaForm());
}
/**
 * Constructs a WordToken spanning the given character offsets in the SofA.
 *
 * <p>NOTE: JCasGen-generated code — do not hand-edit the body; regenerate from the type
 * system descriptor instead.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public WordToken(JCas jcas, int begin, int end) {
  super(jcas);
  setBegin(begin);
  setEnd(end);
  readObject();
}
/**
 * Check if the given word token satisfies the constraints of this dependency node.
 *
 * <p>To match, the word token must have the same root part of speech (eg NN will also match NNP,
 * NNS etc.) and the covered text must satisfy the content regular expression, if defined.
 *
 * @param wt the word token to test; a token without a part of speech never matches
 * @return true if this matches
 */
public boolean matches(WordToken wt) {
  // Guard: tokens that were never POS-tagged previously caused an NPE here.
  final String pos = wt.getPartOfSpeech();
  if (pos == null || !pos.startsWith(getType())) {
    return false;
  }
  // Content regex is optional; an empty constraint accepts any covered text.
  return StringUtils.isEmpty(getContent()) || wt.getCoveredText().matches(getContent());
}
/**
 * Returns the lemma form of the token's first lemma annotation, falling back to the
 * lower-cased covered text when no lemma is attached.
 *
 * @param token the word token to read
 * @return the lemma form, or the lower-cased covered text if none is present
 */
private String getLemma(final WordToken token) {
  final FSArray lemmas = token.getLemmas();
  if (lemmas != null && lemmas.size() > 0) {
    return ((WordLemma) lemmas.get(0)).getLemmaForm();
  }
  return token.getCoveredText().toLowerCase();
}
attackly.addToIndexes();
// Verb token sharing the "attacked" annotation's span.
WordToken attackedVerb = new WordToken(jCas);
attackedVerb.setBegin(attacked.getBegin());
attackedVerb.setEnd(attacked.getEnd());
attackedVerb.setPartOfSpeech("VBZ");
attackedVerb.addToIndexes();
// Noun token sharing the "attack" annotation's span.
WordToken attackNoun = new WordToken(jCas);
attackNoun.setBegin(attack.getBegin());
attackNoun.setEnd(attack.getEnd());
attackNoun.setPartOfSpeech("NNS");
attackNoun.addToIndexes();
// NOTE(review): self-assignment bug — begin/end are read from the token being built
// (both default to 0), unlike the two tokens above which copy another annotation's
// offsets. Presumably this was meant to copy from an adjective annotation — confirm
// which one and fix.
WordToken attackAdj = new WordToken(jCas);
attackAdj.setBegin(attackAdj.getBegin());
attackAdj.setEnd(attackAdj.getEnd());
attackAdj.setPartOfSpeech("ADJ");
attackAdj.addToIndexes();
/**
 * Builds "John", a coreferent pronoun "he" and "London", runs the annotator, and checks
 * the pronoun was promoted to a Person entity with John's value.
 */
@Test
public void test() throws AnalysisEngineProcessException, ResourceInitializationException {
  final String documentText = "John say that he would visit London";
  jCas.setDocumentText(documentText);

  final ReferenceTarget target = new ReferenceTarget(jCas);
  target.addToIndexes();

  final Person john = new Person(jCas);
  john.setBegin(0);
  john.setEnd(4);
  john.setReferent(target);
  john.setValue("John");
  john.addToIndexes();

  // "he" shares John's referent, linking the pronoun to the person.
  final WordToken pronoun = new WordToken(jCas);
  pronoun.setBegin(documentText.indexOf("he"));
  pronoun.setEnd(pronoun.getBegin() + "he".length());
  pronoun.setReferent(target);
  pronoun.addToIndexes();

  final Location london = new Location(jCas);
  london.setBegin(documentText.indexOf("London"));
  london.setEnd(london.getBegin() + "London".length());
  london.setValue("London");
  london.addToIndexes();

  processJCas();

  final List<Entity> entities = new ArrayList<>(JCasUtil.select(jCas, Entity.class));
  assertEquals(3, entities.size());
  assertEquals("John", entities.get(0).getValue());
  assertEquals("John", entities.get(1).getValue());
  assertTrue(entities.get(1) instanceof Person);
  assertEquals("London", entities.get(2).getValue());
}
/** Indexes a WordToken over every maximal run of ASCII letters in the document text. */
private void createWordTokens(JCas jCas) {
  final Matcher matcher = Pattern.compile("[A-Za-z]+").matcher(jCas.getDocumentText());
  while (matcher.find()) {
    final WordToken token = new WordToken(jCas, matcher.start(), matcher.end());
    token.addToIndexes();
  }
}
}
/**
 * Save words.
 *
 * <p>Serialises every word of the pattern to a document of covered text, part of speech and
 * (when a lemma annotation exists) its lemma form.
 *
 * @param pattern the pattern
 * @return the DB object
 */
private List<Object> saveWords(final Pattern pattern) {
  final List<Object> words = new ArrayList<>();
  for (int index = 0; index < pattern.getWords().size(); index++) {
    final WordToken token = pattern.getWords(index);
    final Document word =
        new Document()
            .append("text", token.getCoveredText())
            .append("pos", token.getPartOfSpeech());
    // Lemma is optional — only written when at least one lemma annotation is attached.
    if (token.getLemmas() != null && token.getLemmas().size() >= 1) {
      word.put("lemma", token.getLemmas(0).getLemmaForm());
    }
    words.add(word);
  }
  return words;
}
// One POS-tagged token per word; offsets 0-1/2-8/9-11/12-16 — presumably the document
// text is "a sample of text" (TODO confirm against the fixture's setDocumentText call).
a = new WordToken(jCas, 0, 1);
a.setPartOfSpeech("DT");
a.addToIndexes();
sample = new WordToken(jCas, 2, 8);
sample.setPartOfSpeech("NN");
sample.addToIndexes();
of = new WordToken(jCas, 9, 11);
of.setPartOfSpeech("IN");
of.addToIndexes();
text = new WordToken(jCas, 12, 16);
text.setPartOfSpeech("NN");
text.addToIndexes();
/**
 * Creates and indexes a Dependency annotation of the given type between the two tokens,
 * spanning the dependent token's offsets.
 *
 * @param governor the governing (head) token
 * @param dependent the dependent token; the annotation covers its span
 * @param type the dependency relation label
 */
private void createdependency(WordToken governor, WordToken dependent, String type) {
  final Dependency dep = new Dependency(jCas);
  dep.setBegin(dependent.getBegin());
  dep.setEnd(dependent.getEnd());
  dep.setGovernor(governor);
  dep.setDependent(dependent);
  dep.setDependencyType(type);
  dep.addToIndexes();
}
@Test public void testExtractWordsMissingWord() { final Set<WordToken> fromWords = graph.nearestWords(10, new WordToken(jCas)); // We include the word itself (even though its not in...?) Assert.assertEquals(1, fromWords.size()); }
// Wrap the dependent token as a node keyed by its internal id, carrying its POS and
// covered text, then seed a single-node tree and register it so later iterations can
// attach this token's children.
DependencyNode dependencyNode =
    new DependencyNode(
        Long.toString(dependent.getInternalId()),
        dependent.getPartOfSpeech(),
        dependent.getCoveredText());
DependencyTree dependencyTree = new DependencyTree(dependencyNode);
map.put(dependent, dependencyTree);
/**
 * Exercises RelationConstraint.matches: type and subtype must both be set (subtype
 * comparison is evidently case-insensitive), and the token list must supply the
 * required part of speech.
 */
@Test
public void testMatches() throws UIMAException {
  JCas jCas = JCasSingleton.getJCasInstance();
  RelationConstraint constraint =
      new RelationConstraint("type", "subType", "pos", "source", "target");

  Interaction interaction = new Interaction(jCas);
  // Nothing set yet — cannot match.
  assertFalse(constraint.matches(interaction, Collections.emptyList()));

  interaction.setRelationshipType("type");
  assertFalse(constraint.matches(interaction, Collections.emptyList()));

  // "subtype" vs constraint "subType": passes, so the compare ignores case.
  interaction.setRelationSubType("subtype");
  assertTrue(constraint.matches(interaction, Collections.emptyList()));

  WordToken verbToken = new WordToken(jCas);
  verbToken.setPartOfSpeech("VERB");
  WordToken posToken = new WordToken(jCas);
  posToken.setPartOfSpeech("POS");

  List<WordToken> tokens = new ArrayList<>();
  tokens.add(verbToken);
  // VERB alone does not satisfy the "pos" requirement.
  assertFalse(constraint.matches(interaction, tokens));
  tokens.add(posToken);
  assertTrue(constraint.matches(interaction, tokens));
}
}
// (continuation of a stream started above this chunk) keep only the pronouns
// "his"/"he", then require every one to point at the expected referent id.
.filter(
    p ->
        p.getCoveredText().equalsIgnoreCase("his")
            || p.getCoveredText().equalsIgnoreCase("he"))
.allMatch(p -> p.getReferent().getInternalId() == referenceId);
assertTrue(allMatch);
/**
 * Indexes the token under its full part-of-speech tag, and additionally under every
 * root tag (e.g. NN for NNS/NNP) that the full tag starts with.
 *
 * @param wt the token to index by part of speech
 */
private void addPartOfSpeech(final WordToken wt) {
  final String pos = wt.getPartOfSpeech();
  partOfSpeech.put(pos, wt);
  for (final String root : ROOT_POS) {
    if (pos.startsWith(root)) {
      partOfSpeech.put(root, wt);
    }
  }
}
@Test public void testBuildCovered() { // Create a fake sub-sentence final Sentence s = new Sentence(jCas); s.setBegin(0); s.setEnd(sample.getEnd()); final DependencyGraph graph = DependencyGraph.build(jCas, s); Assert.assertNotNull(graph); graph.log(); assertEquals(2, graph.getWords().size()); assertEquals(dA, graph.getDependency(a)); assertEquals(1, graph.getEdges(a).count()); assertEquals(0, graph.getGovernors(a).size()); assertEquals(dSample, graph.getDependency(sample)); assertEquals(1, graph.getEdges(sample).count()); assertEquals(1, graph.getGovernors(sample).size()); }
/** Adds a WordToken annotation for each contiguous sequence of ASCII letters. */
private void createWordTokens(JCas jCas) {
  final Matcher letters = Pattern.compile("[A-Za-z]+").matcher(jCas.getDocumentText());
  while (letters.find()) {
    new WordToken(jCas, letters.start(), letters.end()).addToIndexes();
  }
}
}
/**
 * Check if the given word token satisfies the constraints of this dependency node.
 *
 * <p>To match, the word token must have the same root part of speech (eg NN will also match NNP,
 * NNS etc.) and the covered text must satisfy the content regular expression, if defined.
 *
 * @param wt the word token to test; a token without a part of speech never matches
 * @return true if this matches
 */
public boolean matches(WordToken wt) {
  // Guard: tokens that were never POS-tagged previously caused an NPE here.
  final String pos = wt.getPartOfSpeech();
  if (pos == null || !pos.startsWith(getType())) {
    return false;
  }
  // Content regex is optional; an empty constraint accepts any covered text.
  return StringUtils.isEmpty(getContent()) || wt.getCoveredText().matches(getContent());
}