uk.gov.dstl.baleen.types.language.WordToken.setEnd java code examples

/**
 * Creates a word token covering the given span: the begin and end offsets are stored first, then
 * {@code readObject()} is invoked.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public WordToken(JCas jcas, int begin, int end) {
 super(jcas);
 this.setBegin(begin);
 this.setEnd(end);
 this.readObject();
}

/**
 * Creates a word token covering the given span: the begin and end offsets are stored first, then
 * {@code readObject()} is invoked.
 *
 * @generated
 * @param jcas JCas to which this Feature Structure belongs
 * @param begin offset to the begin spot in the SofA
 * @param end offset to the end spot in the SofA
 */
public WordToken(JCas jcas, int begin, int end) {
 super(jcas);
 this.setBegin(begin);
 this.setEnd(end);
 this.readObject();
}

/**
 * Splits the document text on every match of {@code regex} and creates one indexed
 * {@link WordToken} for each stretch of text that precedes a match.
 *
 * <p>The first token starts at offset 0; each later token starts where the previous match ended.
 * A match that begins exactly where the current token starts yields a zero-length token. Text
 * following the final match is not turned into a token.
 *
 * @param jCas the JCas whose document text is split and which receives the tokens
 * @param regex the separator pattern
 * @return the created tokens, in document order
 */
public static List<WordToken> createWordTokens(JCas jCas, String regex) {
 final List<WordToken> tokens = new ArrayList<>();
 final String text = jCas.getDocumentText();
 final Matcher separators = Pattern.compile(regex).matcher(text);

 // Start offset of the token currently being delimited.
 int tokenStart = 0;
 while (separators.find()) {
  final WordToken token = new WordToken(jCas);
  token.setBegin(tokenStart);
  token.setEnd(separators.start());
  token.addToIndexes();
  tokens.add(token);

  // The next token begins right after this separator.
  tokenStart = separators.end();
 }
 return tokens;
}

/**
 * Builds a {@link WordToken} for the i-th word of the sentence and adds it to the indexes.
 *
 * <p>Begin and end are taken from the sentence's start and end offset arrays. The part of speech
 * is set only when the sentence has tags. When a lemma is defined and is not
 * {@code MISSING_VALUE}, a single indexed {@link WordLemma} is attached (carrying the same tag if
 * tags are defined); otherwise the token receives an empty lemma array.
 *
 * @param sentence sentence supplying offsets, tags and lemmas
 * @param i position of the word within the sentence's arrays
 * @return the newly created and indexed word token
 */
private WordToken createWordToken(Sentence sentence, int i) {
 final WordToken token = new WordToken(jCas);
 token.setBegin(sentence.startOffsets()[i]);
 token.setEnd(sentence.endOffsets()[i]);

 final Option<String[]> posTags = sentence.tags();
 final boolean tagged = posTags.isDefined();
 if (tagged) {
  token.setPartOfSpeech(posTags.get()[i]);
 }

 final Option<String[]> lemmaForms = sentence.lemmas();
 final boolean lemmaAvailable =
   lemmaForms.isDefined() && !MISSING_VALUE.equals(lemmaForms.get()[i]);
 if (!lemmaAvailable) {
  // No usable lemma: give the token an empty lemma array.
  token.setLemmas(new FSArray(jCas, 0));
 } else {
  token.setLemmas(new FSArray(jCas, 1));
  final WordLemma wordLemma = new WordLemma(jCas);
  wordLemma.setLemmaForm(lemmaForms.get()[i]);
  if (tagged) {
   wordLemma.setPartOfSpeech(posTags.get()[i]);
  }
  wordLemma.addToIndexes();
  token.setLemmas(0, wordLemma);
 }

 token.addToIndexes();
 return token;
}

/**
 * Builds a {@link WordToken} for the i-th word of the sentence and adds it to the indexes.
 *
 * <p>Begin and end are taken from the sentence's start and end offset arrays. The part of speech
 * is set only when the sentence has tags. When a lemma is defined and is not
 * {@code MISSING_VALUE}, a single indexed {@link WordLemma} is attached (carrying the same tag if
 * tags are defined); otherwise the token receives an empty lemma array.
 *
 * @param sentence sentence supplying offsets, tags and lemmas
 * @param i position of the word within the sentence's arrays
 * @return the newly created and indexed word token
 */
private WordToken createWordToken(Sentence sentence, int i) {
 final WordToken token = new WordToken(jCas);
 token.setBegin(sentence.startOffsets()[i]);
 token.setEnd(sentence.endOffsets()[i]);

 final Option<String[]> posTags = sentence.tags();
 final boolean tagged = posTags.isDefined();
 if (tagged) {
  token.setPartOfSpeech(posTags.get()[i]);
 }

 final Option<String[]> lemmaForms = sentence.lemmas();
 final boolean lemmaAvailable =
   lemmaForms.isDefined() && !MISSING_VALUE.equals(lemmaForms.get()[i]);
 if (!lemmaAvailable) {
  // No usable lemma: give the token an empty lemma array.
  token.setLemmas(new FSArray(jCas, 0));
 } else {
  token.setLemmas(new FSArray(jCas, 1));
  final WordLemma wordLemma = new WordLemma(jCas);
  wordLemma.setLemmaForm(lemmaForms.get()[i]);
  if (tagged) {
   wordLemma.setPartOfSpeech(posTags.get()[i]);
  }
  wordLemma.addToIndexes();
  token.setLemmas(0, wordLemma);
 }

 token.addToIndexes();
 return token;
}

/**
 * Indexes a single VERB token over "working" with no lemmas, runs the pipeline, and expects the
 * first word token's first lemma to be "work".
 */
@Test
public void testAddsLemma() throws UIMAException, ResourceInitializationException {
 final String word = "working";
 jCas.setDocumentText("Is this working?");

 final WordToken token = new WordToken(jCas);
 final int start = jCas.getDocumentText().indexOf(word);
 token.setBegin(start);
 token.setEnd(start + word.length());
 token.setPartOfSpeech("VERB");
 token.addToIndexes();

 processJCas("wordnet", wordnetErd);

 final List<WordToken> processed = new ArrayList<>(JCasUtil.select(jCas, WordToken.class));
 final WordToken first = processed.get(0);
 assertEquals("work", first.getLemmas(0).getLemmaForm());
}

 /**
  * Indexes a VERB token over "working" that already carries one lemma, runs the pipeline, and
  * expects the existing lemma to remain at index 0 with "work" present at index 1.
  */
 @Test
 public void testAddsLemmaToExistingLemmas()
   throws UIMAException, ResourceInitializationException {
  final String word = "working";
  jCas.setDocumentText("Is this working?");

  final WordToken token = new WordToken(jCas);
  final int start = jCas.getDocumentText().indexOf(word);
  token.setBegin(start);
  token.setEnd(start + word.length());
  token.setPartOfSpeech("VERB");
  token.setLemmas(new FSArray(jCas, 1));

  // Pre-populate the only lemma slot before the annotator runs.
  final WordLemma existingLemma = new WordLemma(jCas);
  existingLemma.setPartOfSpeech("existing");
  existingLemma.setLemmaForm("existing");
  token.setLemmas(0, existingLemma);
  token.addToIndexes();

  processJCas("wordnet", wordnetErd);

  final List<WordToken> processed = new ArrayList<>(JCasUtil.select(jCas, WordToken.class));
  final WordToken first = processed.get(0);

  assertEquals(existingLemma, first.getLemmas(0));
  assertEquals("work", first.getLemmas(1).getLemmaForm());
 }
}

tokens[0].setEnd(0);
tokens[0].addToIndexes();
tokens[1].setEnd(2);
tokens[1].addToIndexes();
tokens[2].setEnd(4);
tokens[2].addToIndexes();
tokens[3].setEnd(text.length());
tokens[3].addToIndexes();

/**
 * Sets up a Person "John" and a word token "he" sharing one reference target, plus a Location
 * "London", then runs the pipeline and expects three entities: "John", a Person valued "John",
 * and "London".
 */
@Test
public void test() throws AnalysisEngineProcessException, ResourceInitializationException {
 final String text = "John say that he would visit London";
 jCas.setDocumentText(text);

 // Shared reference target linking "John" and "he".
 final ReferenceTarget target = new ReferenceTarget(jCas);
 target.addToIndexes();

 final Person john = new Person(jCas);
 john.setBegin(0);
 john.setEnd(4);
 john.setReferent(target);
 john.setValue("John");
 john.addToIndexes();

 final String pronoun = "he";
 final WordToken pronounToken = new WordToken(jCas);
 final int pronounStart = text.indexOf(pronoun);
 pronounToken.setBegin(pronounStart);
 pronounToken.setEnd(pronounStart + pronoun.length());
 pronounToken.setReferent(target);
 pronounToken.addToIndexes();

 final String city = "London";
 final Location london = new Location(jCas);
 final int cityStart = text.indexOf(city);
 london.setBegin(cityStart);
 london.setEnd(cityStart + city.length());
 london.setValue(city);
 london.addToIndexes();

 processJCas();

 final List<Entity> entities = new ArrayList<>(JCasUtil.select(jCas, Entity.class));
 assertEquals(3, entities.size());
 assertEquals("John", entities.get(0).getValue());
 assertEquals("John", entities.get(1).getValue());
 assertTrue(entities.get(1) instanceof Person);
 assertEquals("London", entities.get(2).getValue());
}

final WordToken wordToken = new WordToken(jCas);
wordToken.setBegin(offset);
wordToken.setEnd(end);

private void addWordTokens() {
 WordToken wt0 = new WordToken(jCas);
 wt0.setBegin(0);
 wt0.setEnd(3);
 wt0.setPartOfSpeech("NNP");
 wt0.setSentenceOrder(0);
 wt1.setEnd(4);
 wt1.setPartOfSpeech(",");
 wt1.setSentenceOrder(1);
 wt2.setEnd(7);
 wt2.setPartOfSpeech("IN");
 wt2.setSentenceOrder(2);
 wt3.setEnd(12);
 wt3.setPartOfSpeech("JJ");
 wt3.setSentenceOrder(3);
 wt4.setEnd(13);
 wt4.setPartOfSpeech(",");
 wt4.setSentenceOrder(4);
 wt5.setEnd(20);
 wt5.setPartOfSpeech("NNS");
 wt5.setSentenceOrder(5);
 wt6.setEnd(27);
 wt6.setPartOfSpeech("NNP");
 wt6.setSentenceOrder(6);

final WordToken wordToken = new WordToken(jCas);
wordToken.setBegin(offset);
wordToken.setEnd(end);

// Excerpt: the declarations and begin offsets of these tokens are not shown here.
// Each token gets an end offset and a part-of-speech tag, then is added to the indexes.
attackedVerb.setEnd(attacked.getEnd());
attackedVerb.setPartOfSpeech("VBZ");
attackedVerb.addToIndexes();
attackNoun.setEnd(attack.getEnd());
attackNoun.setPartOfSpeech("NNS");
attackNoun.addToIndexes();
// NOTE(review): unlike the two tokens above, this copies attackAdj's own end offset back onto
// itself rather than taking it from another annotation - confirm whether that is intended.
attackAdj.setEnd(attackAdj.getEnd());
attackAdj.setPartOfSpeech("ADJ");
attackAdj.addToIndexes();

tokens[0].setEnd(1);
tokens[0].addToIndexes();
tokens[1].setEnd(3);
tokens[1].addToIndexes();
tokens[2].setEnd(5);
tokens[2].addToIndexes();
tokens[3].setEnd(text.length());
tokens[3].addToIndexes();

wt0.setEnd(3);
wt0.setPartOfSpeech("NNP");
wt0.setSentenceOrder(0);
wt1.setEnd(11);
wt1.setPartOfSpeech("VBD");
wt1.setSentenceOrder(1);
wt2.setEnd(18);
wt2.setPartOfSpeech("NNP");
wt2.setSentenceOrder(2);
wt3.setEnd(19);
wt3.setPartOfSpeech(".");
wt3.setSentenceOrder(3);
wt4.setEnd(23);
wt4.setPartOfSpeech("NNP");
wt4.setSentenceOrder(0);
wt5.setEnd(28);
wt5.setPartOfSpeech("VBD");
wt5.setSentenceOrder(1);
wt6.setEnd(31);
wt6.setPartOfSpeech("TO");
wt6.setSentenceOrder(2);
wt7.setEnd(37);

wt0.setEnd(3);
wt0.setPartOfSpeech("NNP");
wt0.setSentenceOrder(0);
wt1.setEnd(11);
wt1.setPartOfSpeech("VBD");
wt1.setSentenceOrder(1);
wt2.setEnd(18);
wt2.setPartOfSpeech("NNP");
wt2.setSentenceOrder(2);
wt3.setEnd(19);
wt3.setPartOfSpeech(",");
wt3.setSentenceOrder(3);
wt4.setEnd(25);
wt4.setPartOfSpeech("NNP");
wt4.setSentenceOrder(4);
wt5.setEnd(29);
wt5.setPartOfSpeech("CC");
wt5.setSentenceOrder(5);
wt6.setEnd(33);
wt6.setPartOfSpeech("NNP");
wt6.setSentenceOrder(6);
wt7.setEnd(38);

private void addAnnotations() {
 WordToken wt0 = new WordToken(jCas);
 wt0.setBegin(0);
 wt0.setEnd(3);
 wt0.setPartOfSpeech("NNP");
 wt0.setSentenceOrder(0);
 wt1.setEnd(11);
 wt1.setPartOfSpeech("VBD");
 wt1.setSentenceOrder(1);
 wt2.setEnd(18);
 wt2.setPartOfSpeech("NNP");
 wt2.setSentenceOrder(2);
 wt3.setEnd(22);
 wt3.setPartOfSpeech("CC");
 wt3.setSentenceOrder(3);
 wt4.setEnd(28);
 wt4.setPartOfSpeech("NNP");
 wt4.setSentenceOrder(4);
 wt5.setEnd(29);
 wt5.setPartOfSpeech(".");
 wt5.setSentenceOrder(5);

private void addDependencyInformation() {
 WordToken wt0 = new WordToken(jCas);
 wt0.setBegin(0);
 wt0.setEnd(3);
 wt0.setPartOfSpeech("NNP");
 wt0.setSentenceOrder(0);
 wt1.setEnd(4);
 wt1.setPartOfSpeech(",");
 wt1.setSentenceOrder(1);
 wt2.setEnd(7);
 wt2.setPartOfSpeech("IN");
 wt2.setSentenceOrder(2);
 wt3.setEnd(12);
 wt3.setPartOfSpeech("JJ");
 wt3.setSentenceOrder(3);
 wt4.setEnd(13);
 wt4.setPartOfSpeech(",");
 wt4.setSentenceOrder(4);
 wt5.setEnd(20);
 wt5.setPartOfSpeech("NNS");
 wt5.setSentenceOrder(5);
 wt6.setEnd(27);
 wt6.setPartOfSpeech("NNP");
 wt6.setSentenceOrder(6);

jumps.setEnd(8 + "jumps".length());
jumps.setPartOfSpeech("VB");
jumps.addToIndexes();

jumps.setEnd(8 + "jumps".length());
jumps.setPartOfSpeech("VB");
final WordLemma jumpLemma = new WordLemma(jCas);

Popular methods of WordToken

getCoveredText
setBegin
<init>
addToIndexes
setPartOfSpeech
setter for partOfSpeech - sets The part of speech (POS) tag. Usually a Penn Treebank tag.
getBegin
getEnd
getLemmas
indexed getter for lemmas - gets an indexed value - A list of alternative lemmas for this word token
getPartOfSpeech
getter for partOfSpeech - gets The part of speech (POS) tag. Usually a Penn Treebank tag.
setLemmas
setter for lemmas - sets A list of alternative lemmas for this word token.
setSentenceOrder
setter for sentenceOrder - sets If not null, this should be the index position of the word token wit
equals

Popular in Java

Reading from database using SQL prepared statement
getResourceAsStream (ClassLoader)
requestLocationUpdates (LocationManager)
getContentResolver (Context)
IOException (java.io)
Signals a general, I/O-related error. Error details may be specified when calling the constructor, a
InputStreamReader (java.io)
A class for turning a byte stream into a character stream. Data read from the source input stream is
PrintStream (java.io)
Fake signature of an existing Java class.
Thread (java.lang)
A thread is a thread of execution in a program. The Java Virtual Machine allows an application to ha
CountDownLatch (java.util.concurrent)
A synchronization aid that allows one or more threads to wait until a set of operations being perfor
Window (java.awt)
A Window object is a top-level window with no borders and no menubar. The default layout for a windo
CodeWhisperer alternatives

How to use the setEnd method in uk.gov.dstl.baleen.types.language.WordToken

Best Java code snippets using uk.gov.dstl.baleen.types.language.WordToken.setEnd (Showing top 20 results out of 315)

How to use
setEnd
method
in
uk.gov.dstl.baleen.types.language.WordToken