uk.gov.dstl.baleen.types.language.WordToken.getPartOfSpeech java code examples

/**
 * Registers the given word token in the part of speech index.
 *
 * <p>The token is stored under its own part of speech tag and, in addition, under every entry of
 * {@code ROOT_POS} that is a prefix of that tag.
 *
 * @param wt the word token to index
 */
private void addPartOfSpeech(final WordToken wt) {
 final String tag = wt.getPartOfSpeech();
 partOfSpeech.put(tag, wt);
 for (final String rootTag : ROOT_POS) {
  if (tag.startsWith(rootTag)) {
   partOfSpeech.put(rootTag, wt);
  }
 }
}

/**
 * Indexes a word token by its part of speech.
 *
 * <p>The token is always recorded against its exact tag. It is also recorded against each root
 * tag in {@code ROOT_POS} that the exact tag starts with.
 *
 * @param wt the word token to record
 */
private void addPartOfSpeech(final WordToken wt) {
 final String exactTag = wt.getPartOfSpeech();
 partOfSpeech.put(exactTag, wt);
 for (final String candidateRoot : ROOT_POS) {
  if (!exactTag.startsWith(candidateRoot)) {
   continue;
  }
  partOfSpeech.put(candidateRoot, wt);
 }
}

/**
 * Check if the given word token satisfies the constraints of this dependency node.
 *
 * <p>To match, the word token's part of speech must start with this node's type (eg NN will also
 * match NNP, NNS etc.) and the covered text must satisfy the content regular expression, if
 * defined. A word token with no part of speech assigned never matches.
 *
 * @param wt the word token to test
 * @return true if this matches
 */
public boolean matches(WordToken wt) {
 String pos = wt.getPartOfSpeech();
 // An untagged token cannot satisfy a part of speech constraint; report no match rather than
 // failing with a NullPointerException.
 if (pos == null || !pos.startsWith(getType())) {
  return false;
 }
 String content = getContent();
 return StringUtils.isEmpty(content) || wt.getCoveredText().matches(content);
}

/**
 * Check if the given word token satisfies the constraints of this dependency node.
 *
 * <p>To match, the word token's part of speech must start with this node's type (eg NN will also
 * match NNP, NNS etc.) and the covered text must satisfy the content regular expression, if
 * defined. A word token with no part of speech assigned never matches.
 *
 * @param wt the word token to test
 * @return true if this matches
 */
public boolean matches(WordToken wt) {
 String pos = wt.getPartOfSpeech();
 // Without a part of speech tag there is nothing to compare against the node type, so the
 // token is rejected instead of triggering a NullPointerException.
 if (pos == null) {
  return false;
 }
 if (!pos.startsWith(getType())) {
  return false;
 }
 String content = getContent();
 return StringUtils.isEmpty(content) || wt.getCoveredText().matches(content);
}

/** Stubs the token's covered text and part of speech, then creates the pattern extract. */
@Before
public void before() {
 // Stub the two token accessors
 Mockito.when(token.getCoveredText()).thenReturn("token");
 Mockito.when(token.getPartOfSpeech()).thenReturn("NN");

 pe = new PatternExtract(from, to, 0, 10);
}

t -> {
 list.add(normalize(t.getCoveredText()));
 list.add(t.getPartOfSpeech());
});
t -> {
 list.add(normalize(t.getCoveredText()));
 list.add(t.getPartOfSpeech());
});

  new DependencyNode(
    Long.toString(dependent.getInternalId()),
    dependent.getPartOfSpeech(),
    dependent.getCoveredText());
DependencyTree dependencyTree = new DependencyTree(dependencyNode);

  new DependencyNode(
    Long.toString(dependent.getInternalId()),
    dependent.getPartOfSpeech(),
    dependent.getCoveredText());
DependencyTree dependencyTree = new DependencyTree(dependencyNode);

/**
 * Convert the words of a pattern into a list of documents.
 *
 * <p>Each word becomes a document holding its covered text under "text" and its part of speech
 * under "pos". If the word has at least one lemma, the lemma form of the first lemma is added
 * under "lemma".
 *
 * @param pattern the pattern
 * @return one document per word, in word order; an empty list if the pattern has no words
 */
private List<Object> saveWords(final Pattern pattern) {
 // Guard against an unset words array, mirroring the null check applied to lemmas below.
 if (pattern.getWords() == null) {
  return new ArrayList<>();
 }
 final int wordCount = pattern.getWords().size();
 final List<Object> list = new ArrayList<>(wordCount);
 for (int i = 0; i < wordCount; i++) {
  final WordToken w = pattern.getWords(i);
  final Document o =
    new Document().append("text", w.getCoveredText()).append("pos", w.getPartOfSpeech());
  // Only the first lemma is stored
  if (w.getLemmas() != null && w.getLemmas().size() > 0) {
   o.put("lemma", w.getLemmas(0).getLemmaForm());
  }
  list.add(o);
 }
 return list;
}

/**
 * Runs the pipeline over a three sentence document and checks the sentence count, together with
 * the word tokens and phrase chunks covered by the first sentence.
 */
@Test
public void test() throws Exception {
 jCas.setDocumentText(
   "This is some text. It has three sentences. The first sentence has four words.");
 SimplePipeline.runPipeline(jCas, ae);

 // The whole document: 3 sentences
 assertEquals(3, JCasUtil.select(jCas, Sentence.class).size());

 Sentence firstSentence = JCasUtil.selectByIndex(jCas, Sentence.class, 0);

 // Word tokens of the first sentence: 5 in total, the 4th being a noun
 List<WordToken> words = JCasUtil.selectCovered(jCas, WordToken.class, firstSentence);
 assertEquals(5, words.size());
 assertEquals("NN", words.get(3).getPartOfSpeech());

 // Phrase chunks of the first sentence: 3 in total, the 3rd being "some text"
 List<PhraseChunk> chunks = JCasUtil.selectCovered(jCas, PhraseChunk.class, firstSentence);
 assertEquals(3, chunks.size());
 assertEquals("some text", chunks.get(2).getCoveredText());
}

 /**
  * Runs the pipeline after adding two Text annotations to the document and checks the sentence
  * count, together with the word tokens and phrase chunks covered by the first sentence
  * annotation.
  */
 @Test
 public void testWiithText() throws Exception {
  jCas.setDocumentText(
    "This is some text. It has three sentences. The first sentence has four words.");

  // Two Text annotations: offsets 19-43 and 43 to the end of the document
  Text middleText = new Text(jCas, 19, 43);
  middleText.addToIndexes();
  Text trailingText = new Text(jCas, 43, jCas.getDocumentText().length());
  trailingText.addToIndexes();

  SimplePipeline.runPipeline(jCas, ae);

  // Only 2 sentences are expected
  assertEquals(2, JCasUtil.select(jCas, Sentence.class).size());

  // Note: due to the Text annotations, the first sentence annotation is the second sentence in
  // the document text
  Sentence firstSentence = JCasUtil.selectByIndex(jCas, Sentence.class, 0);

  // Word tokens of that sentence: 5 in total, the 4th being a plural noun
  List<WordToken> words = JCasUtil.selectCovered(jCas, WordToken.class, firstSentence);
  assertEquals(5, words.size());
  assertEquals("NNS", words.get(3).getPartOfSpeech());

  // Phrase chunks of that sentence: 3 in total, the 3rd being "three sentences"
  List<PhraseChunk> chunks = JCasUtil.selectCovered(jCas, PhraseChunk.class, firstSentence);
  assertEquals(3, chunks.size());
  assertEquals("three sentences", chunks.get(2).getCoveredText());
 }
}

Javadoc

getter for partOfSpeech - gets The part of speech (POS) tag. Usually a Penn Treebank tag.

Popular methods of WordToken

getCoveredText
setBegin
setEnd
<init>
addToIndexes
setPartOfSpeech
setter for partOfSpeech - sets The part of speech (POS) tag. Usually a Penn Treebank tag.
getBegin
getEnd
getLemmas
indexed getter for lemmas - gets an indexed value - A list of alternative lemmas for this word token
setLemmas
setter for lemmas - sets A list of alternative lemmas for this word token.
setSentenceOrder
setter for sentenceOrder - sets If not null, this should be the index position of the word token wit
equals

Popular in Java

Reading from database using SQL prepared statement
compareTo (BigDecimal)
getResourceAsStream (ClassLoader)
getSupportFragmentManager (FragmentActivity)
FileInputStream (java.io)
An input stream that reads bytes from a file. File file = ...finally if (in != null) in.clos
Thread (java.lang)
A thread is a thread of execution in a program. The Java Virtual Machine allows an application to ha
MalformedURLException (java.net)
This exception is thrown when a program attempts to create an URL from an incorrect specification.
URLEncoder (java.net)
This class is used to encode a string using the format required by application/x-www-form-urlencoded
Logger (org.slf4j)
The org.slf4j.Logger interface is the main user entry point of SLF4J API. It is expected that loggin
LoggerFactory (org.slf4j)
The LoggerFactory is a utility class producing Loggers for various logging APIs, most notably for lo
Github Copilot alternatives

How to use the getPartOfSpeech method in uk.gov.dstl.baleen.types.language.WordToken

Best Java code snippets using uk.gov.dstl.baleen.types.language.WordToken.getPartOfSpeech (Showing top 11 results out of 315)

How to use the getPartOfSpeech method in uk.gov.dstl.baleen.types.language.WordToken