opennlp.tools.util.StringList java code examples

/**
 * Builds a new {@link StringList} that holds every token of {@code ngram}, in order,
 * followed by {@code word} as the last token.
 *
 * @param ngram the n-gram to extend
 * @param word  the token placed after the last token of {@code ngram}
 * @return a list of {@code ngram.size() + 1} tokens
 */
private static StringList getNPlusOneNgram(StringList ngram, String word) {
 final int originalLength = ngram.size();
 String[] extended = new String[originalLength + 1];
 int position = 0;
 while (position < originalLength) {
  extended[position] = ngram.getToken(position);
  position++;
 }
 // The extra slot at the end receives the appended word.
 extended[originalLength] = word;
 return new StringList(extended);
}

/**
 * Verifies that {@link StringList#compareToIgnoreCase(StringList)} returns {@code true}
 * for two lists whose tokens differ only in letter case.
 */
@Test
public void testCompareToIgnoreCase() {
 StringList lowerCase = new StringList("a", "b");
 StringList upperCase = new StringList("A", "B");
 Assert.assertTrue(lowerCase.compareToIgnoreCase(upperCase));
}

/**
 * Verifies {@link StringList#equals(Object)}: lists with the same tokens are equal,
 * lists whose tokens differ in letter case are not.
 */
@Test
public void testEquals() {
 StringList lowerCase = new StringList("a", "b");
 StringList sameTokens = new StringList("a", "b");
 StringList upperCase = new StringList("A", "B");

 Assert.assertEquals(lowerCase, sameTokens);
 Assert.assertFalse(lowerCase.equals(upperCase));
}

/**
 * Finds the position of the first token in {@code sentence} that equals {@code token}.
 *
 * @param sentence the token list to scan from the start
 * @param token    the token to look for
 * @return the zero-based index of the first match, or {@code -1} if there is none
 */
private static int indexOf(StringList sentence, String token) {
 int found = -1;
 int position = 0;
 // Stop at the first hit so the lowest matching index is reported.
 while (found < 0 && position < sentence.size()) {
  if (token.equals(sentence.getToken(position))) {
   found = position;
  }
  position++;
 }
 return found;
}

/**
 * Estimates the maximum likelihood probability of a single word: the value returned by
 * {@code count} for the one-token n-gram, divided by the total number of tokens
 * summed over every entry of the vocabulary.
 *
 * @param word the only word in the unigram
 * @param set  the vocabulary
 * @return the maximum likelihood probability
 */
public static double calculateUnigramMLProbability(String word, Collection<StringList> set) {
 // Denominator: the token total across all vocabulary entries.
 double totalTokens = 0d;
 for (StringList entry : set) {
  totalTokens += entry.size();
 }
 StringList unigram = new StringList(word);
 Double occurrences = count(unigram, set);
 return occurrences / totalTokens;
}

 /**
  * Checks whether {@code obj} is a {@link String} whose single-token
  * {@link StringList}, wrapped in a {@code StringListWrapper}, is contained in
  * {@code entrySet}.
  *
  * @param obj the candidate element
  * @return {@code true} if {@code obj} is a string found in {@code entrySet};
  *         {@code false} otherwise, including for any non-string argument
  */
 @Override
 public boolean contains(Object obj) {
  if (!(obj instanceof String)) {
   return false;
  }
  StringList singleToken = new StringList((String) obj);
  return entrySet.contains(new StringListWrapper(singleToken));
 }
};

 /**
  * Extends the context produced by the superclass with token n-gram features.
  *
  * <p>The document is normalized, split with {@link SimpleTokenizer}, and the
  * resulting tokens are added to an {@link NGramModel} with the arguments
  * {@code 1} and {@code 3} (presumably the minimum and maximum n-gram length;
  * confirm against {@code NGramModel#add}). Every non-empty n-gram from the model
  * is appended to the context as {@code "tg=" + ngram.toString()}.
  *
  * @param document the text to extract features from
  * @return the superclass context followed by the token n-gram features
  */
 @Override
 public String[] getContext(CharSequence document) {
  String[] superContext = super.getContext(document);
  // Parameterized copy: Arrays.asList alone is fixed-size and cannot grow.
  List<String> context = new ArrayList<String>(Arrays.asList(superContext));
  document = this.normalizer.normalize(document);
  SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
  String[] words = tokenizer.tokenize(document.toString());
  NGramModel tokenNgramModel = new NGramModel();
  if (words.length > 0) {
   tokenNgramModel.add(new StringList(words), 1, 3);
   // Typed iterator removes the unchecked cast the raw Iterator required.
   Iterator<StringList> tokenNgramIterator = tokenNgramModel.iterator();
   while (tokenNgramIterator.hasNext()) {
    StringList tokenList = tokenNgramIterator.next();
    if (tokenList.size() > 0) {
     context.add("tg=" + tokenList.toString());
    }
   }
  }
  return context.toArray(new String[context.size()]);
 }
}

/**
 * Verifies that {@link StringList} relies on {@link String#intern}: the same token
 * text held by two different lists must be the very same {@code String} instance.
 */
@Test
public void testIntern() {
 StringList single = new StringList("a");
 StringList pair = new StringList("a", "b");
 // Identity, not equality, is what is being checked here.
 Assert.assertSame(single.getToken(0), pair.getToken(0));
}

 /**
  * Verifies the text form produced by {@link StringList#toString()}: tokens are
  * wrapped in square brackets and separated by a comma with no space.
  */
 @Test
 public void testToString() {
  String singleText = new StringList("a").toString();
  Assert.assertEquals("[a]", singleText);

  String pairText = new StringList("a", "b").toString();
  Assert.assertEquals("[a,b]", pairText);
 }
}

/**
 * Verifies {@link StringList#hashCode()}: lists with the same tokens share a hash
 * code, and the lists {@code [a,b]} and {@code [a,c]} do not.
 */
@Test
public void testHashCode() {
 int firstHash = new StringList("a", "b").hashCode();
 int sameTokensHash = new StringList("a", "b").hashCode();
 Assert.assertEquals(firstHash, sameTokensHash);

 int leftHash = new StringList("a", "b").hashCode();
 int differentTokensHash = new StringList("a", "c").hashCode();
 Assert.assertNotEquals(leftHash, differentTokensHash);
}

/**
 * Verifies that {@link StringList#iterator()} yields each token in order and reports
 * exhaustion afterwards, for a one-token list and for a three-token list.
 */
@Test
public void testIterator() {
 // Single-token list.
 Iterator<String> singleIt = new StringList("a").iterator();
 Assert.assertTrue(singleIt.hasNext());
 Assert.assertEquals("a", singleIt.next());
 Assert.assertFalse(singleIt.hasNext());

 // Multi-token list: walk the expected tokens alongside the iterator.
 String[] expectedTokens = {"a", "b", "c"};
 Iterator<String> multiIt = new StringList(expectedTokens).iterator();
 for (String expected : expectedTokens) {
  Assert.assertTrue(multiIt.hasNext());
  Assert.assertEquals(expected, multiIt.next());
 }
 Assert.assertFalse(multiIt.hasNext());
}

/**
 * Reports whether the cursor {@code index} is still below {@code size()}.
 *
 * @return {@code true} while {@code index} is less than {@code size()}
 */
public boolean hasNext() {
 final boolean withinBounds = size() > index;
 return withinBounds;
}

/**
 * Compares this wrapper with another object.
 *
 * <p>The same instance is always equal. Another {@code StringListWrapper} is equal
 * when the wrapped lists match: via {@code equals} if {@code isCaseSensitive} is
 * set, otherwise via {@code compareToIgnoreCase}. Anything else is not equal.
 *
 * @param obj the object to compare with
 * @return {@code true} if the objects are considered equal
 */
@Override
public boolean equals(Object obj) {
 if (obj == this) {
  return true;
 }
 if (!(obj instanceof StringListWrapper)) {
  return false;
 }
 StringList otherList = ((StringListWrapper) obj).getStringList();
 return isCaseSensitive
   ? this.stringList.equals(otherList)
   : this.stringList.compareToIgnoreCase(otherList);
}

/**
 * Returns the token at the current cursor position and advances the cursor by one.
 *
 * @return the token at the position the cursor held before this call
 * @throws NoSuchElementException if {@code hasNext()} is {@code false}
 */
public String next() {
 if (!hasNext()) {
  throw new NoSuchElementException();
 }
 // Advance first, then read the position the cursor was on.
 final int current = index++;
 return getToken(current);
}

 /**
  * Returns the text form of the wrapped string list.
  *
  * @return the result of calling {@code toString()} on the wrapped list
  */
 @Override
 public String toString() {
  StringList wrapped = this.stringList;
  return wrapped.toString();
 }
}

for (Iterator<String> it = tokens.iterator(); it.hasNext(); ) {

/**
 * Estimates the maximum likelihood probability of a bigram by delegating to
 * {@code calculateNgramMLProbability} with the two-token list {@code (x0, x1)}.
 *
 * @param x0  first word in the bigram
 * @param x1  second word in the bigram
 * @param set the vocabulary
 * @return the maximum likelihood probability
 */
public static double calculateBigramMLProbability(String x0, String x1, Collection<StringList> set) {
 StringList bigram = new StringList(x0, x1);
 return calculateNgramMLProbability(bigram, set);
}

/**
 * Counts the sentences in which {@code ngram} matches at the first occurrence of its
 * first token.
 *
 * <p>For each sentence, only the position returned by {@code indexOf} for the n-gram's
 * first token is examined, so a sentence contributes at most {@code 1} to the result.
 *
 * @param ngram     the n-gram to look for
 * @param sentences the sentences to scan
 * @return the number of sentences with a match, as a {@code Double}
 */
private static Double count(StringList ngram, Iterable<StringList> sentences) {
 double occurrences = 0d;
 for (StringList sentence : sentences) {
  int start = indexOf(sentence, ngram.getToken(0));
  // Skip sentences lacking the first token or too short to hold the whole n-gram.
  if (start < 0 || sentence.size() < start + ngram.size()) {
   continue;
  }
  boolean allTokensEqual = true;
  for (int offset = 1; offset < ngram.size(); offset++) {
   String fromSentence = sentence.getToken(start + offset);
   allTokensEqual &= fromSentence.equals(ngram.getToken(offset));
  }
  if (allTokensEqual) {
   occurrences++;
  }
 }
 return occurrences;
}

/**
 * Computes the maximum likelihood probability of a unigram: the value {@code count}
 * returns for the single-word n-gram, divided by the number of tokens across all
 * vocabulary entries.
 *
 * @param word the only word in the unigram
 * @param set  the vocabulary
 * @return the maximum likelihood probability
 */
public static double calculateUnigramMLProbability(String word, Collection<StringList> set) {
 // Sum the sizes of all entries to get the vocabulary's token total.
 double tokenTotal = 0d;
 for (StringList entry : set) {
  tokenTotal += entry.size();
 }
 Double matches = count(new StringList(word), set);
 return matches / tokenTotal;
}

/**
 * Adds the tokens to the dictionary as one new entry and widens the tracked
 * minimum and maximum token counts to include the size of that entry.
 *
 * @param tokens the new entry
 */
public void put(StringList tokens) {
 entrySet.add(new StringListWrapper(tokens));

 final int tokenCount = tokens.size();
 if (tokenCount < minTokenCount) {
  minTokenCount = tokenCount;
 }
 if (tokenCount > maxTokenCount) {
  maxTokenCount = tokenCount;
 }
}

Javadoc

The StringList is an immutable list of Strings.

Most used methods

<init>
Initializes the current instance. Note: Token Strings will be replaced by identical internal String
getToken
Retrieves a token from the given index.
size
Retrieves the number of tokens inside this list.
compareToIgnoreCase
Compares this list to the given tokens list, ignoring the case of the tokens. Note: This can cause problems with some locales.
equals
toString
iterator
Retrieves an Iterator over all tokens.
hashCode

Popular in Java

Finding current android device location
getSupportFragmentManager (FragmentActivity)
requestLocationUpdates (LocationManager)
scheduleAtFixedRate (Timer)
Collectors (java.util.stream)
Component (java.awt)
A component is an object having a graphical representation that can be displayed on the screen and t
BoxLayout (javax.swing)
Get (org.apache.hadoop.hbase.client)
Used to perform Get operations on a single row. To get everything for a row, instantiate a Get objec
Runner (org.openjdk.jmh.runner)
Reflections (org.reflections)
Reflections one-stop-shop object. Reflections scans your classpath, indexes the metadata, allows you t
Top plugins for WebStorm

How to use StringList in opennlp.tools.util

Best Java code snippets using opennlp.tools.util.StringList (Showing top 20 results out of 315)

How to use
StringList
in
opennlp.tools.util