/**
 * Associates the given tags with a word, normalizing the key to lower
 * case when this dictionary is case insensitive.
 *
 * @param word the word to store tags for
 * @param tags the tags to associate with the word
 * @return the previous tags mapped to the (possibly normalized) word
 */
public String[] put(String word, String... tags) {
  String key = this.caseSensitive ? word : StringUtil.toLowerCase(word);
  return dictionary.put(key, tags);
}
public void createFeatures(List<String> features, String[] tokens, int index, String[] preds) {
  // Emit the surface form of the current token, case-folded when configured.
  String token = lowercase ? StringUtil.toLowerCase(tokens[index]) : tokens[index];
  features.add(WORD_PREFIX + "=" + token);
}
}
/**
 * Looks up the tags known for a word, honoring this dictionary's
 * case-sensitivity setting.
 *
 * @param word the word to look up
 * @return the tags recorded for {@code word}, or {@code null} when the
 *         dictionary holds no entry for it
 */
public String[] getTags(String word) {
  String key = caseSensitive ? word : StringUtil.toLowerCase(word);
  return dictionary.get(key);
}
public void createFeatures(List<String> features, String[] tokens, int index, String[] preds) {
  // Always emit the token-class feature; the combined word+class feature is optional.
  String token = tokens[index];
  String wordClass = FeatureGeneratorUtil.tokenFeature(token);
  features.add(TOKEN_CLASS_PREFIX + "=" + wordClass);
  if (generateWordAndClassFeature) {
    features.add(TOKEN_AND_CLASS_PREFIX + "=" + StringUtil.toLowerCase(token) + "," + wordClass);
  }
}
}
@Override public int hashCode() { // if lookup is too slow optimize this return StringUtil.toLowerCase(this.stringList.toString()).hashCode(); }
public void createFeatures(List<String> features, String[] tokens, int index, String[] previousOutcomes) {
  // Look up the token's cluster id, case-folding first when the dictionary is lower-cased.
  String key = lowerCaseDictionary ? StringUtil.toLowerCase(tokens[index]) : tokens[index];
  String clusterId = tokenDictionary.lookupToken(key);
  if (clusterId != null) {
    features.add(resourceName + clusterId);
  }
}
}
/**
 * Adds all character n-grams of {@code chars} whose length lies in the
 * inclusive range {@code [minLength, maxLength]} to this model. Each
 * n-gram is lower-cased before being added.
 *
 * @param chars the character sequence to extract n-grams from
 * @param minLength the minimum n-gram length, inclusive
 * @param maxLength the maximum n-gram length, inclusive; if smaller than
 *        {@code minLength} nothing is added
 */
public void add(CharSequence chars, int minLength, int maxLength) {
  for (int length = minLength; length <= maxLength; length++) {
    // Slide a window of the current length across the sequence;
    // start + length <= chars.length() keeps the window in bounds.
    for (int start = 0; start + length <= chars.length(); start++) {
      String gram = StringUtil.toLowerCase(chars.subSequence(start, start + length));
      add(new StringList(new String[]{gram}));
    }
  }
}
String leftLower = StringUtil.toLowerCase(parts[parts.length - 1]); key = leftLower + "+" + right; if (CONTRACTIONS.containsKey(key)) {
feats.add("st=" + StringUtil.toLowerCase(toks[index])); return; feats.add("st=" + StringUtil.toLowerCase(tokenized[i]));
public void createFeatures(List<String> features, String[] tokens, int index, String[] preds) {
  // Collect the character n-grams of the current token as features.
  NGramModel ngrams = new NGramModel();
  ngrams.add(tokens[index], minLength, maxLength);
  for (StringList gram : ngrams) {
    if (gram.size() == 0) {
      continue;
    }
    features.add("ng=" + StringUtil.toLowerCase(gram.getToken(0)));
  }
}
}
entityType = StringUtil.toLowerCase(token.substring(typeBegin.length(), typeEnd));
word = words[i]; } else { word = StringUtil.toLowerCase(words[i]);
/**
 * Verifies {@code StringUtil.toLowerCase} on upper-case, mixed-case and
 * already-lower-case inputs.
 */
@Test
public void testToLowerCase() {
  Assert.assertEquals("test", StringUtil.toLowerCase("TEST"));
  Assert.assertEquals("simple", StringUtil.toLowerCase("SIMPLE"));
  // Mixed-case and already-lower-case inputs were previously untested.
  Assert.assertEquals("mixedcase", StringUtil.toLowerCase("MixedCase"));
  Assert.assertEquals("lower", StringUtil.toLowerCase("lower"));
}
lowerCasedDictionary.put(StringUtil.toLowerCase(entry.getKey()), entry.getValue());
public ObjectStream<NameSample> create(String[] args) {
  Parameters params = ArgumentParser.parse(args, Parameters.class);
  // The MUC corpus is not pre-tokenized, so load a tokenizer model for it.
  TokenizerModel tokenizerModel = new TokenizerModelLoader().load(params.getTokenizerModel());
  Tokenizer tokenizer = new TokenizerME(tokenizerModel);
  // Pick up only *.sgm documents (extension matched case-insensitively).
  DirectorySampleStream sgmFiles = new DirectorySampleStream(params.getData(),
      file -> StringUtil.toLowerCase(file.getName()).endsWith(".sgm"), false);
  ObjectStream<String> mucDocStream =
      new FileToStringSampleStream(sgmFiles, StandardCharsets.UTF_8);
  return new MucNameSampleStream(tokenizer, mucDocStream);
}
public void createFeatures(List<String> features, String[] tokens, int index, String[] preds) {
  // Surface-form feature; optionally case-folded.
  String surface = tokens[index];
  if (lowercase) {
    surface = StringUtil.toLowerCase(surface);
  }
  features.add(WORD_PREFIX + "=" + surface);
}
}
public void createFeatures(List<String> features, String[] tokens, int index, String[] preds) {
  // Emit the token-class feature, then bail out unless the combined
  // word+class feature is enabled.
  final String wordClass = FeatureGeneratorUtil.tokenFeature(tokens[index]);
  features.add(TOKEN_CLASS_PREFIX + "=" + wordClass);
  if (!generateWordAndClassFeature) {
    return;
  }
  features.add(TOKEN_AND_CLASS_PREFIX + "=" + StringUtil.toLowerCase(tokens[index]) + "," + wordClass);
}
}
@Override public int hashCode() { // if lookup is too slow optimize this return StringUtil.toLowerCase(this.stringList.toString()).hashCode(); }
public ObjectStream<NameSample> create(String[] args) {
  // Parse the CLI arguments into the typed parameter bean.
  Parameters params = ArgumentParser.parse(args, Parameters.class);
  TokenizerModel tokenizerModel = new TokenizerModelLoader().load(params.getTokenizerModel());
  // Stream every *.sgm file (matched case-insensitively) under the data
  // directory as one UTF-8 string per document.
  ObjectStream<String> documents = new FileToStringSampleStream(
      new DirectorySampleStream(params.getData(),
          file -> StringUtil.toLowerCase(file.getName()).endsWith(".sgm"), false),
      StandardCharsets.UTF_8);
  return new MucNameSampleStream(new TokenizerME(tokenizerModel), documents);
}
public ObjectStream<NameSample> create(String[] args) {
  Parameters params = ArgumentParser.parse(args, Parameters.class);
  // Accept only SGML documents; the extension check is case-insensitive.
  DirectorySampleStream sgmlFiles = new DirectorySampleStream(params.getData(),
      f -> StringUtil.toLowerCase(f.getName()).endsWith(".sgm"), false);
  ObjectStream<String> rawDocs = new FileToStringSampleStream(sgmlFiles, StandardCharsets.UTF_8);
  // Tokenize with the user-supplied maxent tokenizer model.
  Tokenizer tokenizer = new TokenizerME(new TokenizerModelLoader().load(params.getTokenizerModel()));
  return new MucNameSampleStream(tokenizer, rawDocs);
}