/**
 * Associates the given tags with a word, normalizing the key to lower
 * case when this dictionary is case insensitive.
 *
 * @param word the word to store tags for
 * @param tags the tags to associate with the word
 * @return the previous tags mapped to the (possibly normalized) word
 */
public String[] put(String word, String... tags) {
  String key = this.caseSensitive ? word : StringUtil.toLowerCase(word);
  return dictionary.put(key, tags);
}
public void createFeatures(List<String> features, String[] tokens, int index, String[] preds) {
  // Emit the surface form of the current token, case-folded when configured.
  String token = lowercase ? StringUtil.toLowerCase(tokens[index]) : tokens[index];
  features.add(WORD_PREFIX + "=" + token);
}
}
/**
 * Looks up the tags known for a word, honoring this dictionary's
 * case-sensitivity setting.
 *
 * @param word the word to look up
 * @return the tags recorded for {@code word}, or {@code null} when the
 *         dictionary holds no entry for it
 */
public String[] getTags(String word) {
  String key = caseSensitive ? word : StringUtil.toLowerCase(word);
  return dictionary.get(key);
}
public void createFeatures(List<String> features, String[] tokens, int index, String[] preds) {
  // Always emit the token-class feature; the combined word+class feature is optional.
  String token = tokens[index];
  String wordClass = FeatureGeneratorUtil.tokenFeature(token);
  features.add(TOKEN_CLASS_PREFIX + "=" + wordClass);
  if (generateWordAndClassFeature) {
    features.add(TOKEN_AND_CLASS_PREFIX + "=" + StringUtil.toLowerCase(token) + "," + wordClass);
  }
}
}
@Override public int hashCode() { // if lookup is too slow optimize this return StringUtil.toLowerCase(this.stringList.toString()).hashCode(); }
public void createFeatures(List<String> features, String[] tokens, int index, String[] previousOutcomes) {
  // Look up the token's cluster id, case-folding first when the dictionary is lower-cased.
  String key = lowerCaseDictionary ? StringUtil.toLowerCase(tokens[index]) : tokens[index];
  String clusterId = tokenDictionary.lookupToken(key);
  if (clusterId != null) {
    features.add(resourceName + clusterId);
  }
}
}
/**
 * Adds all character n-grams of {@code chars} whose length lies in the
 * inclusive range {@code [minLength, maxLength]} to this model. Each
 * n-gram is lower-cased before being added.
 *
 * @param chars the character sequence to extract n-grams from
 * @param minLength the minimum n-gram length, inclusive
 * @param maxLength the maximum n-gram length, inclusive; if smaller than
 *        {@code minLength} nothing is added
 */
public void add(CharSequence chars, int minLength, int maxLength) {
  for (int length = minLength; length <= maxLength; length++) {
    // Slide a window of the current length across the sequence;
    // start + length <= chars.length() keeps the window in bounds.
    for (int start = 0; start + length <= chars.length(); start++) {
      String gram = StringUtil.toLowerCase(chars.subSequence(start, start + length));
      add(new StringList(new String[]{gram}));
    }
  }
}
String leftLower = StringUtil.toLowerCase(parts[parts.length - 1]); key = leftLower + "+" + right; if (CONTRACTIONS.containsKey(key)) {
feats.add("st=" + StringUtil.toLowerCase(toks[index])); return; feats.add("st=" + StringUtil.toLowerCase(tokenized[i]));
public void createFeatures(List<String> features, String[] tokens, int index, String[] preds) {
  // Collect the character n-grams of the current token as features.
  NGramModel ngrams = new NGramModel();
  ngrams.add(tokens[index], minLength, maxLength);
  for (StringList gram : ngrams) {
    if (gram.size() == 0) {
      continue;
    }
    features.add("ng=" + StringUtil.toLowerCase(gram.getToken(0)));
  }
}
}
entityType = StringUtil.toLowerCase(token.substring(typeBegin.length(), typeEnd));
word = words[i]; } else { word = StringUtil.toLowerCase(words[i]);
/**
 * Verifies {@code StringUtil.toLowerCase} on upper-case, mixed-case and
 * already-lower-case inputs.
 */
@Test
public void testToLowerCase() {
  Assert.assertEquals("test", StringUtil.toLowerCase("TEST"));
  Assert.assertEquals("simple", StringUtil.toLowerCase("SIMPLE"));
  // Mixed-case and already-lower-case inputs were previously untested.
  Assert.assertEquals("mixedcase", StringUtil.toLowerCase("MixedCase"));
  Assert.assertEquals("lower", StringUtil.toLowerCase("lower"));
}
lowerCasedDictionary.put(StringUtil.toLowerCase(entry.getKey()), entry.getValue());
public ObjectStream<NameSample> create(String[] args) {
  Parameters params = ArgumentParser.parse(args, Parameters.class);
  // The MUC corpus is not pre-tokenized, so load a tokenizer model for it.
  TokenizerModel tokenizerModel = new TokenizerModelLoader().load(params.getTokenizerModel());
  Tokenizer tokenizer = new TokenizerME(tokenizerModel);
  // Pick up only *.sgm documents (extension matched case-insensitively).
  DirectorySampleStream sgmFiles = new DirectorySampleStream(params.getData(),
      file -> StringUtil.toLowerCase(file.getName()).endsWith(".sgm"), false);
  ObjectStream<String> mucDocStream =
      new FileToStringSampleStream(sgmFiles, StandardCharsets.UTF_8);
  return new MucNameSampleStream(tokenizer, mucDocStream);
}
public void createFeatures(List<String> features, String[] tokens, int index, String[] preds) {
  // Surface-form feature; optionally case-folded.
  String surface = tokens[index];
  if (lowercase) {
    surface = StringUtil.toLowerCase(surface);
  }
  features.add(WORD_PREFIX + "=" + surface);
}
}
public void createFeatures(List<String> features, String[] tokens, int index, String[] preds) {
  // Emit the token-class feature, then bail out unless the combined
  // word+class feature is enabled.
  final String wordClass = FeatureGeneratorUtil.tokenFeature(tokens[index]);
  features.add(TOKEN_CLASS_PREFIX + "=" + wordClass);
  if (!generateWordAndClassFeature) {
    return;
  }
  features.add(TOKEN_AND_CLASS_PREFIX + "=" + StringUtil.toLowerCase(tokens[index]) + "," + wordClass);
}
}
@Override public int hashCode() { // if lookup is too slow optimize this return StringUtil.toLowerCase(this.stringList.toString()).hashCode(); }
public ObjectStream<NameSample> create(String[] args) {
  // Parse the CLI arguments into the typed parameter bean.
  Parameters params = ArgumentParser.parse(args, Parameters.class);
  TokenizerModel tokenizerModel = new TokenizerModelLoader().load(params.getTokenizerModel());
  // Stream every *.sgm file (matched case-insensitively) under the data
  // directory as one UTF-8 string per document.
  ObjectStream<String> documents = new FileToStringSampleStream(
      new DirectorySampleStream(params.getData(),
          file -> StringUtil.toLowerCase(file.getName()).endsWith(".sgm"), false),
      StandardCharsets.UTF_8);
  return new MucNameSampleStream(new TokenizerME(tokenizerModel), documents);
}
public ObjectStream<NameSample> create(String[] args) {
  Parameters params = ArgumentParser.parse(args, Parameters.class);
  // Accept only SGML documents; the extension check is case-insensitive.
  DirectorySampleStream sgmlFiles = new DirectorySampleStream(params.getData(),
      f -> StringUtil.toLowerCase(f.getName()).endsWith(".sgm"), false);
  ObjectStream<String> rawDocs = new FileToStringSampleStream(sgmlFiles, StandardCharsets.UTF_8);
  // Tokenize with the user-supplied maxent tokenizer model.
  Tokenizer tokenizer = new TokenizerME(new TokenizerModelLoader().load(params.getTokenizerModel()));
  return new MucNameSampleStream(tokenizer, rawDocs);
}