edu.stanford.nlp.ling.SentenceUtils.toWordList java code examples

/**
 * Provides a fixed, pre-tokenized sample sentence for the language,
 * intended for testing.
 *
 * @return the sample sentence as a list of words
 */
public List<? extends HasWord> defaultTestSentence() {
 return SentenceUtils.toWordList(
   "Solch", "einen", "Zuspruch", "hat", "Angela", "Merkel",
   "lange", "nicht", "mehr", "erlebt", ".");
}

/**
 * Supplies the first sentence of TueBaDZ, already split into tokens.
 *
 * @return the five tokens of that sentence as a word list
 */
@Override
public List<? extends HasWord> defaultTestSentence() {
 final String[] firstSentence = {"Veruntreute", "die", "AWO", "Spendengeld", "?"};
 return SentenceUtils.toWordList(firstSentence);
}

/**
 * Builds a fixed six-token sample sentence as a word list.
 *
 * @return the sample tokens wrapped by {@code SentenceUtils.toWordList}
 */
public List<HasWord> defaultTestSentence() {
 return SentenceUtils.toWordList("Ésto", "es", "sólo", "una", "prueba", ".");
}

/**
 * Provides a fixed, pre-tokenized sample sentence for the language,
 * intended for testing. The tokens are written in UTF-8.
 *
 * @return the sample sentence as a list of words
 */
public List<? extends HasWord> defaultTestSentence() {
 return SentenceUtils.toWordList("هو", "استنكر", "الحكومة", "يوم", "امس", ".");
}

/**
 * Builds a fixed seventeen-token sample sentence as a word list.
 *
 * @return the sample tokens wrapped by {@code SentenceUtils.toWordList}
 */
public List<? extends HasWord> defaultTestSentence() {
 return SentenceUtils.toWordList(
   "H", "MWX", "MTPLC", "LA", "RQ", "M", "H", "TWPEH", "H",
   "MBIFH", "ALA", "GM", "M", "DRKI", "H", "HERMH", "yyDOT");
}

/**
 * Builds a fixed six-token sample sentence as a word list.
 *
 * @return the sample tokens wrapped by {@code SentenceUtils.toWordList}
 */
public List<HasWord> defaultTestSentence() {
 return SentenceUtils.toWordList("Ceci", "est", "seulement", "un", "test", ".");
}

/**
 * Segments a line by delegating to {@code segmentString} and splitting
 * its result on runs of whitespace.
 *
 * @param line the text to segment
 * @return one word per whitespace-delimited piece of the segmented string
 */
@Override
public List<HasWord> segment(String line) {
 final String[] pieces = segmentString(line).split("\\s+");
 return SentenceUtils.toWordList(pieces);
}

/**
 * Tags every sentence of a file and prints the tagged sentences to standard
 * output as UTF-8, then prints each word of one hard-coded sentence whose
 * tag starts with {@code "JJ"}.
 *
 * @param args exactly two entries: the tagger model file and the file to tag;
 *             any other count logs a usage message and returns
 * @throws Exception if the model or input file cannot be read, or tagging fails
 */
public static void main(String[] args) throws Exception {
 if (args.length != 2) {
  log.info("usage: java TaggerDemo2 modelFile fileToTag");
  return;
 }
 MaxentTagger tagger = new MaxentTagger(args[0]);
 TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(),
                   "untokenizable=noneKeep");
 // try-with-resources: the input reader was previously never closed, and the
 // writer was left unflushed/unclosed whenever tagging threw an exception.
 try (BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8"));
      PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8"))) {
  DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r);
  documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory);
  for (List<HasWord> sentence : documentPreprocessor) {
   List<TaggedWord> tSentence = tagger.tagSentence(sentence);
   pw.println(SentenceUtils.listToString(tSentence, false));
  }
  // print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence.
  List<HasWord> sent = SentenceUtils.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", ".");
  List<TaggedWord> taggedSent = tagger.tagSentence(sent);
  for (TaggedWord tw : taggedSent) {
   if (tw.tag().startsWith("JJ")) {
    pw.println(tw.word());
   }
  }
 }
}

} else {
 sentences = Generics.newArrayList();
 sentences.add(SentenceUtils.toWordList(o.split("\\s+")));

/**
 * Provides a fixed, pre-tokenized sample sentence for the language,
 * intended for testing.
 *
 * @return the sample sentence as a list of words
 */
public List<? extends HasWord> defaultTestSentence() {
 return SentenceUtils.toWordList(
   "Solch", "einen", "Zuspruch", "hat", "Angela", "Merkel",
   "lange", "nicht", "mehr", "erlebt", ".");
}

/**
 * Builds a fixed six-token sample sentence as a word list.
 *
 * @return the sample tokens wrapped by {@code SentenceUtils.toWordList}
 */
public List<HasWord> defaultTestSentence() {
 return SentenceUtils.toWordList("Ésto", "es", "sólo", "una", "prueba", ".");
}

/**
 * Provides a fixed, pre-tokenized sample sentence for the language,
 * intended for testing. The tokens are written in UTF-8.
 *
 * @return the sample sentence as a list of words
 */
public List<? extends HasWord> defaultTestSentence() {
 return SentenceUtils.toWordList("هو", "استنكر", "الحكومة", "يوم", "امس", ".");
}

/**
 * Provides a fixed, pre-tokenized sample sentence for the language,
 * intended for testing.
 *
 * @return the sample sentence as a list of words
 */
public List<? extends HasWord> defaultTestSentence() {
 return SentenceUtils.toWordList(
   "Solch", "einen", "Zuspruch", "hat", "Angela", "Merkel",
   "lange", "nicht", "mehr", "erlebt", ".");
}

/**
 * Builds a fixed six-token sample sentence as a word list.
 *
 * @return the sample tokens wrapped by {@code SentenceUtils.toWordList}
 */
public List<HasWord> defaultTestSentence() {
 return SentenceUtils.toWordList("Ésto", "es", "sólo", "una", "prueba", ".");
}

/**
 * Supplies the first sentence of TueBaDZ, already split into tokens.
 *
 * @return the five tokens of that sentence as a word list
 */
@Override
public List<? extends HasWord> defaultTestSentence() {
 final String[] firstSentence = {"Veruntreute", "die", "AWO", "Spendengeld", "?"};
 return SentenceUtils.toWordList(firstSentence);
}

/**
 * Builds a fixed seventeen-token sample sentence as a word list.
 *
 * @return the sample tokens wrapped by {@code SentenceUtils.toWordList}
 */
public List<? extends HasWord> defaultTestSentence() {
 return SentenceUtils.toWordList(
   "H", "MWX", "MTPLC", "LA", "RQ", "M", "H", "TWPEH", "H",
   "MBIFH", "ALA", "GM", "M", "DRKI", "H", "HERMH", "yyDOT");
}

/**
 * Provides a fixed, pre-tokenized sample sentence for the language,
 * intended for testing. The tokens are written in UTF-8.
 *
 * @return the sample sentence as a list of words
 */
public List<? extends HasWord> defaultTestSentence() {
 return SentenceUtils.toWordList("هو", "استنكر", "الحكومة", "يوم", "امس", ".");
}

/**
 * Builds a fixed seventeen-token sample sentence as a word list.
 *
 * @return the sample tokens wrapped by {@code SentenceUtils.toWordList}
 */
public List<? extends HasWord> defaultTestSentence() {
 return SentenceUtils.toWordList(
   "H", "MWX", "MTPLC", "LA", "RQ", "M", "H", "TWPEH", "H",
   "MBIFH", "ALA", "GM", "M", "DRKI", "H", "HERMH", "yyDOT");
}

/**
 * Builds a fixed six-token sample sentence as a word list.
 *
 * @return the sample tokens wrapped by {@code SentenceUtils.toWordList}
 */
public List<HasWord> defaultTestSentence() {
 return SentenceUtils.toWordList("Ceci", "est", "seulement", "un", "test", ".");
}

/**
 * Segments a line by delegating to {@code segmentString} and splitting
 * its result on runs of whitespace.
 *
 * @param line the text to segment
 * @return one word per whitespace-delimited piece of the segmented string
 */
@Override
public List<HasWord> segment(String line) {
 final String[] pieces = segmentString(line).split("\\s+");
 return SentenceUtils.toWordList(pieces);
}

Popular methods of SentenceUtils

listToString
Pretty print CoreMap classes using the same semantics as the toShorterString method.
listToOriginalTextString
Returns the sentence as a string, based on the original text and spacing prior to tokenization. This
toCoreLabelList
Create a sentence as a List of CoreLabel objects from an array (or varargs) of String objects.
toUntaggedList
Create a Sentence as a list of Word objects from an array of String objects.
wordToString

Popular in Java

Reactive rest calls using spring rest template
addToBackStack (FragmentTransaction)
setRequestProperty (URLConnection)
getSystemService (Context)
LinkedList (java.util)
Doubly-linked list implementation of the List and Deque interfaces. Implements all optional list oper
Set (java.util)
A Set is a data structure which does not allow duplicate elements.
UUID (java.util)
UUID is an immutable representation of a 128-bit universally unique identifier (UUID). There are mul
SAXParseException (org.xml.sax)
Encapsulate an XML parse error or warning. This module, both source code and documentation, is in t
Graphics2D (java.awt)
This Graphics2D class extends the Graphics class to provide more sophisticated control over graphics
Window (java.awt)
A Window object is a top-level window with no borders and no menubar. The default layout for a windo
Top 12 Jupyter Notebook extensions

How to use toWordList method in edu.stanford.nlp.ling.SentenceUtils

Best Java code snippets using edu.stanford.nlp.ling.SentenceUtils.toWordList (Showing top 20 results out of 315)

How to use
toWordList
method
in
edu.stanford.nlp.ling.SentenceUtils