/**
 * Returns a fixed, pre-tokenized sample sentence for the language (for testing).
 *
 * @return the sample tokens wrapped as a word list by {@code SentenceUtils.toWordList}
 */
public List<? extends HasWord> defaultTestSentence() {
  return SentenceUtils.toWordList(
      "Solch", "einen", "Zuspruch", "hat", "Angela", "Merkel",
      "lange", "nicht", "mehr", "erlebt", ".");
}
/**
 * Returns the first sentence of TueBaDZ as a pre-tokenized word list.
 *
 * @return the five sample tokens wrapped by {@code SentenceUtils.toWordList}
 */
@Override
public List<? extends HasWord> defaultTestSentence() {
  String[] tokens = {"Veruntreute", "die", "AWO", "Spendengeld", "?"};
  return SentenceUtils.toWordList(tokens);
}
/**
 * Returns a fixed, pre-tokenized sample sentence for the language (for testing).
 *
 * @return the six sample tokens wrapped as a word list by {@code SentenceUtils.toWordList}
 */
public List<HasWord> defaultTestSentence() {
  return SentenceUtils.toWordList("Ésto", "es", "sólo", "una", "prueba", ".");
}
/**
 * Returns a fixed, pre-tokenized sample sentence for the language (for testing).
 * The token literals are written directly in UTF-8.
 *
 * @return the sample tokens wrapped as a word list by {@code SentenceUtils.toWordList}
 */
public List<? extends HasWord> defaultTestSentence() {
  return SentenceUtils.toWordList(
      "هو","استنكر","الحكومة","يوم","امس",".");
}
/**
 * Returns a fixed, pre-tokenized sample token sequence for the language (for testing).
 *
 * @return the seventeen sample tokens wrapped as a word list by {@code SentenceUtils.toWordList}
 */
public List<? extends HasWord> defaultTestSentence() {
  return SentenceUtils.toWordList(
      "H", "MWX", "MTPLC", "LA", "RQ", "M",
      "H", "TWPEH", "H", "MBIFH", "ALA", "GM",
      "M", "DRKI", "H", "HERMH", "yyDOT");
}
/**
 * Returns a fixed, pre-tokenized sample sentence for the language (for testing).
 *
 * @return the six sample tokens wrapped as a word list by {@code SentenceUtils.toWordList}
 */
public List<HasWord> defaultTestSentence() {
  return SentenceUtils.toWordList("Ceci", "est", "seulement", "un", "test", ".");
}
/**
 * Segments a line and returns the resulting pieces as a word list.
 *
 * <p>The line is passed to {@code segmentString}, and the returned string is split on
 * runs of whitespace; each piece becomes one entry of the result.
 *
 * @param line the text to segment
 * @return the whitespace-separated pieces of the segmented string, as a word list
 */
@Override
public List<HasWord> segment(String line) {
  String[] tokens = segmentString(line).split("\\s+");
  return SentenceUtils.toWordList(tokens);
}
public static void main(String[] args) throws Exception { if (args.length != 2) { log.info("usage: java TaggerDemo2 modelFile fileToTag"); return; } MaxentTagger tagger = new MaxentTagger(args[0]); TokenizerFactory<CoreLabel> ptbTokenizerFactory = PTBTokenizer.factory(new CoreLabelTokenFactory(), "untokenizable=noneKeep"); BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(args[1]), "utf-8")); PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.out, "utf-8")); DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(r); documentPreprocessor.setTokenizerFactory(ptbTokenizerFactory); for (List<HasWord> sentence : documentPreprocessor) { List<TaggedWord> tSentence = tagger.tagSentence(sentence); pw.println(SentenceUtils.listToString(tSentence, false)); } // print the adjectives in one more sentence. This shows how to get at words and tags in a tagged sentence. List<HasWord> sent = SentenceUtils.toWordList("The", "slimy", "slug", "crawled", "over", "the", "long", ",", "green", "grass", "."); List<TaggedWord> taggedSent = tagger.tagSentence(sent); for (TaggedWord tw : taggedSent) { if (tw.tag().startsWith("JJ")) { pw.println(tw.word()); } } pw.close(); }
} else { sentences = Generics.newArrayList(); sentences.add(SentenceUtils.toWordList(o.split("\\s+")));
/**
 * Returns a fixed, pre-tokenized sample sentence for the language (for testing).
 *
 * @return the sample tokens wrapped as a word list by {@code SentenceUtils.toWordList}
 */
public List<? extends HasWord> defaultTestSentence() {
  return SentenceUtils.toWordList(
      "Solch", "einen", "Zuspruch", "hat", "Angela", "Merkel",
      "lange", "nicht", "mehr", "erlebt", ".");
}
/**
 * Returns a fixed, pre-tokenized sample sentence for the language (for testing).
 *
 * @return the six sample tokens wrapped as a word list by {@code SentenceUtils.toWordList}
 */
public List<HasWord> defaultTestSentence() {
  return SentenceUtils.toWordList("Ésto", "es", "sólo", "una", "prueba", ".");
}
/**
 * Returns a fixed, pre-tokenized sample sentence for the language (for testing).
 * The token literals are written directly in UTF-8.
 *
 * @return the sample tokens wrapped as a word list by {@code SentenceUtils.toWordList}
 */
public List<? extends HasWord> defaultTestSentence() {
  return SentenceUtils.toWordList(
      "هو","استنكر","الحكومة","يوم","امس",".");
}
/**
 * Returns a fixed, pre-tokenized sample sentence for the language (for testing).
 *
 * @return the sample tokens wrapped as a word list by {@code SentenceUtils.toWordList}
 */
public List<? extends HasWord> defaultTestSentence() {
  return SentenceUtils.toWordList(
      "Solch", "einen", "Zuspruch", "hat", "Angela", "Merkel",
      "lange", "nicht", "mehr", "erlebt", ".");
}
/**
 * Returns a fixed, pre-tokenized sample sentence for the language (for testing).
 *
 * @return the six sample tokens wrapped as a word list by {@code SentenceUtils.toWordList}
 */
public List<HasWord> defaultTestSentence() {
  return SentenceUtils.toWordList("Ésto", "es", "sólo", "una", "prueba", ".");
}
/**
 * Returns the first sentence of TueBaDZ as a pre-tokenized word list.
 *
 * @return the five sample tokens wrapped by {@code SentenceUtils.toWordList}
 */
@Override
public List<? extends HasWord> defaultTestSentence() {
  String[] tokens = {"Veruntreute", "die", "AWO", "Spendengeld", "?"};
  return SentenceUtils.toWordList(tokens);
}
/**
 * Returns a fixed, pre-tokenized sample token sequence for the language (for testing).
 *
 * @return the seventeen sample tokens wrapped as a word list by {@code SentenceUtils.toWordList}
 */
public List<? extends HasWord> defaultTestSentence() {
  return SentenceUtils.toWordList(
      "H", "MWX", "MTPLC", "LA", "RQ", "M",
      "H", "TWPEH", "H", "MBIFH", "ALA", "GM",
      "M", "DRKI", "H", "HERMH", "yyDOT");
}
/**
 * Returns a fixed, pre-tokenized sample sentence for the language (for testing).
 * The token literals are written directly in UTF-8.
 *
 * @return the sample tokens wrapped as a word list by {@code SentenceUtils.toWordList}
 */
public List<? extends HasWord> defaultTestSentence() {
  return SentenceUtils.toWordList(
      "هو","استنكر","الحكومة","يوم","امس",".");
}
/**
 * Returns a fixed, pre-tokenized sample token sequence for the language (for testing).
 *
 * @return the seventeen sample tokens wrapped as a word list by {@code SentenceUtils.toWordList}
 */
public List<? extends HasWord> defaultTestSentence() {
  return SentenceUtils.toWordList(
      "H", "MWX", "MTPLC", "LA", "RQ", "M",
      "H", "TWPEH", "H", "MBIFH", "ALA", "GM",
      "M", "DRKI", "H", "HERMH", "yyDOT");
}
/**
 * Returns a fixed, pre-tokenized sample sentence for the language (for testing).
 *
 * @return the six sample tokens wrapped as a word list by {@code SentenceUtils.toWordList}
 */
public List<HasWord> defaultTestSentence() {
  return SentenceUtils.toWordList("Ceci", "est", "seulement", "un", "test", ".");
}
/**
 * Segments a line and returns the resulting pieces as a word list.
 *
 * <p>The line is passed to {@code segmentString}, and the returned string is split on
 * runs of whitespace; each piece becomes one entry of the result.
 *
 * @param line the text to segment
 * @return the whitespace-separated pieces of the segmented string, as a word list
 */
@Override
public List<HasWord> segment(String line) {
  String[] tokens = segmentString(line).split("\\s+");
  return SentenceUtils.toWordList(tokens);
}