/**
 * Supplies a fixed six-token sentence for the language (for testing).
 * The tokens are handed, in order, to {@code SentenceUtils.toUntaggedList}.
 *
 * NOTE(review): the token used at positions 1, 2, 4 and 6 is the code point
 * sequence U+951F U+65A4 U+62F7, which commonly shows up when text has been
 * damaged by an encoding round trip. The originally intended words may have
 * been lost; confirm against the project history before relying on the
 * meaning of this sentence.
 *
 * @return the untagged word list built from the six tokens
 */
@Override
public ArrayList<Word> defaultTestSentence() {
  // The same literal occupies four of the six slots, so name it once.
  final String repeatedToken = "\u951f\u65a4\u62f7";
  return SentenceUtils.toUntaggedList(
      repeatedToken,
      repeatedToken,
      "\u5b66\u6821",
      repeatedToken,
      "\u5b66\u4e60",
      repeatedToken);
}
// Reads one line from infile, splits it on runs of whitespace, and passes the
// pieces to SentenceUtils.toUntaggedList; the resulting list is kept in sent.
// NOTE(review): if infile.readLine() can return null (BufferedReader.readLine()
// does so at end of stream), the split call throws a NullPointerException --
// confirm the surrounding code guards against that.
// NOTE(review): a line that begins with whitespace makes split() yield an empty
// first piece -- verify whether that can occur for this input.
ArrayList<Word> sent = SentenceUtils.toUntaggedList(infile.readLine().split("\\s+"));
/**
 * Tags the tokenized input string and returns the tagged version.
 * This method requires the input to already be tokenized.
 * The tagger wants input that is whitespace separated tokens, tokenized
 * according to the conventions of the training data. (For instance,
 * for the Penn Treebank, punctuation marks and possessive "'s" should
 * be separated from words.)
 * <p>
 * Leading whitespace is ignored rather than producing an empty first token.
 * An empty or whitespace-only string is tagged as a sentence with no tokens.
 *
 * @param toTag The untagged input String
 * @return The same string with tags inserted in the form word/tag
 */
public String tagTokenizedString(String toTag) {
  List<String> tokens = Arrays.asList(toTag.split("\\s+"));
  // String.split discards trailing empty strings but keeps a leading one, so
  // input that starts with whitespace (or is empty) yields "" at index 0.
  // Drop it so the tagger never receives an empty-string token.
  if (!tokens.isEmpty() && tokens.get(0).isEmpty()) {
    tokens = tokens.subList(1, tokens.size());
  }
  List<Word> sent = SentenceUtils.toUntaggedList(tokens);
  TestSentence testSentence = new TestSentence(this);
  testSentence.tagSentence(sent, false);
  return testSentence.getTaggedNice();
}
/**
 * Supplies a fixed six-token sentence for the language (for testing).
 * The tokens are handed, in order, to {@code SentenceUtils.toUntaggedList}.
 *
 * NOTE(review): the token used at positions 1, 2, 4 and 6 is the code point
 * sequence U+951F U+65A4 U+62F7, which commonly shows up when text has been
 * damaged by an encoding round trip. The originally intended words may have
 * been lost; confirm against the project history before relying on the
 * meaning of this sentence.
 *
 * @return the untagged word list built from the six tokens
 */
@Override
public ArrayList<Word> defaultTestSentence() {
  // The same literal occupies four of the six slots, so name it once.
  final String repeatedToken = "\u951f\u65a4\u62f7";
  return SentenceUtils.toUntaggedList(
      repeatedToken,
      repeatedToken,
      "\u5b66\u6821",
      repeatedToken,
      "\u5b66\u4e60",
      repeatedToken);
}
/**
 * Supplies a fixed six-token sentence for the language (for testing).
 * The tokens are handed, in order, to {@code SentenceUtils.toUntaggedList}.
 *
 * NOTE(review): the token used at positions 1, 2, 4 and 6 is the code point
 * sequence U+951F U+65A4 U+62F7, which commonly shows up when text has been
 * damaged by an encoding round trip. The originally intended words may have
 * been lost; confirm against the project history before relying on the
 * meaning of this sentence.
 *
 * @return the untagged word list built from the six tokens
 */
@Override
public ArrayList<Word> defaultTestSentence() {
  // The same literal occupies four of the six slots, so name it once.
  final String repeatedToken = "\u951f\u65a4\u62f7";
  return SentenceUtils.toUntaggedList(
      repeatedToken,
      repeatedToken,
      "\u5b66\u6821",
      repeatedToken,
      "\u5b66\u4e60",
      repeatedToken);
}
/**
 * Tags the tokenized input string and returns the tagged version.
 * This method requires the input to already be tokenized.
 * The tagger wants input that is whitespace separated tokens, tokenized
 * according to the conventions of the training data. (For instance,
 * for the Penn Treebank, punctuation marks and possessive "'s" should
 * be separated from words.)
 * <p>
 * Leading whitespace is ignored rather than producing an empty first token.
 * An empty or whitespace-only string is tagged as a sentence with no tokens.
 *
 * @param toTag The untagged input String
 * @return The same string with tags inserted in the form word/tag
 */
public String tagTokenizedString(String toTag) {
  List<String> tokens = Arrays.asList(toTag.split("\\s+"));
  // String.split discards trailing empty strings but keeps a leading one, so
  // input that starts with whitespace (or is empty) yields "" at index 0.
  // Drop it so the tagger never receives an empty-string token.
  if (!tokens.isEmpty() && tokens.get(0).isEmpty()) {
    tokens = tokens.subList(1, tokens.size());
  }
  List<Word> sent = SentenceUtils.toUntaggedList(tokens);
  TestSentence testSentence = new TestSentence(this);
  testSentence.tagSentence(sent, false);
  return testSentence.getTaggedNice();
}
/**
 * Tags the tokenized input string and returns the tagged version.
 * This method requires the input to already be tokenized.
 * The tagger wants input that is whitespace separated tokens, tokenized
 * according to the conventions of the training data. (For instance,
 * for the Penn Treebank, punctuation marks and possessive "'s" should
 * be separated from words.)
 * <p>
 * Leading whitespace is ignored rather than producing an empty first token.
 * An empty or whitespace-only string is tagged as a sentence with no tokens.
 *
 * @param toTag The untagged input String
 * @return The same string with tags inserted in the form word/tag
 */
public String tagTokenizedString(String toTag) {
  List<String> tokens = Arrays.asList(toTag.split("\\s+"));
  // String.split discards trailing empty strings but keeps a leading one, so
  // input that starts with whitespace (or is empty) yields "" at index 0.
  // Drop it so the tagger never receives an empty-string token.
  if (!tokens.isEmpty() && tokens.get(0).isEmpty()) {
    tokens = tokens.subList(1, tokens.size());
  }
  List<Word> sent = SentenceUtils.toUntaggedList(tokens);
  TestSentence testSentence = new TestSentence(this);
  testSentence.tagSentence(sent, false);
  return testSentence.getTaggedNice();
}