/**
 * Splits Chinese document text into sentence strings.
 *
 * <p>Convenience overload: delegates to the two-argument
 * {@code fromPlainText} with {@code false} as the second argument.
 *
 * @param contentString Chinese document text
 * @return a List of sentence strings
 * @throws IOException propagated from the two-argument overload
 */
public static List<String> fromPlainText(String contentString) throws IOException {
  List<String> sentences = fromPlainText(contentString, false);
  return sentences;
}
/** * Strip off HTML tags before processing. * Only the simplest tag stripping is implemented. * * @param inputString Chinese document text which contains HTML tags * @return a List of sentence strings */ public static List<String> fromHTML(String inputString) throws IOException { //HTMLParser parser = new HTMLParser(); //return fromPlainText(parser.parse(inputString)); List<String> ans = new ArrayList<>(); MyHTMLParser parser = new MyHTMLParser(); List<String> sents = parser.parse(inputString); for (String s : sents) { ans.addAll(fromPlainText(s)); } return ans; }
// Turn the accumulated buffer contents into a list of sentence strings.
// The explicit `true` is the opposite of what the one-argument fromPlainText
// overload passes (false).
// NOTE(review): the meaning of the boolean flag is not visible here — confirm
// against the two-argument fromPlainText declaration.
List<String> sents = ChineseDocumentToSentenceProcessor.fromPlainText(buff.toString(), true);
/**
 * Splits Chinese document text into sentence strings.
 *
 * <p>Convenience overload: delegates to the two-argument
 * {@code fromPlainText} with {@code false} as the second argument.
 *
 * @param contentString Chinese document text
 * @return a List of sentence strings
 * @throws IOException propagated from the two-argument overload
 */
public static List<String> fromPlainText(String contentString) throws IOException {
  List<String> sentences = fromPlainText(contentString, false);
  return sentences;
}
/**
 * Splits Chinese document text into sentence strings.
 *
 * <p>Convenience overload: delegates to the two-argument
 * {@code fromPlainText} with {@code false} as the second argument.
 *
 * @param contentString Chinese document text
 * @return a List of sentence strings
 * @throws IOException propagated from the two-argument overload
 */
public static List<String> fromPlainText(String contentString) throws IOException {
  List<String> sentences = fromPlainText(contentString, false);
  return sentences;
}
/** * Strip off HTML tags before processing. * Only the simplest tag stripping is implemented. * * @param inputString Chinese document text which contains HTML tags * @return a List of sentence strings */ public static List<String> fromHTML(String inputString) throws IOException { //HTMLParser parser = new HTMLParser(); //return fromPlainText(parser.parse(inputString)); List<String> ans = new ArrayList<>(); MyHTMLParser parser = new MyHTMLParser(); List<String> sents = parser.parse(inputString); for (String s : sents) { ans.addAll(fromPlainText(s)); } return ans; }
/** * Strip off HTML tags before processing. * Only the simplest tag stripping is implemented. * * @param inputString Chinese document text which contains HTML tags * @return a List of sentence strings */ public static List<String> fromHTML(String inputString) throws IOException { //HTMLParser parser = new HTMLParser(); //return fromPlainText(parser.parse(inputString)); List<String> ans = new ArrayList<String>(); MyHTMLParser parser = new MyHTMLParser(); List<String> sents = parser.parse(inputString); for (String s : sents) { ans.addAll(fromPlainText(s)); } return ans; }
// Turn the accumulated buffer contents into a list of sentence strings.
// The explicit `true` is the opposite of what the one-argument fromPlainText
// overload passes (false).
// NOTE(review): the meaning of the boolean flag is not visible here — confirm
// against the two-argument fromPlainText declaration.
List<String> sents = ChineseDocumentToSentenceProcessor.fromPlainText(buff.toString(), true);
// Turn the accumulated buffer contents into a list of sentence strings.
// The explicit `true` is the opposite of what the one-argument fromPlainText
// overload passes (false).
// NOTE(review): the meaning of the boolean flag is not visible here — confirm
// against the two-argument fromPlainText declaration.
List<String> sents = ChineseDocumentToSentenceProcessor.fromPlainText(buff.toString(), true);