= new ChineseDocumentToSentenceProcessor(null); boolean expandMidDot = true;
@Override public void init(SeqClassifierFlags flags) { this.flags = flags; factory = LineIterator.getFactory(new CTBDocumentParser()); if (DEBUG) EncodingPrintWriter.err.println("Sighan2005DocRandW: using normalization file " + flags.normalizationTable, "UTF-8"); // pichuan : flags.normalizationTable is null --> i believe this is replaced by some java class?? // (Thu Apr 24 11:10:42 2008) cdtos = new ChineseDocumentToSentenceProcessor(flags.normalizationTable); if (flags.dictionary != null) { String[] dicts = flags.dictionary.split(","); cdict = new ChineseDictionary(dicts, cdtos, flags.expandMidDot); } if (flags.serializedDictionary != null) { String dict = flags.serializedDictionary; cdict = new ChineseDictionary(dict, cdtos, flags.expandMidDot); } if (flags.dictionary2 != null) { String[] dicts2 = flags.dictionary2.split(","); cdict2 = new ChineseDictionary(dicts2, cdtos, flags.expandMidDot); } }
return; cp = new ChineseDocumentToSentenceProcessor(); if (props.containsKey("encoding")) { log.info("WARNING: for now the default encoding is "+cp.encoding+". It's not changeable for now");
= new ChineseDocumentToSentenceProcessor(null); boolean expandMidDot = true;
= new ChineseDocumentToSentenceProcessor(null); boolean expandMidDot = true;
@Override public void init(SeqClassifierFlags flags) { this.flags = flags; factory = LineIterator.getFactory(new CTBDocumentParser()); if (DEBUG) EncodingPrintWriter.err.println("Sighan2005DocRandW: using normalization file " + flags.normalizationTable, "UTF-8"); // pichuan : flags.normalizationTable is null --> i believe this is replaced by some java class?? // (Thu Apr 24 11:10:42 2008) cdtos = new ChineseDocumentToSentenceProcessor(flags.normalizationTable); if (flags.dictionary != null) { String[] dicts = flags.dictionary.split(","); cdict = new ChineseDictionary(dicts, cdtos, flags.expandMidDot); } if (flags.serializedDictionary != null) { String dict = flags.serializedDictionary; cdict = new ChineseDictionary(dict, cdtos, flags.expandMidDot); } if (flags.dictionary2 != null) { String[] dicts2 = flags.dictionary2.split(","); cdict2 = new ChineseDictionary(dicts2, cdtos, flags.expandMidDot); } }
return; cp = new ChineseDocumentToSentenceProcessor(); if (props.containsKey("encoding")) { System.err.println("WARNING: for now the default encoding is "+cp.encoding+". It's not changeable for now");
return; cp = new ChineseDocumentToSentenceProcessor(); if (props.containsKey("encoding")) { log.info("WARNING: for now the default encoding is "+cp.encoding+". It's not changeable for now");
@Override public void init(SeqClassifierFlags flags) { this.flags = flags; factory = LineIterator.getFactory(new CTBDocumentParser()); if (DEBUG) EncodingPrintWriter.err.println("Sighan2005DocRandW: using normalization file " + flags.normalizationTable, "UTF-8"); // pichuan : flags.normalizationTable is null --> i believe this is replaced by some java class?? // (Thu Apr 24 11:10:42 2008) cdtos = new ChineseDocumentToSentenceProcessor(flags.normalizationTable); if (flags.dictionary != null) { String[] dicts = flags.dictionary.split(","); cdict = new ChineseDictionary(dicts, cdtos, flags.expandMidDot); } if (flags.serializedDictionary != null) { String dict = flags.serializedDictionary; cdict = new ChineseDictionary(dict, cdtos, flags.expandMidDot); } if (flags.dictionary2 != null) { String[] dicts2 = flags.dictionary2.split(","); cdict2 = new ChineseDictionary(dicts2, cdtos, flags.expandMidDot); } }