/**
 * Finishes processing of the current tag.
 * <p>
 * The buffered text is added to {@code profile} (and {@code count_} is incremented) only when
 * all of the following hold, checked in this order: {@code profile} is not {@code null},
 * {@code tag_} equals {@code target_}, the buffer is longer than {@code threshold_}, and
 * {@link #isSpace()} returns {@code false}. Regardless of the outcome, {@link #clear()} is
 * always invoked afterwards.
 *
 * @param profile profile receiving the buffered text; may be {@code null}, in which case
 *                nothing is added
 */
public void closeTag(LangProfile profile) {
    // Evaluation order matters: the null check on profile short-circuits everything else.
    boolean shouldRecord = profile != null
            && tag_.equals(target_)
            && buf_.length() > threshold_
            && !isSpace();

    if (shouldRecord) {
        Util.addCharSequence(profile, textObjectFactory.forText(buf_));
        ++count_;
    }

    clear();
}
/**
 * Reads a text file line by line and builds a language profile from its content.
 * <p>
 * The file is decoded as UTF-8. When the file name ends with {@code ".gz"} the content is
 * read through a {@link GZIPInputStream}. Every line is wrapped in one leading and one
 * trailing space before it is added to the profile.
 *
 * @param lang target language name.
 * @param textFile input text file.
 * @return Language profile instance
 * @throws RuntimeException wrapping the underlying {@link IOException} when the file cannot
 *                          be opened or read
 */
public static LangProfile generate(String lang, File textFile) {
    LangProfile profile = new LangProfile(lang);
    InputStream is = null;
    try {
        is = new BufferedInputStream(new FileInputStream(textFile));
        boolean gzipped = textFile.getName().endsWith(".gz");
        if (gzipped) {
            is = new GZIPInputStream(is);
        }

        Charset utf8 = Charset.forName("UTF-8");
        BufferedReader lineSource = new BufferedReader(new InputStreamReader(is, utf8));
        for (String row = lineSource.readLine(); row != null; row = lineSource.readLine()) {
            // Pad each line with a space on both sides before handing it to the profile.
            TextObject padded = textObjectFactory.forText(" " + row + " ");
            Util.addCharSequence(profile, padded);
        }
    } catch (IOException e) {
        throw new RuntimeException("Can't open training database file '" + textFile.getName() + "'", e);
    } finally {
        // Closing the outermost stream held in 'is'; close failures are ignored.
        IOUtils.closeQuietly(is);
    }
    return profile;
}
}
/**
 * Finishes processing of the current tag.
 * <p>
 * The buffered text is added to {@code profile} (and {@code count_} is incremented) only when
 * all of the following hold, checked in this order: {@code profile} is not {@code null},
 * {@code tag_} equals {@code target_}, the buffer is longer than {@code threshold_}, and
 * {@link #isSpace()} returns {@code false}. Regardless of the outcome, {@link #clear()} is
 * always invoked afterwards.
 *
 * @param profile profile receiving the buffered text; may be {@code null}, in which case
 *                nothing is added
 */
public void closeTag(LangProfile profile) {
    // Evaluation order matters: the null check on profile short-circuits everything else.
    boolean shouldRecord = profile != null
            && tag_.equals(target_)
            && buf_.length() > threshold_
            && !isSpace();

    if (shouldRecord) {
        Util.addCharSequence(profile, textObjectFactory.forText(buf_));
        ++count_;
    }

    clear();
}
/**
 * Reads a text file line by line and builds a language profile from its content.
 * <p>
 * The file is decoded as UTF-8. When the file name ends with {@code ".gz"} the content is
 * read through a {@link GZIPInputStream}. Every line is wrapped in one leading and one
 * trailing space before it is added to the profile.
 *
 * @param lang target language name.
 * @param textFile input text file.
 * @return Language profile instance
 * @throws RuntimeException wrapping the underlying {@link IOException} when the file cannot
 *                          be opened or read
 */
public static LangProfile generate(String lang, File textFile) {
    LangProfile profile = new LangProfile(lang);
    InputStream is = null;
    try {
        is = new BufferedInputStream(new FileInputStream(textFile));
        boolean gzipped = textFile.getName().endsWith(".gz");
        if (gzipped) {
            is = new GZIPInputStream(is);
        }

        Charset utf8 = Charset.forName("UTF-8");
        BufferedReader lineSource = new BufferedReader(new InputStreamReader(is, utf8));
        for (String row = lineSource.readLine(); row != null; row = lineSource.readLine()) {
            // Pad each line with a space on both sides before handing it to the profile.
            TextObject padded = textObjectFactory.forText(" " + row + " ");
            Util.addCharSequence(profile, padded);
        }
    } catch (IOException e) {
        throw new RuntimeException("Can't open training database file '" + textFile.getName() + "'", e);
    } finally {
        // Closing the outermost stream held in 'is'; close failures are ignored.
        IOUtils.closeQuietly(is);
    }
    return profile;
}
}