/**
 * Determines the language of a piece of text, choosing among the given candidates.
 *
 * <p>A detector is obtained via {@code DetectorFactory.create(0.5f)}, fed with the text and
 * handed to {@code detectLanguage} together with the candidate set.
 *
 * @param text the text whose language should be determined
 * @param possibleLanguages the candidate languages passed on to {@code detectLanguage}
 * @return the result of {@code detectLanguage}, or {@link YLanguage#Undetermined} when a
 *         {@link LangDetectException} is raised
 */
public static YLanguage getLanguage(String text, Set<YLanguage> possibleLanguages) {
    YLanguage result;
    try {
        Detector languageDetector = DetectorFactory.create(0.5f);
        languageDetector.append(text);
        result = detectLanguage(possibleLanguages, languageDetector);
    } catch (LangDetectException detectionFailure) {
        // Detection is best effort: log at debug level and report "undetermined".
        log.debug("Couldn't determine content language", detectionFailure);
        result = YLanguage.Undetermined;
    }
    return result;
}
/** * Initializes the language detection.<p> */ private void initLanguageDetection() { try { // use a seed for initializing the language detection for making sure the // same probabilities are detected for the same document contents DetectorFactory.clear(); DetectorFactory.setSeed(42L); DetectorFactory.loadProfile(loadProfiles(getAvailableLocales())); } catch (Exception e) { LOG.error(Messages.get().getBundle().key(Messages.INIT_I18N_LANG_DETECT_FAILED_0), e); } }
/**
 * Creates a language identifier and loads the detection profiles.
 *
 * <p>Calls {@code DetectorFactory.clear()} first and then loads the profiles returned by
 * {@code loadProfiles("profiles", "profiles.cfg")}.
 *
 * @throws LangDetectException if anything goes wrong while loading the profiles; the
 *         underlying exception is attached as the cause
 */
public LanguageIdentifier() throws LangDetectException {
    DetectorFactory.clear();
    try {
        DetectorFactory.loadProfile(loadProfiles("profiles", "profiles.cfg"));
    } catch (Exception e) {
        LangDetectException failure =
                new LangDetectException(null, "Error in Initialization: " + e.getMessage());
        // Keep the original exception (and its stack trace) reachable via getCause();
        // previously only its message text survived.
        failure.initCause(e);
        throw failure;
    }
}
/**
 * Loads the language profiles from the directory given by the "directory" option and,
 * when a "seed" option is present, applies it to the {@code DetectorFactory}.
 *
 * <p>Note the inverted convention of the return value.
 *
 * @return {@code false} if the profiles were loaded, {@code true} if loading failed
 */
private boolean loadProfile() {
    final String profileDirectory = get("directory") + "/";
    boolean failed;
    try {
        DetectorFactory.loadProfile(profileDirectory);
        final Long seed = getLong("seed");
        if (seed != null) {
            DetectorFactory.setSeed(seed);
        }
        failed = false;
    } catch (LangDetectException e) {
        System.err.println("ERROR: " + e.getMessage());
        failed = true;
    }
    return failed;
}
DetectorFactory.clear(); int count = 0; int langsize = profiles.size(); for (LangProfile profile : profiles) { DetectorFactory.addProfile(profile, count, langsize); count++;
/**
 * Loads the language profiles stored in the given directory.
 * Call this once before any language detection is performed.
 *
 * <p>Convenience overload that wraps the path in a {@link File} and delegates to the
 * {@code File} variant.
 *
 * @param profileDirectory path of the directory containing the profiles
 * @throws LangDetectException if the profiles cannot be opened
 *         (error code = {@link ErrorCode#FileLoadError}) or a profile is malformed
 *         (error code = {@link ErrorCode#FormatError})
 */
public static void loadProfile(String profileDirectory) throws LangDetectException {
    File directory = new File(profileDirectory);
    loadProfile(directory);
}
is = new FileInputStream(file); LangProfile profile = JSON.decode(is, LangProfile.class); addProfile(profile, index, langsize); ++index; } catch (JSONException e) {
/**
 * Loads the language detection profiles from the classpath on the first call; later calls
 * return immediately.
 *
 * <p>For every entry of {@code detectableLanguages} the resource
 * {@code langdetect-profiles/<short code>} is read as UTF-8 text. All profiles are then
 * registered with the {@code DetectorFactory}, which is seeded with the current time.
 *
 * @throws GeneralBusinessException if a profile cannot be read or registered
 */
public static synchronized void loadData() {
    if (!loaded) {
        // The flag is raised before loading, so a failed load is not retried by later calls.
        loaded = true;
        try {
            Charset utf8 = Charset.forName("UTF-8");
            List<String> profiles = new ArrayList<String>();
            for (YLanguage language : detectableLanguages) {
                String resourcePath = "langdetect-profiles/" + language.getShortCode();
                try (InputStream in = new ClassPathResource(resourcePath).getInputStream();
                        BufferedReader profileReader = new BufferedReader(new InputStreamReader(in, utf8))) {
                    char[] profileChars = IOUtils.toCharArray(profileReader);
                    profiles.add(new String(profileChars));
                }
            }
            DetectorFactory.loadProfile(profiles);
            DetectorFactory.setSeed(System.currentTimeMillis());
        } catch (IOException | LangDetectException loadFailure) {
            throw new GeneralBusinessException(loadFailure);
        }
    }
}
/**
 * Load profiles from a list of JSON-encoded profile strings.
 * This method must be called once before language detection.
 *
 * @param json_profiles the language profiles, one JSON document per list entry;
 *        at least two entries are required
 * @throws LangDetectException fewer than two profiles were given
 *         (error code = {@link ErrorCode#NeedLoadProfileError})
 *         or a profile's format is wrong (error code = {@link ErrorCode#FormatError});
 *         in the latter case the JSON parser's exception is attached as the cause
 */
public static void loadProfile(List<String> json_profiles) throws LangDetectException {
    int index = 0;
    int langsize = json_profiles.size();
    if (langsize < 2) {
        // NOTE: the wording says "more than 2", but the check accepts exactly two profiles.
        // The message text is left unchanged for compatibility with existing callers.
        throw new LangDetectException(ErrorCode.NeedLoadProfileError, "Need more than 2 profiles");
    }

    for (String json : json_profiles) {
        try {
            LangProfile profile = JSON.decode(json, LangProfile.class);
            addProfile(profile, index, langsize);
            ++index;
        } catch (JSONException e) {
            LangDetectException formatError =
                    new LangDetectException(ErrorCode.FormatError, "profile format error");
            // Preserve the parser's exception so the malformed input can be diagnosed.
            formatError.initCause(e);
            throw formatError;
        }
    }
}
/**
 * Determines the language of the text supplied by a reader, choosing among the given
 * candidates.
 *
 * <p>A detector is obtained via {@code DetectorFactory.create(0.5f)}, fed from the reader
 * and handed to {@code detectLanguage} together with the candidate set.
 *
 * @param text reader providing the text whose language should be determined
 * @param possibleLanguages the candidate languages passed on to {@code detectLanguage}
 * @return the result of {@code detectLanguage}, or {@link YLanguage#Undetermined} when a
 *         {@link LangDetectException} or {@link IOException} is raised
 */
public static YLanguage getLanguage(Reader text, Set<YLanguage> possibleLanguages) {
    YLanguage result;
    try {
        Detector languageDetector = DetectorFactory.create(0.5f);
        languageDetector.append(text);
        result = detectLanguage(possibleLanguages, languageDetector);
    } catch (LangDetectException | IOException detectionFailure) {
        // Detection is best effort: log at debug level and report "undetermined".
        log.debug("Couldn't determine content language", detectionFailure);
        result = YLanguage.Undetermined;
    }
    return result;
}
/**
 * Returns the detector's language probabilities for the given text.
 *
 * @param text the text to analyse
 * @return the list produced by {@code Detector.getProbabilities()} for the text
 * @throws LangDetectException if the detector cannot be created or detection fails
 */
public List<Language> getLanguages(String text) throws LangDetectException {
    final Detector languageDetector = DetectorFactory.create();
    languageDetector.append(text);
    final List<Language> probabilities = languageDetector.getProbabilities();
    return probabilities;
}
/**
 * Returns the language the detector reports for the given text.
 *
 * @param text the text to analyse
 * @return the value returned by {@code Detector.detect()} for the text
 * @throws LangDetectException if the detector cannot be created or detection fails
 */
public String getLanguage(String text) throws LangDetectException {
    final Detector languageDetector = DetectorFactory.create();
    languageDetector.append(text);
    final String detectedLanguage = languageDetector.detect();
    return detectedLanguage;
}
/**
 * Returns the language the detector reports for the given text, using a detector created
 * via {@code DetectorFactory.create(0.5)}.
 *
 * @param text the text to analyse
 * @return the value returned by {@code Detector.detect()} for the text
 * @throws LangDetectException if the detector cannot be created or detection fails
 */
public static String detect(String text) throws LangDetectException {
    final Detector languageDetector = DetectorFactory.create(0.5);
    languageDetector.append(text);
    final String detectedLanguage = languageDetector.detect();
    return detectedLanguage;
}
} // closes an enclosing scope whose opening lies outside this excerpt
/**
 * Checks whether English is among the languages detected for the given text.
 *
 * @param content the text to inspect
 * @return true if one of the detected languages has the code "en"; false if the text is
 *         null or empty, if "en" is not among the detected languages, or if detection
 *         fails (the failure is logged as a warning)
 */
public Boolean isEnglish(String content) {
    if (content == null || content.isEmpty()) {
        return false;
    }
    try {
        Detector detector = DetectorFactory.create();
        detector.append(content);
        ArrayList<Language> candidates = detector.getProbabilities();
        // An empty candidate list simply leaves the flag at false.
        boolean englishFound = false;
        for (Language candidate : candidates) {
            if (candidate.lang.equals("en")) {
                englishFound = true;
                break;
            }
        }
        return englishFound;
    } catch (Exception ex) {
        logger.warn("Problem while detecting language in text: " + content, ex);
        return false;
    }
}
String text = line.substring(idx + 1); Detector detector = DetectorFactory.create(getDouble("alpha", DEFAULT_ALPHA)); detector.append(text); String lang = "";
/**
 * Runs language detection on every file of the argument list (--detectlang option) and
 * prints "file name:probabilities" for each of them to standard output.
 *
 * <pre>
 * usage: --detectlang -d [profile directory] -a [alpha] -s [seed] [test file(s)]
 * </pre>
 *
 * Does nothing if the profiles cannot be loaded. A failure on one file prints its stack
 * trace and processing continues with the next file.
 */
public void detectLang() {
    if (loadProfile()) {
        return;
    }
    for (String filename : arglist) {
        BufferedReader reader = null;
        try {
            FileInputStream fileStream = new FileInputStream(filename);
            reader = new BufferedReader(new InputStreamReader(fileStream, "utf-8"));

            Detector detector = DetectorFactory.create(getDouble("alpha", DEFAULT_ALPHA));
            if (hasOpt("--debug")) {
                detector.setVerbose();
            }
            detector.append(reader);
            System.out.println(filename + ":" + detector.getProbabilities());
        } catch (IOException e) {
            e.printStackTrace();
        } catch (LangDetectException e) {
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // a failure while closing is deliberately ignored
                }
            }
        }
    }
}
/** * Returns the locale for the given text based on the language detection library.<p> * * The result will be <code>null</code> if the detection fails or the detected locale is not configured * in the 'opencms-system.xml' as available locale.<p> * * @param text the text to retrieve the locale for * * @return the detected locale for the given text */ public static Locale getLocaleForText(String text) { // try to detect locale by language detector if (isNotEmptyOrWhitespaceOnly(text)) { try { Detector detector = DetectorFactory.create(); detector.append(text); String lang = detector.detect(); Locale loc = new Locale(lang); if (OpenCms.getLocaleManager().getAvailableLocales().contains(loc)) { return loc; } } catch (LangDetectException e) { LOG.debug(e); } } return null; }
/**
 * Tries to replace an unknown language by one detected from the given inputs.
 *
 * <p>Detection is attempted only if the short code of {@code currentLanguage} is empty,
 * {@code unknownLanguages} contains it and at least one input is non-empty. The first
 * detected language accepted by {@code isSupported} is returned.
 *
 * @param inputs the texts to run the detection on
 * @param currentLanguage the language currently assigned
 * @return the first supported detected language, or {@code currentLanguage} if detection
 *         is not attempted, yields no supported language or fails
 */
public static YLanguage processLanguage(Collection<String> inputs, YLanguage currentLanguage) {
    loadData();

    // Look for the first non-empty input; later inputs need not be inspected.
    boolean hasContent = false;
    for (String input : inputs) {
        if (!input.isEmpty()) {
            hasContent = true;
            break;
        }
    }

    if (!currentLanguage.getShortCode().isEmpty()
            || !unknownLanguages.contains(currentLanguage)
            || !hasContent) {
        return currentLanguage;
    }

    try {
        Detector detector = DetectorFactory.create(0.5);
        for (String input : inputs) {
            detector.append(input);
        }
        for (Language candidate : detector.getProbabilities()) {
            YLanguage detected = YLanguage.byCode(candidate.lang);
            if (isSupported(detected)) {
                return detected;
            }
        }
    } catch (LangDetectException detectionFailure) {
        log.debug("Couldn't determine content language", detectionFailure);
    }
    return currentLanguage;
}
Detector detector = DetectorFactory.create(); detector.append(textContent); detector.getProbabilities();