/**
 * Detects the language of the target text and reports the top candidate.
 *
 * <p>The result is the {@code lang} field of the first entry returned by
 * {@code getProbabilities()}; that list is presumably ordered by descending
 * probability (not visible from this method).
 *
 * @return the language name of the first candidate, or {@code UNKNOWN_LANG}
 *         when {@code getProbabilities()} yields no candidates
 * @throws LangDetectException if {@code getProbabilities()} fails
 */
public String detect() throws LangDetectException {
    final ArrayList<Language> candidates = getProbabilities();
    return candidates.isEmpty() ? UNKNOWN_LANG : candidates.get(0).lang;
}
/**
 * Picks the first candidate reported by the detector that is probable enough.
 *
 * <p>A candidate is accepted when its probability exceeds
 * {@code MINIMUM_PROBABILITY}, or when it exceeds
 * {@code MINIMUM_PROBABILITY_FROM_SET} and its language is one of
 * {@code possibleLanguages}.
 *
 * @param possibleLanguages languages that qualify for the second threshold
 * @param detector detector whose candidates are examined in iteration order
 * @return the first accepted language, or {@code YLanguage.Undetermined} if none qualifies
 * @throws LangDetectException if the detector cannot produce probabilities
 */
private static YLanguage detectLanguage(Set<YLanguage> possibleLanguages, Detector detector)
        throws LangDetectException {
    for (Language candidate : detector.getProbabilities()) {
        // Resolved up front for every candidate, exactly as before.
        final YLanguage resolved = YLanguage.byCode(candidate.lang);
        final boolean confidentOnItsOwn = candidate.prob > MINIMUM_PROBABILITY;
        if (confidentOnItsOwn
                || (candidate.prob > MINIMUM_PROBABILITY_FROM_SET && possibleLanguages.contains(resolved))) {
            return resolved;
        }
    }
    return YLanguage.Undetermined;
}
} // closes an enclosing scope opened outside this excerpt
/**
 * Runs language detection over the given text using a freshly created detector.
 *
 * @param text the text appended to the detector
 * @return the candidates reported by {@code Detector.getProbabilities()}
 * @throws LangDetectException if the detector cannot be created or detection fails
 */
public List<Language> getLanguages(String text) throws LangDetectException {
    final Detector langDetector = DetectorFactory.create();
    langDetector.append(text);
    final List<Language> candidates = langDetector.getProbabilities();
    return candidates;
}
/**
 * Wraps the probabilities of the given detector in a {@code DetectedLanguages}.
 *
 * @param shuyoDetector detector to query for its probabilities
 * @return a {@code DetectedLanguages} built from the detector's probabilities
 * @throws LanguageDetectorException if a {@code LangDetectException} is raised;
 *         the original exception is kept as the cause
 */
public DetectedLanguages doDetect(Detector shuyoDetector) throws LanguageDetectorException {
    final DetectedLanguages detected;
    try {
        detected = new DetectedLanguages(shuyoDetector.getProbabilities());
    } catch (LangDetectException cause) {
        throw new LanguageDetectorException("Cannot detect language(s).", cause);
    }
    return detected;
}
/**
 * Checks whether English ({@code "en"}) is among the languages detected for the text.
 *
 * <p>Any exception raised during detection is logged at warn level and
 * treated as "not English".
 *
 * @param content the text to inspect; {@code null} or empty yields {@code false}
 * @return true if one of the detected candidates has language code {@code "en"},
 *         false otherwise (including on detection failure)
 */
public Boolean isEnglish(String content) {
    // Nothing to analyse: answer without creating a detector.
    if (content == null || content.isEmpty()) {
        return false;
    }
    try {
        Detector detector = DetectorFactory.create();
        detector.append(content);
        for (Language candidate : detector.getProbabilities()) {
            if (candidate.lang.equals("en")) {
                return true;
            }
        }
        // Covers both "no candidates" and "no English candidate".
        return false;
    } catch (Exception ex) {
        logger.warn("Problem while detecting language in text: " + content, ex);
        return false;
    }
}
/**
 * Language detection test for each file (--detectlang option).
 *
 * <pre>
 * usage: --detectlang -d [profile directory] -a [alpha] -s [seed] [test file(s)]
 * </pre>
 *
 * <p>Does nothing when {@code loadProfile()} returns true. Otherwise every
 * file in {@code arglist} is read as UTF-8 and one line of the form
 * {@code filename:probabilities} is printed to standard output. Failures on
 * one file are printed as stack traces and do not stop the remaining files.
 */
public void detectLang() {
    if (loadProfile()) {
        return;
    }
    for (String filename : arglist) {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "utf-8"));

            final double alpha = getDouble("alpha", DEFAULT_ALPHA);
            Detector detector = DetectorFactory.create(alpha);
            if (hasOpt("--debug")) {
                detector.setVerbose();
            }
            detector.append(reader);
            System.out.println(filename + ":" + detector.getProbabilities());
        } catch (IOException e) {
            e.printStackTrace();
        } catch (LangDetectException e) {
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    // A failure to close is deliberately ignored.
                }
            }
        }
    }
}
/**
 * Tries to replace an unknown language with one detected from the inputs.
 *
 * <p>Detection is attempted only when {@code currentLanguage} has an empty
 * short code, is contained in {@code unknownLanguages}, and at least one
 * input is non-empty. The first detected candidate for which
 * {@code isSupported} holds is returned.
 *
 * @param inputs the texts fed to the detector
 * @param currentLanguage the language currently assigned to the content
 * @return the first supported detected language, or {@code currentLanguage}
 *         when detection is not attempted, fails, or finds nothing supported
 */
public static YLanguage processLanguage(Collection<String> inputs, YLanguage currentLanguage) {
    loadData();

    // Stop scanning at the first non-empty input.
    boolean allInputsEmpty = true;
    for (String text : inputs) {
        if (!text.isEmpty()) {
            allInputsEmpty = false;
            break;
        }
    }

    if (!currentLanguage.getShortCode().isEmpty()
            || !unknownLanguages.contains(currentLanguage)
            || allInputsEmpty) {
        return currentLanguage;
    }

    try {
        Detector detector = DetectorFactory.create(0.5);
        for (String text : inputs) {
            detector.append(text);
        }
        for (Language candidate : detector.getProbabilities()) {
            YLanguage resolved = YLanguage.byCode(candidate.lang);
            if (isSupported(resolved)) {
                return resolved;
            }
        }
    } catch (LangDetectException e) {
        log.debug("Couldn't determine content language", e);
    }
    // No supported candidate, or detection failed: keep what we had.
    return currentLanguage;
}
Detector detector = DetectorFactory.create(); detector.append(textContent); detector.getProbabilities(); ArrayList<String> possibileLanguages = new ArrayList<>(); ArrayList<Language> possibilities = detector.getProbabilities(); for (Language possibility : possibilities) { possibileLanguages.add(possibility.lang);