/**
 * Appends the target text for language detection, reading it from the given reader.
 * Reading continues while the accumulated text is shorter than the limit set by
 * {@link Detector#setMaxTextLength(int)} and the reader reports that it is ready;
 * each chunk read is handed to {@link #append(String)}.
 *
 * @param reader the input reader (usually a BufferedReader)
 * @throws IOException if the reader cannot be read
 */
public void append(Reader reader) throws IOException {
    // A zero-length buffer would make read() return 0 forever, so use at least one char.
    char[] buf = new char[Math.max(1, max_text_length / 2)];
    while (text.length() < max_text_length && reader.ready()) {
        int length = reader.read(buf);
        if (length < 0) {
            // End of stream: ready() may still report true here (e.g. StringReader),
            // and new String(buf, 0, -1) would throw StringIndexOutOfBoundsException.
            break;
        }
        append(new String(buf, 0, length));
    }
}
/**
 * Determines the language of a text, delegating the final decision to
 * {@code detectLanguage} with the supplied set of possible languages.
 *
 * @param text the text to analyse
 * @param possibleLanguages the languages passed on to {@code detectLanguage}
 * @return the result of {@code detectLanguage}, or {@code YLanguage.Undetermined}
 *         when a {@code LangDetectException} is thrown
 */
public static YLanguage getLanguage(String text, Set<YLanguage> possibleLanguages) {
    YLanguage outcome;
    try {
        Detector langDetector = DetectorFactory.create(0.5f);
        langDetector.append(text);
        outcome = detectLanguage(possibleLanguages, langDetector);
    } catch (LangDetectException detectionFailure) {
        log.debug("Couldn't determine content language", detectionFailure);
        outcome = YLanguage.Undetermined;
    }
    return outcome;
}
/**
 * Determines the language of the text supplied by a reader, delegating the final
 * decision to {@code detectLanguage} with the supplied set of possible languages.
 *
 * @param text the reader providing the text to analyse
 * @param possibleLanguages the languages passed on to {@code detectLanguage}
 * @return the result of {@code detectLanguage}, or {@code YLanguage.Undetermined}
 *         when a {@code LangDetectException} or {@code IOException} is thrown
 */
public static YLanguage getLanguage(Reader text, Set<YLanguage> possibleLanguages) {
    YLanguage outcome;
    try {
        Detector langDetector = DetectorFactory.create(0.5f);
        langDetector.append(text);
        outcome = detectLanguage(possibleLanguages, langDetector);
    } catch (LangDetectException | IOException failure) {
        log.debug("Couldn't determine content language", failure);
        outcome = YLanguage.Undetermined;
    }
    return outcome;
}
/**
 * Detects the language of the given text using a detector created with an
 * alpha value of 0.5.
 *
 * @param text the text to analyse
 * @return the value returned by {@code Detector.detect()}
 * @throws LangDetectException if the detector cannot be created or detection fails
 */
public static String detect(String text) throws LangDetectException {
    final Detector langDetector = DetectorFactory.create(0.5);
    langDetector.append(text);
    final String detected = langDetector.detect();
    return detected;
}
}
/**
 * Returns the detector's language probabilities for the given text.
 *
 * @param text the text to analyse
 * @return the list returned by {@code Detector.getProbabilities()}
 * @throws LangDetectException if the detector cannot be created or detection fails
 */
public List<Language> getLanguages(String text) throws LangDetectException {
    final Detector langDetector = DetectorFactory.create();
    langDetector.append(text);
    final List<Language> probabilities = langDetector.getProbabilities();
    return probabilities;
}
/**
 * Detects the languages of the given text by feeding it to a fresh detector
 * and passing that detector to {@code doDetect}.
 *
 * @param text the text to analyse
 * @return the result of {@code doDetect}
 * @throws LanguageDetectorException declared by this method; presumably raised by
 *         {@code doDetect} - confirm against its implementation
 */
public DetectedLanguages detect(String text) throws LanguageDetectorException {
    final Detector detector = new Detector(wordLangProbMap, detectableLanguages, 0L);
    detector.append(text);
    final DetectedLanguages detected = doDetect(detector);
    return detected;
}

public DetectedLanguages doDetect(Detector shuyoDetector)
/**
 * Detects the language of the given text with a default-configured detector.
 *
 * @param text the text to analyse
 * @return the value returned by {@code Detector.detect()}
 * @throws LangDetectException if the detector cannot be created or detection fails
 */
public String getLanguage(String text) throws LangDetectException {
    final Detector langDetector = DetectorFactory.create();
    langDetector.append(text);
    final String detected = langDetector.detect();
    return detected;
}
/**
 * Checks whether English ("en") appears among the languages the detector
 * reports for the given text.
 *
 * @param content the text to analyse; {@code null} or empty yields {@code false}
 * @return true if any reported language has the code "en"; false otherwise,
 *         including when detection throws (the problem is logged as a warning)
 */
public Boolean isEnglish(String content) {
    if (content == null || content.isEmpty()) {
        return false;
    }

    boolean englishFound = false;
    try {
        Detector langDetector = DetectorFactory.create();
        langDetector.append(content);
        ArrayList<Language> candidates = langDetector.getProbabilities();
        // An empty candidate list simply leaves the flag at false.
        for (Language candidate : candidates) {
            if (candidate.lang.equals("en")) {
                englishFound = true;
                break;
            }
        }
    } catch (Exception ex) {
        logger.warn("Problem while detecting language in text: " + content, ex);
        englishFound = false;
    }
    return englishFound;
}
public DetectedLanguages detect(Reader reader) throws LanguageDetectorException { //TODO wrap Reader in BufferedReader? Detector shuyoDetector = new Detector( wordLangProbMap, detectableLanguages, 0L); try { shuyoDetector.append(reader); // read the rest of Reader instance to ensure cleanliness int data = reader.read(); while(data != -1){ data = reader.read(); } } catch (IOException e) { throw new LanguageDetectorException( "Could not detect language from Reader.", e); } return doDetect(shuyoDetector); }
detector.append(text); String lang = ""; try {
/**
 * Language detection test for each file (--detectlang option).
 * Prints, for every file in the argument list, the file name followed by the
 * detector's language probabilities.
 *
 * <pre>
 * usage: --detectlang -d [profile directory] -a [alpha] -s [seed] [test file(s)]
 * </pre>
 *
 * Nothing is processed when {@code loadProfile()} returns true.
 */
public void detectLang() {
    if (loadProfile()) {
        return;
    }
    for (String filename : arglist) {
        BufferedReader input = null;
        try {
            FileInputStream rawStream = new FileInputStream(filename);
            input = new BufferedReader(new InputStreamReader(rawStream, "utf-8"));

            double alpha = getDouble("alpha", DEFAULT_ALPHA);
            Detector detector = DetectorFactory.create(alpha);
            if (hasOpt("--debug")) {
                detector.setVerbose();
            }
            detector.append(input);
            System.out.println(filename + ":" + detector.getProbabilities());
        } catch (LangDetectException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (input != null) {
                try {
                    input.close();
                } catch (IOException e) {}
            }
        }
    }
}
/**
 * Tries to replace an unknown language with one detected from the inputs.
 * Detection only runs when the current language has an empty short code, is
 * contained in {@code unknownLanguages}, and at least one input is non-empty.
 *
 * @param inputs the texts fed to the detector
 * @param currentLanguage the language currently assigned
 * @return the first detected language for which {@code isSupported} is true;
 *         otherwise {@code currentLanguage} (also when detection fails)
 */
public static YLanguage processLanguage(Collection<String> inputs, YLanguage currentLanguage) {
    loadData();

    boolean allInputsEmpty = true;
    for (String input : inputs) {
        if (!input.isEmpty()) {
            allInputsEmpty = false;
            break;
        }
    }

    boolean detectionNeeded = currentLanguage.getShortCode().isEmpty()
            && unknownLanguages.contains(currentLanguage)
            && !allInputsEmpty;
    if (!detectionNeeded) {
        return currentLanguage;
    }

    try {
        Detector langDetector = DetectorFactory.create(0.5);
        for (String input : inputs) {
            langDetector.append(input);
        }
        for (Language candidate : langDetector.getProbabilities()) {
            YLanguage mapped = YLanguage.byCode(candidate.lang);
            if (isSupported(mapped)) {
                return mapped;
            }
        }
    } catch (LangDetectException e) {
        log.debug("Couldn't determine content language", e);
    }
    // No supported candidate, or detection failed: keep the current language.
    return currentLanguage;
}
/** * Returns the locale for the given text based on the language detection library.<p> * * The result will be <code>null</code> if the detection fails or the detected locale is not configured * in the 'opencms-system.xml' as available locale.<p> * * @param text the text to retrieve the locale for * * @return the detected locale for the given text */ public static Locale getLocaleForText(String text) { // try to detect locale by language detector if (isNotEmptyOrWhitespaceOnly(text)) { try { Detector detector = DetectorFactory.create(); detector.append(text); String lang = detector.detect(); Locale loc = new Locale(lang); if (OpenCms.getLocaleManager().getAvailableLocales().contains(loc)) { return loc; } } catch (LangDetectException e) { LOG.debug(e); } } return null; }
String detectedLanguage = ""; Detector detector = DetectorFactory.create(); detector.append(textContent); detector.getProbabilities(); ArrayList<String> possibileLanguages = new ArrayList<>();