private static com.optimaize.langdetect.LanguageDetector createDetector(List<LanguageProfile> languageProfiles, Map<String, Float> languageProbabilities) { // FUTURE currently the short text algorithm doesn't normalize probabilities until the end, which // means you can often get 0 probabilities. So we pick a very short length for this limit. LanguageDetectorBuilder builder = LanguageDetectorBuilder.create(NgramExtractors.standard()) .shortTextAlgorithm(30) .withProfiles(languageProfiles); if (languageProbabilities != null) { Map<LdLocale, Double> languageWeights = new HashMap<>(languageProbabilities.size()); for (String language : languageProbabilities.keySet()) { Double priority = (double)languageProbabilities.get(language); languageWeights.put(LdLocale.fromString(language), priority); } builder.languagePriorities(languageWeights); } return builder.build(); }
/**
 * Tells whether a file name could be a language profile name.
 *
 * @param fileName the file name to inspect
 * @return {@code true} when the name contains no dot and {@code LdLocale.fromString}
 *         accepts it without throwing; {@code false} otherwise
 */
private boolean looksLikeLanguageProfileName(String fileName) {
    if (!fileName.contains(".")) {
        try {
            LdLocale.fromString(fileName);
            return true;
        } catch (Exception ignored) {
            // Not parseable as a locale: fall through to the negative answer below.
        }
    }
    return false;
}
/**
 * Checks whether the given file name is shaped like a language profile name.
 *
 * @param fileName the file name to test
 * @return {@code false} if the name contains a dot or cannot be parsed by
 *         {@code LdLocale.fromString}; {@code true} otherwise
 */
private boolean looksLikeLanguageProfileName(String fileName) {
    boolean hasDot = fileName.contains(".");
    if (hasDot) {
        return false;
    }

    boolean parsesAsLocale;
    try {
        LdLocale.fromString(fileName);
        parsesAsLocale = true;
    } catch (Exception ignored) {
        // Any parsing failure means the name is not a locale.
        parsesAsLocale = false;
    }
    return parsesAsLocale;
}
@Deprecated public LanguageProfileBuilder(@NotNull String locale) { this.locale = LdLocale.fromString(locale); }
@Deprecated public LanguageProfileBuilder(@NotNull String locale) { this.locale = LdLocale.fromString(locale); }
/**
 * Converts a {@code LangProfile} into a {@code LanguageProfile}.
 *
 * <p>The profile name is parsed as a locale, then every entry of the frequency map is
 * added to a new builder as an n-gram with its count.
 *
 * @param langProfile the profile to convert
 * @return the profile produced by the builder
 * @throws RuntimeException if the profile name cannot be parsed as a locale; the
 *         original failure is kept as the cause
 */
public static LanguageProfile convert(LangProfile langProfile) {
    final LdLocale profileLocale;
    try {
        profileLocale = LdLocale.fromString(langProfile.getName());
    } catch (Exception cause) {
        throw new RuntimeException("Profile file name logic was changed in v0.5, please update your custom profiles!", cause);
    }

    final LanguageProfileBuilder profileBuilder = new LanguageProfileBuilder(profileLocale);
    for (Map.Entry<String, Integer> gramCount : langProfile.getFreq().entrySet()) {
        profileBuilder.addGram(gramCount.getKey(), gramCount.getValue());
    }
    return profileBuilder.build();
}
/**
 * Builds a {@code LanguageProfile} from the data held by a {@code LangProfile}.
 *
 * @param langProfile source profile; its name is parsed with {@code LdLocale.fromString}
 *                    and its frequency map supplies the n-grams
 * @return the resulting language profile
 * @throws RuntimeException wrapping the parse failure when the profile name is not a
 *         valid locale string
 */
public static LanguageProfile convert(LangProfile langProfile) {
    final LdLocale parsedName;
    try {
        parsedName = LdLocale.fromString(langProfile.getName());
    } catch (Exception parseFailure) {
        throw new RuntimeException("Profile file name logic was changed in v0.5, please update your custom profiles!", parseFailure);
    }

    final LanguageProfileBuilder target = new LanguageProfileBuilder(parsedName);
    // Copy every n-gram with its frequency into the builder.
    for (Map.Entry<String, Integer> frequency : langProfile.getFreq().entrySet()) {
        target.addGram(frequency.getKey(), frequency.getValue());
    }
    return target.build();
}
private static com.optimaize.langdetect.LanguageDetector createDetector(List<LanguageProfile> languageProfiles, Map<String, Float> languageProbabilities) { // FUTURE currently the short text algorithm doesn't normalize probabilities until the end, which // means you can often get 0 probabilities. So we pick a very short length for this limit. LanguageDetectorBuilder builder = LanguageDetectorBuilder.create(NgramExtractors.standard()) .shortTextAlgorithm(30) .withProfiles(languageProfiles); if (languageProbabilities != null) { Map<LdLocale, Double> languageWeights = new HashMap<>(languageProbabilities.size()); for (String language : languageProbabilities.keySet()) { Double priority = (double)languageProbabilities.get(language); languageWeights.put(LdLocale.fromString(language), priority); } builder.languagePriorities(languageWeights); } return builder.build(); }
private com.optimaize.langdetect.LanguageDetector createDetector(List<LanguageProfile> languageProfiles) { // FUTURE currently the short text algorithm doesn't normalize probabilities until the end, which // means you can often get 0 probabilities. So we pick a very short length for this limit. LanguageDetectorBuilder builder = LanguageDetectorBuilder.create(NgramExtractors.standard()) .shortTextAlgorithm(30) .withProfiles(languageProfiles); if (languageProbabilities != null) { Map<LdLocale, Double> languageWeights = new HashMap<>(languageProbabilities.size()); for (String language : languageProbabilities.keySet()) { Double priority = (double)languageProbabilities.get(language); languageWeights.put(LdLocale.fromString(language), priority); } builder.languagePriorities(languageWeights); } return builder.build(); }
/**
 * Determines a language from a textual source.
 *
 * <p>Falls back to {@code Constants.defaultLanguage} when the detector yields no result
 * or when the detected language code matches none of the {@code Language} values.
 *
 * @param source textual source
 * @return the majority language detected within the source, or the default language
 */
private static Language getLanguageFrom(String source) {
    TextObject textObject = CommonTextObjectFactories.forDetectingOnLargeText().forText(source);

    // Locale.ROOT keeps lower-casing independent of the JVM default locale
    // (e.g. under a Turkish locale "I" would otherwise become a dotless "ı").
    LdLocale lang = getLanguageDetector().detect(textObject)
            .or(LdLocale.fromString(Constants.defaultLanguage.getAbrev().toLowerCase(java.util.Locale.ROOT)));

    String detectedCode = lang.getLanguage();
    for (Language lg : Language.values()) {
        if (detectedCode.equals(lg.getAbrev().toLowerCase(java.util.Locale.ROOT))) {
            return lg;
        }
    }
    return Constants.defaultLanguage;
}