/**
 * Checks whether the language identified by the given code is present in {@code langs}.
 *
 * @param langCode language code, resolved through {@code Language.getByLangCode}
 * @return {@code true} if {@code langs} contains the resolved language
 */
public boolean containsLanguage(String langCode) {
    Language requested = Language.getByLangCode(langCode);
    return langs.contains(requested);
}
/**
 * Resolves every language code in the given collection to a {@code Language}.
 * Each code is trimmed before it is looked up.
 *
 * @param langCodes the codes to resolve
 * @return a new list holding the resolved languages in the iteration order of {@code langCodes}
 */
private static Collection<Language> getLangsFromCodes(Collection<String> langCodes) {
    Collection<Language> resolved = new ArrayList<Language>();
    for (String rawCode : langCodes) {
        String code = rawCode.trim();
        Language language = Language.getByLangCode(code);
        resolved.add(language);
    }
    return resolved;
}
/**
 * Parses a corpus header line into its fields.
 *
 * The line must start with {@code @WikiBrainCorpus} and consist of exactly five
 * tab-separated tokens: the marker, the language code, the corpus class,
 * the wikifier class and the creation time.
 *
 * @param line the header line to parse
 * @throws IllegalArgumentException if the line does not start with the marker
 *         or does not split into exactly five tab-separated tokens
 */
public CorpusInfo(String line) {
    if (!line.startsWith("@WikiBrainCorpus")) {
        throw new IllegalArgumentException("Invalid corpus line: " + line);
    }
    String[] tokens = line.split("\t");
    if (tokens.length != 5) {
        throw new IllegalArgumentException("Invalid corpus line: " + line);
    }
    // Trim the language code like every other field, so stray whitespace around
    // the token cannot make the language lookup fail.
    language = Language.getByLangCode(tokens[1].trim());
    corpusClass = tokens[2].trim();
    wikifierClass = tokens[3].trim();
    creationTime = tokens[4].trim();
}
/**
 * Maps the language whose code is "simple" to the language with code "en";
 * every other language is returned unchanged.
 *
 * @param lang the language to normalize
 * @return the "en" language for "simple", otherwise {@code lang} itself
 */
private Language getRealLang(Language lang) {
    boolean isSimple = lang.getLangCode().equals("simple");
    return isSimple ? Language.getByLangCode("en") : lang;
}
public Language getLanguage(String link) { int end = link.lastIndexOf("wiki"); if (end < 1) { throw new IllegalStateException("No language detected for " + link); } int beg; for (beg = end-1; beg >=0 && isLangChar(link.charAt(beg)); beg--) { // All work is done in loop condition. } return Language.getByLangCode(link.substring(beg + 1, end)); }
/**
 * Returns the language to use in place of {@code lang}: the "en" language when
 * {@code lang} has the code "simple", and {@code lang} itself in every other case.
 *
 * @param lang the language to normalize
 * @return the substituted or original language
 */
private Language getRealLang(Language lang) {
    if (!lang.getLangCode().equals("simple")) {
        return lang;
    }
    return Language.getByLangCode("en");
}
/**
 * Looks up a language by code, tolerating common variations.
 *
 * The code is normalized by replacing underscores with hyphens and lowercasing.
 * The normalized code is tried first; if it is unknown and contains a hyphen,
 * the part before the first hyphen is tried next.
 *
 * @param langCode the language code to resolve
 * @return the first language found for one of the candidate codes
 * @throws IllegalArgumentException if none of the candidate codes is known
 */
public static Language getByLangCodeLenient(String langCode) {
    // Lowercase with a fixed locale so the result does not depend on the JVM's
    // default locale (e.g. a Turkish default locale lowercases 'I' to a dotless i).
    langCode = langCode.replace('_', '-').toLowerCase(java.util.Locale.ROOT);

    List<String> flavors = new ArrayList<String>();
    flavors.add(langCode);
    int dash = langCode.indexOf('-');
    if (dash >= 0) {
        flavors.add(langCode.substring(0, dash));
    }

    for (String flavor : flavors) {
        try {
            return getByLangCode(flavor);
        } catch (IllegalArgumentException ignored) {
            // This candidate is unknown; fall through and try the next one.
        }
    }
    throw new IllegalArgumentException("unknown langCode: '" + langCode + "'");
}
/**
 * Renders the entity as {@code WikidataEntity{type=..., id=..., name=...}}.
 * The name is the label stored under the "en" language if that key exists,
 * otherwise the first label in iteration order, or "unknown" when there are no labels.
 */
@Override
public String toString() {
    Language english = Language.getByLangCode("en");
    String name = "unknown";
    if (labels.containsKey(english)) {
        name = labels.get(english);
    } else if (!labels.isEmpty()) {
        name = labels.values().iterator().next();
    }
    return "WikidataEntity{type=" + type
            + ", id=" + id
            + ", name=" + name
            + '}';
}
}
public DumpLinkInfo(String langCode, String date, String linkMatcher, String url, String md5, int counter) throws MalformedURLException { this.language = Language.getByLangCode(langCode); this.date = date; this.linkMatcher = FileMatcher.getByName(linkMatcher); this.url = new URL(url); this.md5 = md5; this.counter = counter; }
/**
 * Builds a short description of the form {@code WikidataEntity{type=..., id=..., name=...}}.
 * The label stored under the "en" language is used as the name when that key exists;
 * with no labels at all the name is "unknown"; otherwise the first label in
 * iteration order is used.
 */
@Override
public String toString() {
    Language english = Language.getByLangCode("en");
    String name;
    if (labels.containsKey(english)) {
        name = labels.get(english);
    } else {
        name = labels.isEmpty() ? "unknown" : labels.values().iterator().next();
    }
    String description = "WikidataEntity{" + "type=" + type;
    description += ", id=" + id;
    description += ", name=" + name;
    return description + '}';
}
}
/**
 * Returns the {@code LanguageInfo} for the language with the given code.
 * The entry is read from {@code LANGUAGE_INFOS} at the language's id minus one.
 *
 * @param langCode language code, resolved through {@code Language.getByLangCode}
 * @return the matching entry of {@code LANGUAGE_INFOS}
 */
public static LanguageInfo getByLangCode(String langCode) {
    Language language = Language.getByLangCode(langCode);
    int index = language.getId() - 1;
    return LANGUAGE_INFOS[index];
}
/**
 * Returns the "en" language if this set contains it, otherwise the "simple"
 * language if this set contains that. When neither is present, the outcome
 * depends on {@code returnDefaultLangIfEnglishNotAvailable}.
 *
 * @param returnDefaultLangIfEnglishNotAvailable whether to fall back to the default
 *        language instead of failing when neither "en" nor "simple" is in the set
 * @return the "en" language, the "simple" language, or the default language
 * @throws WikiBrainException if neither is in the set and no fallback was requested
 */
public Language getBestAvailableEnglishLang(boolean returnDefaultLangIfEnglishNotAvailable) throws WikiBrainException {
    // Candidates in order of preference.
    for (String code : new String[] {"en", "simple"}) {
        if (this.containsLanguage(Language.getByLangCode(code))) {
            return Language.getByLangCode(code);
        }
    }
    if (!returnDefaultLangIfEnglishNotAvailable) {
        throw new WikiBrainException("No English language available");
    }
    return this.getDefaultLanguage();
}
/**
 * Converts a compact url representation of a page to a LocalPage.
 *
 * A compact url starts with {@code /w/} and splits on {@code /} into five parts,
 * of which the third is a known language code, the fourth an integer id and the
 * fifth the remainder of the string (which may itself contain slashes).
 *
 * @param s the string to parse
 * @return The local page, or null if the string was not a compact url
 *         (including when the id part is not a valid integer).
 */
public static LocalPage fromCompactUrl(String s) {
    // Limit of 5 keeps any further slashes inside the last part.
    String[] parts = s.split("/", 5);
    if (!s.startsWith("/w/") || parts.length != 5 || !Language.hasLangCode(parts[2])) {
        return null;
    }
    Integer pageId;
    try {
        pageId = Integer.valueOf(parts[3]);
    } catch (NumberFormatException ignored) {
        // A non-numeric id means this is not a compact url; report it the same
        // way as every other mismatch instead of letting the exception escape.
        return null;
    }
    return new LocalPage(Language.getByLangCode(parts[2]), pageId, parts[4]);
}
/**
 * Returns the token stream for the given tokenizer, wrapped in a stop-word
 * filter when {@code useStopWords} is set. The stop words for language code "sk"
 * are loaded on first use, regardless of {@code useStopWords}.
 *
 * @param tokenizer        the source of tokens
 * @param stemExclusionSet not used by this implementation
 * @return the tokenizer itself, or a {@code StopFilter} around it
 */
@Override
public TokenStream getTokenStream(Tokenizer tokenizer, CharArraySet stemExclusionSet) {
    if (stopWords == null) {
        Language language = Language.getByLangCode("sk");
        stopWords = getStopWordsForNonLuceneLangFromFile(matchVersion, language);
    }
    TokenStream unfiltered = tokenizer;
    if (!useStopWords) {
        return unfiltered;
    }
    return new StopFilter(matchVersion, unfiltered, stopWords);
}
}
public static void main(String[] args) throws IOException { DumpLinkGetter testGetter = new DumpLinkGetter(Language.getByLangCode("en"), Arrays.asList(FileMatcher.ARTICLES), "20130604"); // System.out.println(testGetter.getMd5Sums(testGetter.getFileLinks())); System.out.println(testGetter.getDumpFiles(testGetter.getFileLinks())); }
/**
 * Returns the token stream for the given tokenizer, wrapped in a stop-word
 * filter when {@code useStopWords} is set. The stop words for language code "he"
 * are loaded on first use, regardless of {@code useStopWords}.
 *
 * @param tokenizer        the source of tokens
 * @param stemExclusionSet not used by this implementation
 * @return the tokenizer itself, or a {@code StopFilter} around it
 */
@Override
public TokenStream getTokenStream(Tokenizer tokenizer, CharArraySet stemExclusionSet) {
    if (stopWords == null) {
        Language language = Language.getByLangCode("he");
        stopWords = LanguageTokenizer.getStopWordsForNonLuceneLangFromFile(matchVersion, language);
    }
    TokenStream unfiltered = tokenizer;
    if (!useStopWords) {
        return unfiltered;
    }
    return new StopFilter(matchVersion, unfiltered, stopWords);
}
}
/**
 * Creates an {@code IdentityWikifier} for configurations whose "type" is "identity".
 *
 * @param name          not used by this implementation
 * @param config        configuration; "type" and "localLinkDao" are read from it
 * @param runtimeParams must contain the key "language" holding a language code
 * @return the wikifier, or {@code null} if the configured type is not "identity"
 * @throws IllegalArgumentException if {@code runtimeParams} is null or has no "language" key
 * @throws ConfigurationException declared for the configurator lookups
 */
@Override
public Wikifier get(String name, Config config, Map<String, String> runtimeParams) throws ConfigurationException {
    // NOTE(review): the language check runs before the type check, so a config of
    // another type without a "language" parameter throws rather than returning null
    // -- confirm this is intended.
    boolean hasLanguage = runtimeParams != null && runtimeParams.containsKey("language");
    if (!hasLanguage) {
        throw new IllegalArgumentException("Wikifier requires 'language' runtime parameter.");
    }

    String type = config.getString("type");
    if (!type.equals("identity")) {
        return null;
    }

    Language language = Language.getByLangCode(runtimeParams.get("language"));
    String linkName = config.getString("localLinkDao");
    RawPageDao rawPageDao = getConfigurator().get(RawPageDao.class);
    LocalLinkDao linkDao = getConfigurator().get(LocalLinkDao.class, linkName);
    return new IdentityWikifier(language, rawPageDao, linkDao);
}
}
/**
 * Reads the required "lang" parameter and resolves it to a language.
 *
 * @return the language for the "lang" parameter
 * @throws WikiBrainWebException if the parameter value is not a known language code
 */
public Language getLanguage() {
    String code = getParamOrDie("lang");
    if (Language.hasLangCode(code)) {
        return Language.getByLangCode(code);
    }
    throw new WikiBrainWebException("Unknown language code: " + code);
}
/**
 * Example entry point: builds the environment, obtains the "ensemble" SR metric
 * for the "simple" language and prints the similarity of "cat" and "kitty".
 *
 * Configuration and DAO failures are reported on standard output; in that case
 * no score is printed.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    SRResult s = null;
    try {
        Env env = new EnvBuilder().build();
        Configurator conf = env.getConfigurator();
        // Not used below; the lookup itself is kept from the original example.
        LocalPageDao lpDao = conf.get(LocalPageDao.class);
        Language simple = Language.getByLangCode("simple");
        SRMetric sr = conf.get(
                SRMetric.class, "ensemble",
                "language", simple.getLangCode());
        s = sr.similarity("cat", "kitty", true);
    } catch (ConfigurationException e) {
        System.out.println("Configuration Exception: "+e.getMessage());
    } catch (DaoException e) {
        System.out.println("Dao Exception: "+e.getMessage());
    }
    // s is still null when one of the steps above failed; dereferencing it
    // unconditionally would replace the reported error with a NullPointerException.
    if (s != null) {
        System.out.println("The score for this two pages:"+s.getScore());
    } else {
        System.err.println("No similarity result available.");
    }
}
}
/**
 * Example entry point: builds the environment, obtains the "ensemble" SR metric
 * for the "simple" language and prints the similarity of "cat" and "kitty".
 *
 * Configuration and DAO failures are reported on standard output; in that case
 * no score is printed.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    SRResult s = null;
    try {
        Env env = new EnvBuilder().build();
        Configurator conf = env.getConfigurator();
        // Not used below; the lookup itself is kept from the original example.
        LocalPageDao lpDao = conf.get(LocalPageDao.class);
        Language simple = Language.getByLangCode("simple");
        SRMetric sr = conf.get(
                SRMetric.class, "ensemble",
                "language", simple.getLangCode());
        s = sr.similarity("cat", "kitty", true);
    } catch (ConfigurationException e) {
        System.out.println("Configuration Exception: "+e.getMessage());
    } catch (DaoException e) {
        System.out.println("Dao Exception: "+e.getMessage());
    }
    // s is still null when one of the steps above failed; dereferencing it
    // unconditionally would replace the reported error with a NullPointerException.
    if (s != null) {
        System.out.println("The score for this two pages:"+s.getScore());
    } else {
        System.err.println("No similarity result available.");
    }
}
}