/**
 * Builds the relative local path under which this dump file is saved.
 *
 * @return the language code followed by "/" and the dump date
 */
public String getLocalPath() {
    String langCode = language.getLangCode();
    return langCode + "/" + date;
}
/** * Get the URL of the index wiki page of a specified language. * @return */ protected String getLanguageWikiUrl() { // langCode with dashes like "roa-tara" should be 'roa_tara' in dump links return BASEURL_STRING + "/" + lang.getLangCode().replace("-", "_") + "wiki/"; }
/**
 * Substitutes the language code of this object's {@code language} field for
 * every occurrence of the literal placeholder {@code "LANG"} in a string.
 *
 * @param str the text containing zero or more {@code "LANG"} placeholders
 * @return a copy of {@code str} with each placeholder replaced by the language code
 */
private String localize(String str) { return str.replace("LANG", language.getLangCode()); } private void initWord2Vec(String name) throws ConfigurationException, IOException, DaoException, InterruptedException {
/**
 * Writes the tab-separated header row to {@code output}: the five fixed
 * columns followed by one "&lt;langCode&gt;_SR" column per language in
 * {@code langs}. No line terminator is written.
 *
 * @throws IOException if writing to {@code output} fails
 */
private void writeHeader() throws IOException {
    // Every column after the first carries its own leading tab separator.
    String[] fixedColumns = {
        "ITEM_NAME_1",
        "\tITEM_ID_1",
        "\tITEM_NAME_2",
        "\tITEM_ID_2",
        "\tSPATIAL_DISTANCE"
    };
    for (String column : fixedColumns) {
        output.write(column);
    }
    for (Language lang : langs) {
        String srColumn = "\t" + lang.getLangCode() + "_SR";
        output.write(srColumn);
    }
}
/**
 * Renders the languages in {@code langs} as a sorted, comma-separated list
 * of language codes. The code of the default language is upper-cased so it
 * stands out from the others.
 *
 * @return the sorted language codes joined with ","
 */
public String getLangCodeString() {
    List<String> codes = Lists.newArrayList();
    for (Language lang : langs) {
        String code = lang.getLangCode();
        if (lang.equals(defaultLanguage)) {
            // Locale.ROOT keeps the result independent of the JVM default
            // locale; under a Turkish locale a plain toUpperCase() would
            // turn "it" into "İT" (dotted capital I).
            code = code.toUpperCase(java.util.Locale.ROOT);
        }
        codes.add(code);
    }
    Collections.sort(codes);
    return StringUtils.join(codes, ",");
}
synchronized PhraseAnalyzerLangDao getDao(Language lang) throws DaoException { File subDir = new File(dir, lang.getLangCode()); if (langDaos.containsKey(lang)) { return langDaos.get(lang); } else if (subDir.isDirectory() || isNew) { langDaos.put(lang, new PhraseAnalyzerLangDao(normalizer, lang, subDir, isNew)); return langDaos.get(lang); } else { // throw new DaoException("No phrase dao available for " + lang); return null; } }
/**
 * Constructs an {@link SRMetric} through the configurator, passing the
 * language code as the "language" runtime parameter.
 *
 * @return the constructed metric
 * @throws RuntimeException wrapping any {@link ConfigurationException}
 *         raised during construction
 */
@Override
public SRMetric create() {
    Map<String, String> params = new HashMap<String, String>();
    params.put("language", language.getLangCode());
    try {
        return configurator.construct(SRMetric.class, name, config, params);
    } catch (ConfigurationException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Maps the language with code "simple" to the language with code "en";
 * every other language is returned unchanged.
 *
 * @param lang the requested language
 * @return the language looked up by code "en" when {@code lang} has code
 *         "simple", otherwise {@code lang} itself
 */
private Language getRealLang(Language lang) {
    boolean isSimple = lang.getLangCode().equals("simple");
    return isSimple ? Language.getByLangCode("en") : lang;
}
/**
 * Extracts the language code of each entry, preserving input order.
 *
 * @param langs the language infos to read
 * @return a new list holding one language code per entry of {@code langs}
 */
static public List<String> getLangCodes(List<LanguageInfo> langs) {
    List<String> codes = new ArrayList<String>();
    for (LanguageInfo info : langs) {
        String code = info.getLanguage().getLangCode();
        codes.add(code);
    }
    return codes;
}
} // closes the enclosing type, whose header is outside this view
/**
 * Resolves the language to actually use: the language with code "simple"
 * is replaced by the one with code "en"; anything else passes through.
 *
 * @param lang the requested language
 * @return the "en" language for "simple", otherwise {@code lang}
 */
private Language getRealLang(Language lang) {
    if (!lang.getLangCode().equals("simple")) {
        return lang;
    }
    return Language.getByLangCode("en");
}
/**
 * Responds with the sorted language codes of all languages in the
 * environment, under the JSON key "languages".
 *
 * @param req the request to answer
 */
private void doLanguages(WikiBrainWebRequest req) {
    List<String> codes = new ArrayList<String>();
    for (Language installed : env.getLanguages()) {
        String code = installed.getLangCode();
        codes.add(code);
    }
    Collections.sort(codes);
    req.writeJsonResponse("languages", codes);
}
/**
 * Converts a page into a JSON-ready map with the keys "articleId",
 * "title" (the canonical title) and "lang" (the language code).
 *
 * @param p the page to describe; may be {@code null}
 * @return the populated map, or {@code null} when {@code p} is {@code null}
 */
private Map pageJson(LocalPage p) {
    if (p == null) {
        return null;
    }
    Map<String, Object> fields = new HashMap<String, Object>();
    fields.put("articleId", p.getLocalId());
    fields.put("title", p.getTitle().getCanonicalTitle());
    fields.put("lang", p.getLanguage().getLangCode());
    return fields;
}
/**
 * Fetches the SR metric named {@code vectorSrName} for a language from the
 * configurator and casts it to a dense-vector metric.
 *
 * @param lang the language whose metric is wanted
 * @return the metric, cast to {@link DenseVectorSRMetric}
 * @throws ConfigurationException if the configurator cannot supply the metric
 * @throws ClassCastException if the configured metric is not a
 *         {@link DenseVectorSRMetric}
 */
private DenseVectorSRMetric getVectorSr(Language lang) throws ConfigurationException {
    String langCode = lang.getLangCode();
    SRMetric metric = env.getConfigurator().get(
            SRMetric.class, vectorSrName, "language", langCode);
    return (DenseVectorSRMetric) metric;
}
/**
 * Builds a compact URL path of the form
 * {@code /w/<langCode>/<localId>/<title>}.
 *
 * Spaces in the canonical title become underscores and any remaining
 * whitespace is removed; no other escaping is applied.
 *
 * @return the compact path, for example "/w/en/1000/Hercule_Poirot"
 */
public String getCompactUrl() {
    String canonicalTitle = getTitle().getCanonicalTitle();
    String escapedTitle = canonicalTitle.replace(" ", "_").replaceAll("\\s+", "");

    StringBuilder url = new StringBuilder("/w/");
    url.append(getLanguage().getLangCode());
    url.append("/").append(getLocalId());
    url.append("/").append(escapedTitle);
    return url.toString();
}
public void init() throws ConfigurationException, DaoException { // Warm up necessary components for (Language l : env.getLanguages()) { LOG.info("warming up components for language: " + l); getSr(l); env.getConfigurator().get(Wikifier.class, "websail", "language", l.getLangCode()); } LOG.info("warming up pagerank"); LocalPage p = pageDao.get(new DaoFilter().setLimit(1)).iterator().next(); linkDao.getPageRank(p.toLocalId()); }
/**
 * Resolves the directory for a split:
 * {@code <modeDir>/<test language code>/<group>}.
 *
 * @param split the split whose directory is wanted
 * @return the file built from {@code modeDir}, the language code of the
 *         split's test set, and the split's group
 */
private File getLocalDir(Split split) {
    String langCode = split.getTest().getLanguage().getLangCode();
    String group = split.getGroup();
    return FileUtils.getFile(modeDir, langCode, group);
}
/**
 * Creates a resolver bound to the environment's default language.
 *
 * Reads the "titles" and "context" string lists and the "dab"
 * disambiguator name from {@code config}, then initializes the scorers.
 *
 * @param env the environment supplying languages and the configurator
 * @param config the resolver configuration
 * @throws ConfigurationException if a required component cannot be configured
 * @throws IllegalArgumentException if the default language is neither
 *         {@code Language.EN} nor {@code Language.SIMPLE}
 */
public GeoResolver(Env env, Config config) throws ConfigurationException {
    this.env = env;
    this.config = config;
    this.titleFields = config.getStringList("titles");
    this.contextFields = config.getStringList("context");
    this.language = env.getLanguages().getDefaultLanguage();

    // Fail fast, before the DAO and disambiguator are resolved, and say why.
    if (!Language.EN.equals(this.language) && !Language.SIMPLE.equals(this.language)) {
        throw new IllegalArgumentException(
                "GeoResolver only supports English and Simple English, "
                + "but the default language is " + this.language);
    }

    this.pageDao = env.getConfigurator().get(LocalPageDao.class);
    this.disambig = env.getConfigurator().get(
            Disambiguator.class, config.getString("dab"), "language", language.getLangCode());
    initScorers();
}
/**
 * Creates a resolver bound to the environment's default language.
 *
 * Reads the "titles" and "context" string lists and the "dab"
 * disambiguator name from {@code config}, then initializes the scorers.
 *
 * @param env the environment supplying languages and the configurator
 * @param config the resolver configuration
 * @throws ConfigurationException if a required component cannot be configured
 * @throws IllegalArgumentException if the default language is neither
 *         {@code Language.EN} nor {@code Language.SIMPLE}
 */
public GeoResolver(Env env, Config config) throws ConfigurationException {
    this.env = env;
    this.config = config;
    this.titleFields = config.getStringList("titles");
    this.contextFields = config.getStringList("context");
    this.language = env.getLanguages().getDefaultLanguage();

    // Fail fast, before the DAO and disambiguator are resolved, and say why.
    if (!Language.EN.equals(this.language) && !Language.SIMPLE.equals(this.language)) {
        throw new IllegalArgumentException(
                "GeoResolver only supports English and Simple English, "
                + "but the default language is " + this.language);
    }

    this.pageDao = env.getConfigurator().get(LocalPageDao.class);
    this.disambig = env.getConfigurator().get(
            Disambiguator.class, config.getString("dab"), "language", language.getLangCode());
    initScorers();
}
public static void main(String args[]) throws ConfigurationException, DaoException { Env env = EnvBuilder.envFromArgs(args); LocalPageDao pageDao = env.getConfigurator().get(LocalPageDao.class); // Get the default wikifier in the default language. Wikifier wikifier = env.getConfigurator().get( Wikifier.class, "websail", "language", env.getDefaultLanguage().getLangCode()); for (LocalLink link : wikifier.wikify(CORPUS)) { System.out.println("link is " + link.getAnchorText() + " is for " + pageDao.getById(link.getLanguage(), link.getDestId())); } } }
public static void main(String args[]) throws ConfigurationException, DaoException { Env env = EnvBuilder.envFromArgs(args); LocalPageDao pageDao = env.getConfigurator().get(LocalPageDao.class); // Get the default wikifier in the default language. Wikifier wikifier = env.getConfigurator().get( Wikifier.class, "websail", "language", env.getDefaultLanguage().getLangCode()); for (LocalLink link : wikifier.wikify(CORPUS)) { System.out.println("link is " + link.getAnchorText() + " is for " + pageDao.getById(link.getLanguage(), link.getDestId())); } } }