/**
 * Builds the relative local path under which this dump file is saved.
 *
 * @return the language code of {@code language} and the value of
 *         {@code date}, joined by a single "/"
 */
public String getLocalPath() {
    String langCode = language.getLangCode();
    return langCode + "/" + date;
}
/**
 * Tests whether the language identified by a language code is a member of
 * {@code langs}.
 *
 * @param langCode the language code, resolved through
 *                 {@code Language.getByLangCode}
 * @return {@code true} if {@code langs} contains the resolved language
 */
public boolean containsLanguage(String langCode) {
    Language requested = Language.getByLangCode(langCode);
    return langs.contains(requested);
}
/**
 * Packs this object into a single {@code long}: the language id is shifted
 * into the upper 32 bits and {@code id} is OR-ed into the result.
 *
 * NOTE(review): if {@code id} is a signed int and ever negative, widening it
 * would sign-extend into the language bits; confirm ids are non-negative.
 *
 * @return the packed value
 */
public long toLong() {
    long packed = language.getId();
    packed <<= 32;
    return packed | id;
}
/**
 * Looks up the {@code LanguageInfo} for a language code.
 *
 * @param langCode the language code, resolved through
 *                 {@code Language.getByLangCode}
 * @return the entry of {@code LANGUAGE_INFOS} at position (language id - 1)
 */
public static LanguageInfo getByLangCode(String langCode) {
    Language lang = Language.getByLangCode(langCode);
    // Language ids are offset by one relative to the array index.
    int index = lang.getId() - 1;
    return LANGUAGE_INFOS[index];
}
/**
 * Maps the "simple" language onto "en" and leaves every other language
 * unchanged.
 *
 * @param lang the language to normalize
 * @return the language for code "en" when {@code lang} has code "simple",
 *         otherwise {@code lang} itself
 */
private Language getRealLang(Language lang) {
    boolean isSimple = lang.getLangCode().equals("simple");
    return isSimple ? Language.getByLangCode("en") : lang;
}
/**
 * Renders the language codes of {@code langs} as a sorted, comma-separated
 * string. The code of the language equal to {@code defaultLanguage} is
 * written in upper case; all others are written as-is.
 *
 * @return the sorted language codes joined by ","
 */
public String getLangCodeString() {
    List<String> output = Lists.newArrayList();
    for (Language lang : langs) {
        if (lang.equals(defaultLanguage)) {
            // Locale.ROOT keeps the result independent of the JVM's default
            // locale (e.g. a Turkish locale would upper-case 'i' to a dotted
            // capital I instead of 'I').
            output.add(lang.getLangCode().toUpperCase(java.util.Locale.ROOT));
        } else {
            output.add(lang.getLangCode());
        }
    }
    Collections.sort(output);
    return StringUtils.join(output, ",");
}
Map<Integer, SRMetric> langIdInlinkSRMetricMap = new HashMap<Integer, SRMetric>(); for(Language lang : langs.getLanguages()){ langIdEnsembleSRMetricMap.put(new Integer(lang.getId()), c.get(SRMetric.class, "ensemble", "language", lang.getLangCode())); langIdInlinkSRMetricMap.put(new Integer(lang.getId()), c.get(SRMetric.class, "inlink", "language", lang.getLangCode())); for(Language language : langs.getLanguages()){ if(! lpDao.getLoadedLanguages().containsLanguage(language)){ throw new DaoException(String.format("Language %s not loaded", language.getEnLangName())); langList.add(language); for(Language language : langList){ entries[5 + 2 * lang_counter] = "SR_ENSEMBLE_" + language.getLangCode(); entries[6 + 2 * lang_counter] = "SR_INLINK_" + language.getLangCode(); lang_counter ++; int pageId2 = lpDao.getIdByTitle(wdDao.getItem(item2).getLabels().get(language), language, NameSpace.ARTICLE); try{ entries[5 + 2 * lang_counter] = String.valueOf(langIdEnsembleSRMetricMap.get(new Integer(language.getId())).similarity(pageId1, pageId2, false).getScore()); entries[6 + 2 * lang_counter] = String.valueOf(langIdInlinkSRMetricMap.get(new Integer(language.getId())).similarity(pageId1, pageId2, false).getScore());
/**
 * Converts a compact url representation of a page to a LocalPage.
 * The string must start with "/w/" and split on "/" into five parts whose
 * third part is a known language code and whose fourth part is an integer id;
 * the fifth part is passed through as the last constructor argument.
 *
 * @param s the candidate compact url
 * @return The local page, or null if the string was not a compact url
 *         (including the case where the id segment is not a valid integer).
 */
public static LocalPage fromCompactUrl(String s) {
    String[] parts = s.split("/", 5);
    if (!s.startsWith("/w/") || parts.length != 5 || !Language.hasLangCode(parts[2])) {
        return null;
    }
    final Integer localId;
    try {
        localId = Integer.valueOf(parts[3]);
    } catch (NumberFormatException e) {
        // A non-numeric id segment means this is not a compact url; honor the
        // documented "null if not a url" contract instead of propagating.
        return null;
    }
    return new LocalPage(
            Language.getByLangCode(parts[2]),
            localId,
            parts[4]
    );
}
if (language.equals(Language.getByLangCode("simple"))) language = Language.getByLangCode("en"); // simple english if (tokenizerClasses.containsKey(language)) { // is just english return (LanguageTokenizer) tokenizerClasses.get(language)
/**
 * Selects the rows of {@code Tables.ILL} whose destination language id and
 * destination id match {@code dest}, and returns the source side of each row.
 * The jOOQ context obtained from {@code getJooq()} is always handed back via
 * {@code freeJooq}.
 *
 * @param dest the destination id to match
 * @return one LocalId (source language, source id) per matching row, or
 *         null if the fetch yields a null result
 * @throws DaoException declared by the overridden method
 */
@Override
public Set<LocalId> getToDest(LocalId dest) throws DaoException {
    DSLContext jooq = getJooq();
    try {
        Result<Record> rows = jooq.select()
                .from(Tables.ILL)
                .where(Tables.ILL.DEST_LANG_ID.equal(dest.getLanguage().getId()))
                .and(Tables.ILL.DEST_ID.equal(dest.getId()))
                .fetch();
        if (rows == null) {
            return null;
        }
        Set<LocalId> sources = new HashSet<LocalId>();
        for (Record row : rows) {
            Language sourceLang = Language.getById(row.getValue(Tables.ILL.SOURCE_LANG_ID));
            sources.add(new LocalId(sourceLang, row.getValue(Tables.ILL.SOURCE_ID)));
        }
        return sources;
    } finally {
        freeJooq(jooq);
    }
}
/**
 * Decodes a byte array into a LanguageSet.
 * <p>
 * A byte {@code b} normally stands for language id {@code b + 128}. The byte
 * value -128 is a marker: it adds no language itself, and the byte that
 * follows it stands for language id {@code b + 128 + 255}.
 *
 * @param truncated the encoded bytes
 * @return a LanguageSet of the decoded languages
 */
public static LanguageSet getLanguageSet(byte[] truncated) {
    Set<Language> decoded = new HashSet<Language>();
    boolean afterMarker = false;
    for (int i = 0; i < truncated.length; i++) {
        byte b = truncated[i];
        if (afterMarker) {
            decoded.add(Language.getById(b + 128 + 255));
            afterMarker = false;
            continue;
        }
        if (b == -128) {
            afterMarker = true;
            continue;
        }
        decoded.add(Language.getById(b + 128));
    }
    return new LanguageSet(decoded);
}
/**
 * Adds one RawPage to the index as a single document.
 * <p>
 * The document always carries the page's local id, its language id and a
 * title-only text field. For pages that are not redirects, one additional
 * text field is added per entry in {@code options}.
 *
 * @param page the page to index
 * @throws IllegalStateException if the indexer is closed or the page's
 *         language differs from this indexer's language
 * @throws RuntimeException wrapping any IOException raised while building
 *         or writing the document
 * @throws DaoException declared by the signature
 */
public void indexPage(RawPage page) throws DaoException {
    if (closed) {
        throw new IllegalStateException("Indexer has already been closed!");
    }
    if (!language.equals(page.getLanguage())) {
        throw new IllegalStateException("Language mismatch!");
    }
    try {
        Document doc = new Document();
        doc.add(new IntField(LuceneOptions.LOCAL_ID_FIELD_NAME, page.getLocalId(), Field.Store.YES));
        doc.add(new IntField(LuceneOptions.LANG_ID_FIELD_NAME, page.getLanguage().getId(), Field.Store.YES));
        doc.add(builder.buildTextField(page, new TextFieldElements().addTitle()));
        // Redirect pages get only the id and title fields.
        boolean redirect = page.isRedirect();
        if (!redirect) {
            for (LuceneOptions opt : options) {
                doc.add(builder.buildTextField(page, opt.elements));
            }
        }
        writer.addDocument(doc);
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}
/**
 * Two instances are equal when they are of exactly the same class, have the
 * same {@code id}, and have equal {@code language} values.
 *
 * @param o the object to compare against
 * @return {@code true} if {@code o} is equal to this instance
 */
@Override
public boolean equals(Object o) {
    if (this == o) {
        return true;
    }
    if (o == null) {
        return false;
    }
    if (getClass() != o.getClass()) {
        return false;
    }
    LocalId other = (LocalId) o;
    if (id != other.id) {
        return false;
    }
    return language.equals(other.language);
}
/**
 * Checks whether a string looks like the compact url form of a title,
 * for example "/w/en/1000/Hercule_Poirot".
 * <p>
 * The string must start with "/w/", split on "/" into at least five parts,
 * and have a known language code as its third part.
 *
 * @param s the string to test
 * @return {@code true} if {@code s} has the compact url shape
 */
public static boolean isCompactUrl(String s) {
    if (s.startsWith("/w/")) {
        String[] segments = s.split("/");
        return segments.length >= 5 && Language.hasLangCode(segments[2]);
    }
    return false;
}
} // closes an enclosing scope opened before this line
/**
 * Uses the value of {@code getEnLangName()} as this object's string form.
 *
 * @return the result of {@code getEnLangName()}
 */
@Override
public String toString() {
    String enName = getEnLangName();
    return enName;
}
/**
 * Substitutes the language for code "en" when given the language whose code
 * is "simple"; any other language is returned unchanged.
 *
 * @param lang the language to normalize
 * @return the "en" language for "simple", otherwise {@code lang}
 */
private Language getRealLang(Language lang) {
    String code = lang.getLangCode();
    if (!code.equals("simple")) {
        return lang;
    }
    return Language.getByLangCode("en");
}
Map<Integer, SRMetric> langIdInlinkSRMetricMap = new HashMap<Integer, SRMetric>(); for(Language lang : langs.getLanguages()){ langIdEnsembleSRMetricMap.put(new Integer(lang.getId()), c.get(SRMetric.class, "ensemble", "language", lang.getLangCode())); langIdInlinkSRMetricMap.put(new Integer(lang.getId()), c.get(SRMetric.class, "inlink", "language", lang.getLangCode())); for(Language language : langs.getLanguages()){ if(! lpDao.getLoadedLanguages().containsLanguage(language)){ throw new DaoException(String.format("Language %s not loaded", language.getEnLangName())); langList.add(language); for(Language language : langList){ entries[5 + 2 * lang_counter] = "SR_ENSEMBLE_" + language.getLangCode(); entries[6 + 2 * lang_counter] = "SR_INLINK_" + language.getLangCode(); lang_counter ++; int pageId2 = lpDao.getIdByTitle(wdDao.getItem(item2).getLabels().get(language), language, NameSpace.ARTICLE); try{ entries[5 + 2 * lang_counter] = String.valueOf(langIdEnsembleSRMetricMap.get(new Integer(language.getId())).similarity(pageId1, pageId2, false).getScore()); entries[6 + 2 * lang_counter] = String.valueOf(langIdInlinkSRMetricMap.get(new Integer(language.getId())).similarity(pageId1, pageId2, false).getScore());
if (datasets.size() > 0 && !lang.equals(datasets.get(0).getLanguage())) { System.err.println("Language mismatch in datasets " + name + " and " + datasets.get(0).getName()); System.exit(1); Language language = datasets.get(0).getLanguage(); FileUtils.deleteDirectory(new File(path+cmd.getOptionValue("m")+"/"+"normalizer/")); sr = c.get(SRMetric.class,cmd.getOptionValue("m"), "language", language.getLangCode());
/**
 * Resolves the "lang" parameter, read through {@code getParamOrDie}, to a
 * Language.
 *
 * @return the language for the supplied code
 * @throws WikiBrainWebException if the code is not a known language code
 */
public Language getLanguage() {
    String langCode = getParamOrDie("lang");
    if (Language.hasLangCode(langCode)) {
        return Language.getByLangCode(langCode);
    }
    throw new WikiBrainWebException("Unknown language code: " + langCode);
}
if (language.equals(Language.getByLangCode("simple"))) language = Language.getByLangCode("en"); // simple english if (tokenizerClasses.containsKey(language)) { // is just english return (LanguageTokenizer) tokenizerClasses.get(language)