/**
 * Stores one local page by handing its individual fields to {@code insert(...)}.
 *
 * @param page the page to store
 * @throws DaoException declared by the DAO contract
 */
@Override
public void save(LocalPage page) throws DaoException {
    // Field order must match what insert(...) expects:
    // language id, local id, canonical title, namespace id, redirect flag, disambiguation flag.
    insert(
            page.getLanguage().getId(),
            page.getLocalId(),
            page.getTitle().getCanonicalTitle(),
            page.getNameSpace().getArbitraryId(),
            page.isRedirect(),
            page.isDisambig()
    );
}
/**
 * Wraps a single local page in a brand-new universal page that contains only that page.
 *
 * @param obj the object to convert; cast to {@link LocalPage} without a type check
 * @return a universal page with a freshly drawn universal id, this object's id,
 *         the local page's namespace and a one-entry language-to-local-id mapping
 */
@Override
public UniversalPage transform(Object obj) {
    final LocalPage localPage = (LocalPage) obj;

    // The universal page is backed by exactly one (language, local id) pair.
    final Multimap<Language, LocalId> idsByLanguage = HashMultimap.create();
    idsByLanguage.put(localPage.getLanguage(), localPage.toLocalId());

    return new UniversalPage(
            nextUnivId.getAndIncrement(),
            getId(),
            localPage.getNameSpace(),
            idsByLanguage
    );
}
};
/**
 * Produces a printable description of the page with the given language and id.
 *
 * @param language language edition to look the page up in
 * @param id local id of the page
 * @return the page's {@code toString()} value, or {@code "unknown"} when the DAO returns null
 * @throws DaoException if the lookup fails
 */
private String describePage(Language language, Integer id) throws DaoException {
    final LocalPage found = pageDao.getById(language, id);
    if (found == null) {
        return "unknown";
    }
    return found.toString();
}
/**
 * Two local pages are equal when they share the same language and the same local id.
 *
 * @param o the object to compare against
 * @return true if {@code o} is a {@link LocalPage} with equal language and local id
 */
public boolean equals(Object o) {
    if (!(o instanceof LocalPage)) {
        return false;
    }
    final LocalPage other = (LocalPage) o;
    return other.getLanguage().equals(this.getLanguage())
            && other.getLocalId() == this.getLocalId();
}
/**
 * Tests a local page against the language, namespace, redirect and
 * disambiguation constraints of this filter. A constraint that is
 * {@code null} is treated as "not set" and accepts every page.
 *
 * @param page the page to test; {@code null} never matches
 * @return true if and only if the page satisfies every constraint that is set
 */
public boolean isValidLocalPage(LocalPage page) {
    if (page == null) {
        return false;
    }
    if (langIds != null && !langIds.contains(page.getLanguage().getId())) {
        return false;
    }
    if (nsIds != null && !nsIds.contains(page.getNameSpace().getArbitraryId())) {
        return false;
    }
    if (isRedirect != null && isRedirect != page.isRedirect()) {
        return false;
    }
    return isDisambig == null || isDisambig == page.isDisambig();
}
/**
 * Heuristically decides whether a page is a "list" page by checking whether
 * its canonical title starts with "list", ignoring case.
 *
 * TODO: make multi lingual
 *
 * @param lp the page whose title is inspected
 * @return true if the lower-cased canonical title starts with {@code "list"}
 */
private boolean isList(LocalPage lp) {
    // Lower-case with a fixed locale: under the default locale a Turkish JVM maps
    // 'I' to dotless 'ı', so a title such as "LIST OF ..." would not match "list".
    return lp.getTitle()
            .getCanonicalTitle()
            .toLowerCase(java.util.Locale.ROOT)
            .startsWith("list");
}
/**
 * Builds the compact URL path of this page from its language code,
 * local id and canonical title.
 *
 * @return the path, for example "/w/en/1000/Hercule_Poirot"
 */
public String getCompactUrl() {
    // Plain spaces turn into underscores; whatever whitespace is left afterwards is dropped.
    final String urlTitle = getTitle()
            .getCanonicalTitle()
            .replace(" ", "_")
            .replaceAll("\\s+", "");

    final StringBuilder url = new StringBuilder("/w/");
    url.append(getLanguage().getLangCode());
    url.append("/");
    url.append(getLocalId());
    url.append("/");
    url.append(urlTitle);
    return url.toString();
}
/**
 * Builds the URL path used to mention a page: {@code /w/<lang code>/<local id>/<title>},
 * where spaces in the canonical title are replaced by underscores.
 * The language code comes from this object's {@code language} field, not from the page.
 *
 * @param page the page being mentioned
 * @return the URL path for the page
 */
private String makeMentionUrl(LocalPage page) {
    final StringBuilder url = new StringBuilder("/w/");
    url.append(language.getLangCode());
    url.append("/");
    url.append(page.getLocalId());
    url.append("/");
    url.append(page.getTitle().getCanonicalTitle().replace(' ', '_'));
    return url.toString();
}
System.out.println("isRedirect? "+testClass.getByTitle(new Title("Apple Tree", lang), NameSpace.getNameSpaceByArbitraryId(0)).isRedirect()); System.out.println("isRedirect? "+testClass.getByTitle(new Title("Apple Tree", lang), NameSpace.getNameSpaceByArbitraryId(0)).isRedirect()); System.out.println("isDisambig? "+testClass.getById(lang,32672164).isDisambig()); int categoryId = allCategoryPageIds.get(i); LocalPage category = testClass.getById(lang, categoryId); System.out.println("\t" + categoryId + ": " + category.getTitle()); break; Title pageTitle = testClass.getById(simple, pageId).getTitle(); System.out.println("\tPage: " + pageTitle + "; Namespace: " + NameSpace.getNameSpaceByArbitraryId(pages.get(pageId))); pageCount++;
if ((lp.getNameSpace() != NameSpace.ARTICLE) || (lp.isDisambig()) || (lp.isRedirect()) || (isBlacklisted(lp)) || (isList(lp))) { DaoFilter query = new DaoFilter().setLanguages(lang).setDestIds(lp.getLocalId()); int n = linkDao.getCount(query); mostLinked.tallyScore(lp.getLocalId(), n);
/**
 * Per-page callback that accumulates article and view counts by top-level category.
 *
 * For each page it reads the page's view count from {@code allViews} (keyed by local id),
 * asks {@code catDao.getClosestCategory(page, topLevelCategories, true)} for a category,
 * and, when one is returned, adds 1 to that category's entry in {@code articleCounts} and
 * the page's views to its entry in {@code viewCounts} (creating both entries on first use).
 * Pages for which no category is returned are ignored entirely, including for the
 * progress counter: {@code numPages} is only incremented for categorized pages, and a
 * progress line is written to stderr each time it reaches a multiple of 10000.
 *
 * NOTE(review): if allViews is a boxed Map rather than a primitive map, a page id with
 * no entry would fail on unboxing into {@code int views} - confirm the map type.
 * NOTE(review): the containsKey/get/put sequences are not atomic; confirm this callback
 * is invoked from a single thread or that the maps are otherwise protected.
 * NOTE(review): the meaning of the boolean passed to getClosestCategory is not visible
 * here - confirm against the DAO.
 *
 * @param page the page being visited
 * @throws Exception propagated from the DAO lookup
 */
@Override public void call(LocalPage page) throws Exception { int views = allViews.get(page.getLocalId()); LocalPage cat = catDao.getClosestCategory(page, topLevelCategories, true); if (cat != null) { if (articleCounts.containsKey(cat)) { articleCounts.put(cat, articleCounts.get(cat) + 1); viewCounts.put(cat, viewCounts.get(cat) + views); } else { articleCounts.put(cat, 1); viewCounts.put(cat, views); } if (numPages.incrementAndGet() % 10000 == 0) { System.err.println("doing page " + numPages.get()); } } } });
LocalPage parent = pageDao.getByTitle(lang, NameSpace.CATEGORY, TOP_LEVEL_PARENT); for (LocalPage page : catDao.getCategoryMembers(parent).values()) { if (page.getNameSpace().equals(NameSpace.CATEGORY)) { topLevelCategories.add(page); System.out.format("%s\t%d\t%d\n", page.getTitle().getCanonicalTitle(), articleCounts.get(page), viewCounts.get(page) );
/**
 * Assembles the text of one index field for a page.
 *
 * The text is built, in order, from:
 * the canonical title repeated {@code elements.usesTitle()} times,
 * the canonical titles of all redirects to the page (if {@code elements.usesRedirects()}),
 * and the page's plain text (if {@code elements.usesPlainText()}).
 * Pieces are separated by single spaces and the result is trimmed.
 *
 * @param localPage the page whose redirects are looked up
 * @param rawPage the raw page supplying the title and plain text
 * @param elements describes the field name and which pieces to include
 * @return a stored text field named {@code elements.getTextFieldName()}
 * @throws DaoException if a redirect or page lookup fails
 */
private TextField buildTextField(LocalPage localPage, RawPage rawPage, TextFieldElements elements) throws DaoException {
    StringBuilder sb = new StringBuilder();

    String title = rawPage.getTitle().getCanonicalTitle();
    for (int i = 0; i < elements.usesTitle(); i++) {
        sb.append(title);
        sb.append(" ");
    }

    if (elements.usesRedirects()) {
        TIntIterator iterator = redirectDao.getRedirects(localPage).iterator();
        while (iterator.hasNext()) {
            LocalPage redirect = localPageDao.getById(localPage.getLanguage(), iterator.next());
            // A redirect id without a matching page must not abort indexing of the whole page.
            if (redirect == null) {
                continue;
            }
            sb.append(redirect.getTitle().getCanonicalTitle());
            sb.append(" ");
        }
    }

    if (elements.usesPlainText()) {
        String plainText = rawPage.getPlainText();
        // StringBuilder.append(null) would index the literal word "null".
        if (plainText != null) {
            sb.append(plainText);
        }
    }

    return new TextField(elements.getTextFieldName(), sb.toString().trim(), Field.Store.YES);
}
/**
 * Loads the pages that are members of a category.
 *
 * @param localCategory the category whose members are wanted
 * @return the member pages as returned by the page DAO for the member ids,
 *         looked up in the category's own language
 * @throws DaoException if either lookup fails
 */
@Override
public Map<Integer, LocalPage> getCategoryMembers(LocalPage localCategory) throws DaoException {
    // Resolve the member ids first, then fetch the pages in one batch.
    final Collection<Integer> memberIds = getCategoryMemberIds(localCategory);
    return localPageDao.getByIds(
            localCategory.getLanguage(),
            memberIds
    );
}
for (int id : pageIds) { LocalPage p = pageDao.getById(lang, id); if (p != null) System.out.format("\t%.3f %s\n", result.get(c).get(id), p.toString()); TIntDoubleMap distances = catDao.getCategoryDistances(topLevelCats, p.getLocalId(), true); System.out.println("distances to top-level categories for " + p); for (int catId : WpCollectionUtils.sortMapKeys(distances, false)) { LocalPage c = pageDao.getById(lang, catId); if (c != null) System.out.format("\t%.3f %s\n", distances.get(catId), c.toString());
/**
 * Stores a raw page and the local page derived from it, updating the record
 * and error counters in the meta DAO. Both steps are best-effort: a failure in
 * either one is logged and counted, never rethrown, and the local page is
 * attempted even when storing the raw page failed.
 *
 * @param file the file being processed; used only in the log message
 * @param rp the raw page to store
 */
private void save(File file, RawPage rp) {
    // Step 1: the raw page itself.
    try {
        rawPageDao.save(rp);
        metaDao.incrementRecords(rp.getClass(), rp.getLanguage());
    } catch (Exception rawFailure) {
        LOG.warn("parsing of " + file + " failed:", rawFailure);
        metaDao.incrementErrorsQuietly(rp.getClass(), rp.getLanguage());
    }

    // Step 2: the local page built from the raw page's fields.
    try {
        LocalPage localPage = new LocalPage(
                rp.getLanguage(),
                rp.getLocalId(),
                rp.getTitle(),
                rp.getNamespace(),
                rp.isRedirect(),
                rp.isDisambig()
        );
        localPageDao.save(localPage);
        metaDao.incrementRecords(localPage.getClass(), localPage.getLanguage());
    } catch (Exception localFailure) {
        LOG.warn("parsing of " + file + " failed:", localFailure);
        metaDao.incrementErrorsQuietly(LocalPage.class, rp.getLanguage());
    }
}
/**
 * Collects the local ids of the given pages into a set.
 *
 * @param localPages the pages to convert
 * @return a new hash set holding {@code toLocalId()} of every page; duplicates collapse
 */
public static Set<LocalId> toLocalIds(Iterable<LocalPage> localPages) {
    final Set<LocalId> ids = Sets.newHashSet();
    for (final LocalPage current : localPages) {
        final LocalId id = current.toLocalId();
        ids.add(id);
    }
    return ids;
}
if (!articleIndexes.containsKey(page.getLocalId())) { String w = page.getCompactUrl(); long h = hashWord(w); if (wordIndexes.containsKey(h)) { wordIndexes.put(h, i); top.add(w); articleIndexes.put(page.getLocalId(), i); wordCounts.put(h, ARTICLE_COUNT_BONUS);
/**
 * Guesses the top-level categories of a language edition.
 *
 * The root category is resolved first from the per-language override title
 * ({@code topLevelLangOverrides}), and, if there is no override or it cannot be
 * found, from the universal concept {@code TOP_LEVEL_CONCEPT} via {@code univDao}.
 * The result is every member of that root category that is itself in the
 * category namespace.
 *
 * @param language the language edition to inspect
 * @return the category pages directly under the root category; empty when no
 *         root category can be resolved
 * @throws DaoException if a lookup fails, or if the universal DAO is needed but not set
 */
@Override
public Set<LocalPage> guessTopLevelCategories(Language language) throws DaoException {
    int topLevelId = -1;

    Title override = topLevelLangOverrides.get(language);
    if (override != null) {
        // Diagnostic output goes through the logger rather than stdout.
        LOG.debug("top level category override for language {} is {}", language, override);
        topLevelId = localPageDao.getIdByTitle(override);
        if (topLevelId < 0) {
            LOG.warn("top level category {} for language {} not found.", override, language);
        }
    }

    // No usable override: fall back to the universal top-level concept.
    if (topLevelId < 0) {
        if (univDao == null) {
            throw new DaoException("Universal dao required for top level categories.");
        }
        topLevelId = univDao.getLocalId(language, TOP_LEVEL_CONCEPT);
    }

    Set<LocalPage> result = new HashSet<LocalPage>();
    if (topLevelId < 0) {
        return result;
    }

    for (int id : getCategoryMemberIds(language, topLevelId)) {
        LocalPage page = localPageDao.getById(language, id);
        // A member id without a matching page is skipped instead of causing an NPE.
        if (page != null && page.getNameSpace() == NameSpace.CATEGORY) {
            result.add(page);
        }
    }
    return result;
}
public LocalCategoryMember(LocalPage localCategory, LocalPage localArticle) throws WikiBrainException { if (!localArticle.getLanguage().equals(localCategory.getLanguage())) { throw new WikiBrainException("Language Mismatch"); } this.categoryId = localCategory.getLocalId(); this.articleId = localArticle.getLocalId(); this.language = localCategory.getLanguage(); }