/**
 * Builds a {@link Title} for this page from its stored {@code title} value.
 *
 * @param lang the language handed to the new Title
 * @return a new Title constructed from this page's {@code title} and {@code lang}
 */
public Title getTitle(Language lang){ return new Title(title, lang); } /**
/**
 * Tells whether the canonical title of a page is matched in full by any
 * pattern in {@code TITLE_BLACKLIST}.
 *
 * @param lp the page whose canonical title is tested
 * @return {@code true} if at least one blacklist pattern matches the whole
 *         title, {@code false} otherwise
 */
private boolean isBlacklisted(LocalPage lp) {
    final String canonical = lp.getTitle().getCanonicalTitle();
    boolean blacklisted = false;
    for (Pattern candidate : TITLE_BLACKLIST) {
        if (candidate.matcher(canonical).matches()) {
            // One hit is enough; no need to try the remaining patterns.
            blacklisted = true;
            break;
        }
    }
    return blacklisted;
}
/**
 * Determines the namespace of a title string by wrapping it in a
 * {@link Title} for this object's {@code language}.
 *
 * @param title the title text to inspect
 * @return the namespace reported by the constructed Title
 */
private NameSpace getNameSpace(String title) {
    Title parsed = new Title(title, language);
    return parsed.getNamespace();
}
/**
 * Computes the 64-bit hash of this object by delegating to the
 * three-argument {@code longHashCode} overload.
 *
 * @return the hash derived from the language, the canonical title and the
 *         namespace of this object
 */
public long longHashCode() {
    return longHashCode(
            language.getLanguage(),
            this.getCanonicalTitle(),
            this.getNamespace());
}
// Look up the article-namespace page titled originName for the language code "simple".
LocalPage lp = lpDao.getByTitle(new Title(originName, Language.getByLangCode("simple")), NameSpace.ARTICLE);
// NOTE(review): lp is used without a null check - confirm getByTitle cannot return null here.
Integer id = wdDao.getItemId(lp);
// Geometry for that item in layerName; "earth" is presumably the reference system name - verify against sdDao.getGeometry.
Geometry rootPoint = sdDao.getGeometry(id, layerName, "earth");
// Prints i, the canonical form of its best English title, and the distance between resMap.get(i) and rootPoint, suffixed " km".
System.out.println(i.toString() + " " + wdDao.getUniversalPage(i).getBestEnglishTitle(lpDao, true).getCanonicalTitle() + " " + distanceMetrics.getDistance(resMap.get(i), rootPoint) + " km");
@Override public void category(ParsedCategory cat) throws WikiBrainException { Language lang = cat.category.getLanguage(); try{ LanguageInfo langInfo = LanguageInfo.getByLanguage(lang); int c = counter.getAndIncrement(); if(c % 100000 == 0) LOG.info("Visited category #" + c); String catText = cat.category.getCanonicalTitle().split("\\|")[0]; //piped cat link catText = catText.split("#")[0]; //cat subsection Title catTitle = new Title(catText, langInfo); if(!isCategory(catText, langInfo) && !catTitle.getNamespace().equals(NameSpace.CATEGORY)) { throw new WikiBrainException("Thought it was a category, was not a category."); } int catId = pageDao.getIdByTitle(catTitle.getCanonicalTitle(), lang, NameSpace.CATEGORY); catMemDao.save( new LocalCategoryMember( catId, cat.location.getXml().getLocalId(), lang )); metaDao.incrementRecords(LocalCategoryMember.class, lang); } catch (DaoException e) { metaDao.incrementErrorsQuietly(LocalCategoryMember.class, lang); throw new WikiBrainException(e); } }
/**
 * Rebuilds the in-memory map from redirect page id to destination page id
 * for one language.
 *
 * <p>Every raw page selected by a redirect-only filter for {@code language}
 * is visited; its redirect title is resolved to a local page id and stored
 * under the redirect's own local id. Progress is logged every 100000
 * redirects.</p>
 *
 * @param language the language whose redirects are loaded
 * @throws DaoException if a DAO call fails
 */
private void loadRedirectIdsIntoMemory(Language language) throws DaoException{
    redirectIdsToPageIds = new TIntIntHashMap(
            Constants.DEFAULT_CAPACITY,
            Constants.DEFAULT_LOAD_FACTOR,
            -1,
            -1);
    int seen = 0;
    LOG.info("Begin loading redirects into memory: ");
    for (RawPage redirect : rawPages.get(new DaoFilter().setLanguages(language).setRedirect(true))) {
        Title target = new Title(redirect.getRedirectTitle(), LanguageInfo.getByLanguage(language));
        int sourceId = redirect.getLocalId();
        int targetId = localPages.getIdByTitle(
                target.getCanonicalTitle(),
                language,
                target.getNamespace());
        redirectIdsToPageIds.put(sourceId, targetId);
        if (seen % 100000 == 0) {
            LOG.info("loading redirect # " + seen);
        }
        seen++;
    }
    LOG.info("End loading redirects into memory.");
}
public void ill(ParsedIll ill) throws WikiBrainException { RawPage page = ill.location.getXml(); try { // This format may not be easy to parse. Change it. synchronized (output) { this.output.write( page.getLanguage().getLangCode() + "\t" + page.getTitle().getCanonicalTitle() + "\t" + ill.title.getLanguage().getLangCode() + "\t" + ill.title.getCanonicalTitle() + "\n"); } count.incrementAndGet(); } catch (IOException e) { throw new WikiBrainException(e); } }
if (destTitle == null || destTitle.getNamespace() != NameSpace.ARTICLE){ continue; templateName = new Title(templateName, false, lang).toString(); // this appears to be necessary due to JWPL's handling of template names ParsedLink.SubarticleType tempSubType; tempSubType = subarticleParser.isTemplateSubarticle(templateName, templateText); Title destTitle = link2Title(templateLink); if (destTitle == null) { continue; } NameSpace type = destTitle.getNamespace(); if (type == NameSpace.ARTICLE){ ParsedLocation location = new ParsedLocation(xml, secNum, paraNum, t.getSrcSpan().getStart()); for (String dest : dests){ dest = SubarticleParser.removeTemplateAnchor(dest); Title destTitle = new Title(dest, lang); try { ParsedLocation location = new ParsedLocation(xml, secNum, paraNum, t.getSrcSpan().getStart()); continue; Title destTitle = new Title(cat.getTarget(), false, lang);
Title title = new Title(destTitle, LanguageInfo.getByLanguage(language)); int destId = pageDao.getIdByTitle(title.getTitleStringWithoutNamespace(), language, ns); if (destId < 0) {
/**
 * <p>
 * Returns the title string of the revised page, including namespace
 * prefixes and subpages, if any. The string is formatted as it would be on
 * an HTML page and not as in the URL used by MediaWiki for the page; for
 * example, spaces are represented as spaces and not as underscores.
 * </p>
 * <p>
 * On a single MediaWiki site, the prefixed page title is a key for a page
 * at any given moment. However, users may change the title and namespace by
 * moving pages, so the page id is a better way to identify pages across
 * history.
 * </p>
 * <p>
 * This implementation returns the bare title when the raw page's namespace
 * is {@code NameSpace.WIKIPEDIA}; otherwise it returns the namespace string,
 * a colon, and the bare title.
 * </p>
 *
 * @return title string
 */
@Override
public String getPrefixedTitle() {
    Title pageTitle = raw.getTitle();
    // NOTE(review): the un-prefixed case is keyed on NameSpace.WIKIPEDIA -
    // confirm this is the intended namespace for titles without a prefix.
    if (raw.getNamespace() != NameSpace.WIKIPEDIA) {
        return pageTitle.getNamespaceString() + ":" + pageTitle.getTitleStringWithoutNamespace();
    }
    return pageTitle.getTitleStringWithoutNamespace();
}
/**
 * Returns this title's text with the namespace part removed, by applying
 * the one-argument {@code getTitleStringWithoutNamespace} overload to this
 * object's {@code canonicalTitle}.
 *
 * <p>As documented for this method originally: the part of the title after
 * the first colon, or the whole title if there is no colon.</p>
 *
 * @return the title string without its namespace
 */
public String getTitleStringWithoutNamespace(){
    return getTitleStringWithoutNamespace(this.canonicalTitle);
}
/**
 * Returns the language reported by this object's {@code title}.
 *
 * @return the result of {@code title.getLanguage()}
 */
public Language getLanguage() { return title.getLanguage(); } } // final brace closes an enclosing scope whose header is not visible in this excerpt
private void visitLink(ParsedLocation location, Title dest, String linkText, ParsedLink.SubarticleType subType) throws WikiBrainException{ // don't want to consider within-page links Title src = location.getXml().getTitle(); if (src.toString().startsWith("#") || src.equals(dest)) { return; } ParsedLink pl = new ParsedLink(); pl.location = location; pl.target = dest; pl.text = linkText; pl.subarticleType = subType; for (ParserVisitor visitor : visitors) { try { visitor.link(pl); } catch (WikiBrainException e) { LOG.warn("beginPage failed:", e); } } }
/**
 * Resolves a link to a title and reports that title's namespace.
 *
 * @param link the link to classify
 * @return the namespace of the resolved title, or {@code null} when
 *         {@code link2Title} yields no title
 */
private NameSpace getLinkType(Link link){
    Title resolved = link2Title(link);
    if (resolved == null) {
        return null;
    }
    return resolved.getNamespace();
}
/**
 * Returns the namespace portion of this title (for example "Category:" or
 * its equivalent, per the original documentation), by applying the
 * one-argument {@code getNamespaceString} overload to this object's
 * {@code canonicalTitle}.
 *
 * @return the namespace string of this title
 */
public String getNamespaceString(){
    return getNamespaceString(canonicalTitle);
}
// Look up the article-namespace page titled originName for the language code "simple".
LocalPage lp = lpDao.getByTitle(new Title(originName, Language.getByLangCode("simple")), NameSpace.ARTICLE);
// NOTE(review): lp is used without a null check - confirm getByTitle cannot return null here.
Integer id = wdDao.getItemId(lp);
// Geometry for that item in layerName; "earth" is presumably the reference system name - verify against sdDao.getGeometry.
Geometry rootPoint = sdDao.getGeometry(id, layerName, "earth");
// Prints i, the canonical form of its best English title, and the distance between resMap.get(i) and rootPoint, suffixed " km".
System.out.println(i.toString() + " " + wdDao.getUniversalPage(i).getBestEnglishTitle(lpDao, true).getCanonicalTitle() + " " + distanceMetrics.getDistance(resMap.get(i), rootPoint) + " km");
@Override public void link(ParsedLink link) throws WikiBrainException { Language lang = link.target.getLanguage(); LanguageInfo langInfo = LanguageInfo.getByLanguage(lang); String targetText = link.target.getCanonicalTitle(); link.target = new Title(targetText, langInfo); int destId = pageDao.getIdByTitle(targetText, lang, link.target.getNamespace()); LocalLink ll = new LocalLink( lang,