/**
 * Tests whether the page's canonical title is matched in full by any pattern
 * in {@code TITLE_BLACKLIST}.
 *
 * @param lp the page whose title is checked
 * @return true if at least one blacklist pattern matches the entire title
 */
private boolean isBlacklisted(LocalPage lp) {
    final String canonical = lp.getTitle().getCanonicalTitle();
    boolean blacklisted = false;
    for (Pattern candidate : TITLE_BLACKLIST) {
        // matches() requires the whole title to match, not just a substring.
        if (candidate.matcher(canonical).matches()) {
            blacklisted = true;
            break;
        }
    }
    return blacklisted;
}
/**
 * Heuristically decides whether a page is a "list" article by checking whether
 * its canonical title starts with "list", ignoring case.
 *
 * TODO: make multi lingual (only the English prefix is recognised).
 *
 * @param lp the page to inspect
 * @return true if the lower-cased canonical title begins with "list"
 */
private boolean isList(LocalPage lp) {
    // Lower-case with a fixed locale. The no-argument toLowerCase() uses the JVM
    // default locale; under Turkish or Azeri rules an upper-case 'I' becomes a
    // dotless i, so a title such as "LIST OF ..." would fail the prefix test.
    String lowered = lp.getTitle().getCanonicalTitle().toLowerCase(java.util.Locale.ROOT);
    return lowered.startsWith("list");
}
/**
 * Builds a map describing a page, with the keys "articleId", "title" and "lang".
 *
 * @param p the page to describe; may be null
 * @return the populated map, or null when {@code p} is null
 */
private Map pageJson(LocalPage p) {
    if (p != null) {
        Map<String, Object> fields = new HashMap<String, Object>();
        fields.put("articleId", p.getLocalId());
        fields.put("title", p.getTitle().getCanonicalTitle());
        fields.put("lang", p.getLanguage().getLangCode());
        return fields;
    }
    return null;
}
/**
 * Builds an "http://" URL of the form {@code http://<domain>/wiki/<encoded title>}
 * for this title.
 *
 * Spaces in the canonical title are replaced by underscores before the title is
 * encoded with {@link URLEncoder} using UTF-8.
 *
 * @return the URL as a string
 * @throws WikiBrainException if the UTF-8 encoding is reported as unsupported
 */
public String toUrl() throws WikiBrainException {
    try {
        final String domain = language.getLanguage().getDomain();
        // Literal single-character replacement; no regex is needed here.
        final String underscored = getCanonicalTitle().replace(' ', '_');
        final String encoded = URLEncoder.encode(underscored, "UTF-8");
        return "http://" + domain + "/wiki/" + encoded;
    } catch (UnsupportedEncodingException e) {
        throw new WikiBrainException(e);
    }
}
/**
 * Builds a compact, site-relative URL of the form
 * {@code /w/<langCode>/<localId>/<title>}.
 *
 * Spaces in the canonical title become underscores; any whitespace that remains
 * afterwards (tabs, newlines, ...) is removed.
 *
 * @return the compact URL, for example "/w/en/1000/Hercule_Poirot"
 */
public String getCompactUrl() {
    final String slug = getTitle().getCanonicalTitle()
            .replace(' ', '_')          // plain spaces -> underscores
            .replaceAll("\\s+", "");    // drop every other whitespace run
    return "/w/" + getLanguage().getLangCode() + "/" + getLocalId() + "/" + slug;
}
/**
 * Builds the site-relative URL {@code /w/<langCode>/<localId>/<title>} for a
 * mentioned page, using the language code of the enclosing object's
 * {@code language} field and the page's title with spaces turned into underscores.
 *
 * @param page the mentioned page
 * @return the relative URL for the page
 */
private String makeMentionUrl(LocalPage page) {
    final String slug = page.getTitle().getCanonicalTitle().replace(' ', '_');
    return "/w/" + language.getLangCode() + "/" + page.getLocalId() + "/" + slug;
}
public void ill(ParsedIll ill) throws WikiBrainException { RawPage page = ill.location.getXml(); try { // This format may not be easy to parse. Change it. synchronized (output) { this.output.write( page.getLanguage().getLangCode() + "\t" + page.getTitle().getCanonicalTitle() + "\t" + ill.title.getLanguage().getLangCode() + "\t" + ill.title.getCanonicalTitle() + "\n"); } count.incrementAndGet(); } catch (IOException e) { throw new WikiBrainException(e); } }
public void ill(ParsedIll ill) throws WikiBrainException { RawPage page = ill.location.getXml(); try { // This format may not be easy to parse. Change it. synchronized (output) { this.output.write( page.getLanguage().getLangCode() + "\t" + page.getTitle().getCanonicalTitle() + "\t" + ill.title.getLanguage().getLangCode() + "\t" + ill.title.getCanonicalTitle() + "\n"); } count.incrementAndGet(); } catch (IOException e) { throw new WikiBrainException(e); } }
/**
 * Computes the 64-bit hash of this title by delegating to the three-argument
 * {@code longHashCode} overload with this title's language, canonical title
 * and namespace.
 *
 * @return the long hash code produced by the overload
 */
public long longHashCode() {
    final String canonical = getCanonicalTitle();
    return longHashCode(language.getLanguage(), canonical, getNamespace());
}
/**
 * Get a map of pages by their titles.
 *
 * One "INFO" live-API query is issued per title; the first reply of each query
 * is converted to a page and stored under the requested title.
 *
 * @param language the language of the pages
 * @param titles a Collection of page titles
 * @param ns the namespace of the pages (not consulted by this implementation)
 * @return a map of titles to pages
 * @throws org.wikibrain.core.dao.DaoException if there was an error retrieving the pages
 */
public Map<Title, LocalPage> getByTitles(Language language, Collection<Title> titles, NameSpace ns) throws DaoException {
    final Map<Title, LocalPage> found = new HashMap<Title, LocalPage>();
    for (Title wanted : titles) {
        // The query is built with underscores in place of spaces.
        final String apiTitle = wanted.getCanonicalTitle().replace(' ', '_');
        final QueryReply reply = new LiveAPIQuery.LiveAPIQueryBuilder("INFO", language)
                .addTitle(apiTitle)
                .setRedirects(followRedirects)
                .build()
                .getValuesFromQueryResult()
                .get(0);
        found.put(wanted, (LocalPage) reply.getLocalPage(language));
    }
    return found;
}
/**
 * Get an id from a title by issuing an "INFO" live-API query in the title's
 * language and returning the id of the first reply.
 *
 * NOTE(review): earlier documentation stated that -1 is returned when the page
 * doesn't exist; that depends on what the reply reports and is not established
 * by this method itself -- confirm against QueryReply.
 *
 * @param title the title to look up
 * @return the id reported by the first query reply
 * @throws DaoException if the lookup fails
 */
public int getIdByTitle(Title title) throws DaoException {
    // The query is built with underscores in place of spaces.
    final String apiTitle = title.getCanonicalTitle().replace(' ', '_');
    final QueryReply reply = new LiveAPIQuery.LiveAPIQueryBuilder("INFO", title.getLanguage())
            .addTitle(apiTitle)
            .setRedirects(followRedirects)
            .build()
            .getValuesFromQueryResult()
            .get(0);
    return reply.getId();
}
/**
 * Handles a wikify request: runs the "websail" wikifier for the request's
 * language over the "text" parameter and writes a JSON response containing the
 * text and one entry per detected link.
 *
 * Each entry carries "index", "text", "lang", "articleId" and "title"; the title
 * is "Unknown" when the destination page cannot be loaded.
 *
 * @param req the incoming web request
 * @throws ConfigurationException if the wikifier cannot be obtained from the configurator
 * @throws DaoException if a destination page lookup fails
 */
private void doWikify(WikiBrainWebRequest req) throws ConfigurationException, DaoException {
    final Language lang = req.getLanguage();
    final Wikifier wikifier = env.getConfigurator().get(Wikifier.class, "websail", "language", lang.getLangCode());
    final String text = req.getParamOrDie("text");

    final List<Map<String, Object>> references = new ArrayList<Map<String, Object>>();
    for (LocalLink link : wikifier.wikify(text)) {
        final LocalPage target = pageDao.getById(lang, link.getDestId());

        final String title;
        if (target == null) {
            title = "Unknown";
        } else {
            title = target.getTitle().getCanonicalTitle();
        }

        final Map<String, Object> entry = new HashMap<String, Object>();
        entry.put("index", link.getLocation());
        entry.put("text", link.getAnchorText());
        entry.put("lang", lang.getLangCode());
        entry.put("articleId", link.getDestId());
        entry.put("title", title);
        references.add(entry);
    }
    req.writeJsonResponse("text", text, "references", references);
}
/**
 * Get a single page by its title.
 *
 * Issues an "INFO" live-API query in the title's language and converts the
 * first reply into a page.
 *
 * @param title the page's title
 * @param ns the page's namespace (not consulted by this implementation)
 * @return the requested LocalPage
 * @throws org.wikibrain.core.dao.DaoException if there was an error retrieving the page
 */
public LocalPage getByTitle(Title title, NameSpace ns) throws DaoException {
    final Language lang = title.getLanguage();
    // The query is built with underscores in place of spaces.
    final String apiTitle = title.getCanonicalTitle().replace(' ', '_');
    final QueryReply reply = new LiveAPIQuery.LiveAPIQueryBuilder("INFO", lang)
            .addTitle(apiTitle)
            .setRedirects(followRedirects)
            .build()
            .getValuesFromQueryResult()
            .get(0);
    return (LocalPage) reply.getLocalPage(lang);
}
/**
 * Rebuilds {@code redirectIdsToPageIds} for one language: every raw page flagged
 * as a redirect is mapped from its local id to the id that {@code localPages}
 * reports for its redirect title.
 *
 * Progress is logged once per 100000 processed redirects.
 *
 * @param language the language whose redirects are loaded
 * @throws DaoException if reading raw pages or resolving a title fails
 */
private void loadRedirectIdsIntoMemory(Language language) throws DaoException {
    // NOTE(review): the trailing -1, -1 are presumably Trove's no-entry key/value -- confirm.
    redirectIdsToPageIds = new TIntIntHashMap(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1, -1);
    LOG.info("Begin loading redirects into memory: ");

    int processed = 0;
    for (RawPage redirect : rawPages.get(new DaoFilter().setLanguages(language).setRedirect(true))) {
        final Title target = new Title(redirect.getRedirectTitle(), LanguageInfo.getByLanguage(language));
        final int targetId = localPages.getIdByTitle(target.getCanonicalTitle(), language, target.getNamespace());
        redirectIdsToPageIds.put(redirect.getLocalId(), targetId);

        if (processed % 100000 == 0) {
            LOG.info("loading redirect # " + processed);
        }
        processed++;
    }
    LOG.info("End loading redirects into memory.");
}
/**
 * Rebuilds {@code redirectIdsToPageIds} for one language: every raw page flagged
 * as a redirect is mapped from its local id to the id that {@code localPages}
 * reports for its redirect title.
 *
 * Progress is logged once per 100000 processed redirects.
 *
 * @param language the language whose redirects are loaded
 * @throws DaoException if reading raw pages or resolving a title fails
 */
private void loadRedirectIdsIntoMemory(Language language) throws DaoException {
    // NOTE(review): the trailing -1, -1 are presumably Trove's no-entry key/value -- confirm.
    redirectIdsToPageIds = new TIntIntHashMap(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1, -1);
    LOG.info("Begin loading redirects into memory: ");

    int processed = 0;
    for (RawPage redirect : rawPages.get(new DaoFilter().setLanguages(language).setRedirect(true))) {
        final Title target = new Title(redirect.getRedirectTitle(), LanguageInfo.getByLanguage(language));
        final int targetId = localPages.getIdByTitle(target.getCanonicalTitle(), language, target.getNamespace());
        redirectIdsToPageIds.put(redirect.getLocalId(), targetId);

        if (processed % 100000 == 0) {
            LOG.info("loading redirect # " + processed);
        }
        processed++;
    }
    LOG.info("End loading redirects into memory.");
}
public static void main(String args[]) throws ConfigurationException, DaoException { // Setup environment Env env = EnvBuilder.envFromArgs(args); LocalPageDao pageDao = env.getConfigurator().get(LocalPageDao.class); UniversalPageDao conceptDao = env.getConfigurator().get(UniversalPageDao.class); // Get local and universal pages LocalPage page = pageDao.getByTitle(Language.EN, "Apple"); UniversalPage concept = conceptDao.getByLocalPage(page); // Translate to other languages. System.out.format("%s in other languages:\n", page.getTitle()); for (Language lang : concept.getLanguageSet()) { LocalPage page2 = pageDao.getById(lang, concept.getLocalId(lang)); System.out.format("%s: %s\n", lang.toString(), page2.getTitle().getCanonicalTitle()); } }
public static void main(String args[]) throws ConfigurationException, DaoException { // Setup environment Env env = EnvBuilder.envFromArgs(args); LocalPageDao pageDao = env.getConfigurator().get(LocalPageDao.class); UniversalPageDao conceptDao = env.getConfigurator().get(UniversalPageDao.class); // Get local and universal pages LocalPage page = pageDao.getByTitle(Language.EN, "Apple"); UniversalPage concept = conceptDao.getByLocalPage(page); // Translate to other languages. System.out.format("%s in other languages:\n", page.getTitle()); for (Language lang : concept.getLanguageSet()) { LocalPage page2 = pageDao.getById(lang, concept.getLocalId(lang)); System.out.format("%s: %s\n", lang.toString(), page2.getTitle().getCanonicalTitle()); } }
/**
 * Saves a local page by passing its fields, in a fixed positional order, to
 * {@code insert}.
 *
 * NOTE(review): the argument order presumably mirrors the column order that
 * {@code insert} expects -- confirm against its definition before reordering.
 *
 * @param page the page to save
 * @throws DaoException declared by this method; presumably raised by {@code insert} on failure
 */
@Override
public void save(LocalPage page) throws DaoException {
    insert(
            page.getLanguage().getId(),             // language id
            page.getLocalId(),                      // page id local to that language
            page.getTitle().getCanonicalTitle(),    // canonical title text
            page.getNameSpace().getArbitraryId(),   // namespace id
            page.isRedirect(),                      // redirect flag
            page.isDisambig()                       // disambiguation flag
    );
}
/**
 * Saves a raw page by passing its fields, in a fixed positional order, to
 * {@code insert}. A null body is stored as the empty string.
 *
 * NOTE(review): the argument order presumably mirrors the column order that
 * {@code insert} expects -- confirm against its definition before reordering.
 *
 * @param page the raw page to save
 * @throws DaoException declared by this method; presumably raised by {@code insert} on failure
 */
@Override
public void save(RawPage page) throws DaoException {
    insert(
            page.getLanguage().getId(),                     // language id
            page.getLocalId(),                              // page id local to that language
            page.getRevisionId(),                           // revision id
            page.getBody() == null ? "" : page.getBody(),   // body text, never null
            page.getTitle().getCanonicalTitle(),            // canonical title text
            page.getLastEdit(),                             // last-edit value as reported by the page
            page.getNamespace().getArbitraryId(),           // namespace id
            page.isRedirect(),                              // redirect flag
            page.isDisambig(),                              // disambiguation flag
            page.getRedirectTitle()                         // redirect target title, as reported by the page
    );
}
/**
 * Looks up a single local page by canonical title, language and namespace.
 *
 * The query selects at most one row from {@code LOCAL_PAGE} and hands the
 * fetched record to {@code buildLocalPage}. The namespace filter uses the
 * {@code nameSpace} argument, not the namespace of {@code title}. The jOOQ
 * context is always released, even if the query throws.
 *
 * @param title the title (and language) to search for
 * @param nameSpace the namespace the page must belong to
 * @return whatever {@code buildLocalPage} produces for the fetched record
 * @throws DaoException if the lookup fails
 */
@Override
public LocalPage getByTitle(Title title, NameSpace nameSpace) throws DaoException {
    final DSLContext jooq = getJooq();
    try {
        return buildLocalPage(
                jooq.select()
                        .from(Tables.LOCAL_PAGE)
                        .where(Tables.LOCAL_PAGE.TITLE.eq(title.getCanonicalTitle()))
                        .and(Tables.LOCAL_PAGE.LANG_ID.eq(title.getLanguage().getId()))
                        .and(Tables.LOCAL_PAGE.NAME_SPACE.eq(nameSpace.getArbitraryId()))
                        .limit(1)
                        .fetchOne());
    } finally {
        freeJooq(jooq);
    }
}