/**
 * Creates a new page in the main namespace that is NOT a redirect or disambig.
 * <p>
 * The plain title string is wrapped in a {@link Title} built with the given
 * language, and construction is delegated to another constructor of this
 * class together with {@link NameSpace#ARTICLE}.
 *
 * @param language language passed to the delegated constructor and used to build the Title
 * @param localId  id passed through unchanged to the delegated constructor
 * @param title    page title as a plain string
 */
public LocalPage(Language language, int localId, String title) {
    this(language, localId, new Title(title, language), NameSpace.ARTICLE);
}
/**
 * Builds a new {@link Title} from this object's {@code title} field and the
 * supplied language. A fresh Title is created on every call.
 *
 * @param lang language handed to the Title constructor
 * @return A Title: the title of this page
 */
public Title getTitle(Language lang){
    return new Title(title, lang);
}
/**
/**
 * Registers a top-level title override for one language.
 * <p>
 * The title string is wrapped in a {@link Title} built for that language and
 * stored in {@code topLevelLangOverrides} under the language key.
 *
 * @param language      language the override applies to (also used to build the Title)
 * @param topLevelTitle override title as a plain string
 */
public void addTopLevelOverride(Language language, String topLevelTitle) {
    Title overrideTitle = new Title(topLevelTitle, language);
    this.topLevelLangOverrides.put(language, overrideTitle);
}
/**
 * Creates a raw page that is flagged as neither a redirect nor a
 * disambiguation page.
 *
 * @param localId    page id, stored as-is
 * @param revisionId revision id, stored as-is
 * @param title      plain title string; wrapped in a {@link Title} using the
 *                   {@code LanguageInfo} looked up for {@code lang}
 * @param body       page text, stored as-is
 * @param lastEdit   timestamp of the last edit, stored as-is (not copied)
 * @param lang       language of the page
 * @param namespace  namespace of the page
 */
public RawPage(int localId, int revisionId, String title, String body, Date lastEdit, Language lang, NameSpace namespace) {
    // Identifiers.
    this.localId = localId;
    this.revisionId = revisionId;

    // Where the page lives.
    this.lang = lang;
    this.namespace = namespace;

    // Content.
    this.title = new Title(title, LanguageInfo.getByLanguage(lang));
    this.body = body;
    this.lastEdit = lastEdit;

    // This overload always produces a plain page.
    this.isRedirect = false;
    this.isDisambig = false;
}
/**
 * Returns a new title whose text is the upper-cased form of this title.
 * <p>
 * The text is taken from {@link #toString()} and upper-cased with
 * {@link java.util.Locale#ROOT}, so the result does not depend on the JVM's
 * default locale (under a Turkish default locale, for example, the no-argument
 * {@code String.toUpperCase()} maps {@code i} to a dotted capital I).
 * The new title is built with this title's own {@code language} field, so no
 * language argument is needed. The {@code true} flag is passed to the
 * three-argument constructor unchanged; its meaning is defined there.
 *
 * @return a new Title holding the upper-cased text
 */
public Title toUpperCase(){
    String upTitle = this.toString().toUpperCase(java.util.Locale.ROOT);
    return new Title(upTitle, true, language);
}
/**
 * Looks up a page from a plain-string title.
 * <p>
 * Convenience overload: wraps the string in a {@link Title} for the given
 * language and delegates to {@code getByTitle(Title, NameSpace)}.
 *
 * @param language language used to build the Title
 * @param ns       namespace to search in
 * @param title    page title as a plain string
 * @return whatever the delegated lookup returns
 * @throws DaoException if the delegated lookup fails
 */
@Override
public LocalPage getByTitle(Language language, NameSpace ns, String title) throws DaoException {
    Title wrappedTitle = new Title(title, language);
    return getByTitle(wrappedTitle, ns);
}
/**
 * Extracts the redirect target from a page body.
 * <p>
 * The target string is obtained by calling {@code extractSingleString} with
 * {@code redirectPattern}, the body and the index {@code 1} (presumably the
 * capture group to return - confirm against the helper), then wrapped in a
 * {@link Title} using this object's {@code language}.
 *
 * @param body page text to search
 * @return a Title built from the extracted string
 */
public Title getRedirect(String body) {
    return new Title(extractSingleString(redirectPattern, body, 1), language);
}
/**
 * Creates a raw page with explicit redirect / disambiguation flags.
 *
 * @param localId       page id, stored as-is
 * @param revisionId    revision id, stored as-is
 * @param title         plain title string; wrapped in a {@link Title} using the
 *                      {@code LanguageInfo} looked up for {@code lang}
 * @param body          page text, stored as-is
 * @param lastEdit      timestamp of the last edit, stored as-is (not copied)
 * @param lang          language of the page
 * @param namespace     namespace of the page
 * @param redirect      value stored in {@code isRedirect}
 * @param disambig      value stored in {@code isDisambig}
 * @param redirectTitle redirect target title as a plain string, stored as-is
 */
public RawPage(int localId, int revisionId, String title, String body, Date lastEdit, Language lang, NameSpace namespace, boolean redirect, boolean disambig, String redirectTitle) {
    // Identifiers.
    this.localId = localId;
    this.revisionId = revisionId;

    // Where the page lives.
    this.lang = lang;
    this.namespace = namespace;

    // Content.
    this.title = new Title(title, LanguageInfo.getByLanguage(lang));
    this.body = body;
    this.lastEdit = lastEdit;

    // Page-kind flags and redirect target.
    this.isRedirect = redirect;
    this.isDisambig = disambig;
    this.redirectTitle = redirectTitle;
}
/**
 * Looks up a page from a plain-string title.
 * <p>
 * Convenience overload: wraps the string in a {@link Title} for the given
 * language and delegates to {@code getByTitle(Title, NameSpace)}.
 *
 * @param language language used to build the Title
 * @param ns       namespace to search in
 * @param title    page title as a plain string
 * @return whatever the delegated lookup returns
 * @throws DaoException if the delegated lookup fails
 */
@Override
public LocalPage getByTitle(Language language, NameSpace ns, String title) throws DaoException {
    Title wrappedTitle = new Title(title, language);
    return getByTitle(wrappedTitle, ns);
}
/**
 * Determines the namespace of a plain-string title.
 * <p>
 * The string is wrapped in a {@link Title} using this object's
 * {@code language}; the namespace is whatever that Title reports.
 *
 * @param title page title as a plain string
 * @return the namespace reported by the constructed Title
 */
private NameSpace getNameSpace(String title) {
    Title parsedTitle = new Title(title, language);
    return parsedTitle.getNamespace();
}
/**
 * Resolves a phrase by treating it as a page title.
 * <p>
 * The phrase is wrapped in a {@link Title} and looked up through
 * {@code lpDao.getIdByTitle}. A non-negative id yields a single entry with
 * score {@code 1.0f}; a negative id yields an empty map.
 *
 * @param language language of the phrase
 * @param phrase   text to resolve
 * @param maxPages maximum number of results wanted; values below 1 produce
 *                 an empty map without any lookup
 * @return a map with at most one entry, never {@code null}
 * @throws DaoException if the title lookup fails
 */
@Override
public LinkedHashMap<LocalId, Float> resolve(Language language, String phrase, int maxPages) throws DaoException {
    LinkedHashMap<LocalId, Float> matches = new LinkedHashMap<LocalId, Float>();
    if (maxPages >= 1) {
        Title candidate = new Title(phrase, language);
        int pageId = lpDao.getIdByTitle(candidate);
        if (pageId >= 0) {
            matches.put(new LocalId(language, pageId), 1.0f);
        }
    }
    return matches;
}
public static void main(String args[]) throws ConfigurationException, DaoException, IOException { Env env = new EnvBuilder().build(); Configurator c = env.getConfigurator(); Language lang = env.getDefaultLanguage(); // simple english PhraseAnalyzer pa = c.get(PhraseAnalyzer.class, "anchortext"); LocalPageDao pageDao = c.get(LocalPageDao.class); LocalPage page = pageDao.getByTitle(new Title("Obama", lang), NameSpace.ARTICLE); System.out.println("description of " + page + ":"); // should resolve redirect to Barack Obama LinkedHashMap<String, Float> description = pa.describe(lang, page, 100); if (description == null) { System.out.println("\tno description!"); } else { for (String phrase : description.keySet()) { System.out.println("\t" + phrase + ": " + description.get(phrase)); } } } }
/**
 * Fetches the geometry for an article identified by its title.
 * <p>
 * The article is looked up in the {@link NameSpace#ARTICLE} namespace, mapped
 * to its Wikidata item id, and the geometry is then fetched by item id and
 * layer name. {@code refSysName} is not used by this implementation.
 *
 * @param articleName article title as a plain string
 * @param language    language of the article
 * @param layerName   layer to read the geometry from
 * @param refSysName  reference system name (ignored here)
 * @return the geometry, or {@code null} if no page with that title exists
 * @throws DaoException if the page exists but has no Wikidata item, or if a
 *                      DAO call fails
 */
@Override
public Geometry getGeometry(String articleName, Language language, String layerName, String refSysName) throws DaoException {
    Title articleTitle = new Title(articleName, language);
    LocalPage article = localPageDao.getByTitle(articleTitle, NameSpace.ARTICLE);
    if (article == null) {
        return null;
    }

    Integer itemId = wikidataDao.getItemId(article);
    if (itemId != null) {
        return getGeometry(itemId, layerName);
    }
    throw new DaoException("Could not find Wikidata item for \"" + article.toString() + "\"");
}
/**
 * Converts a parsed link into a {@link Title}.
 * <p>
 * Only links whose type is {@code INTERNAL} or {@code UNKNOWN} are
 * converted; the title is built from the link target and this object's
 * {@code lang}.
 *
 * @param link the link to convert
 * @return the target as a Title, or {@code null} for any other link type
 */
private Title link2Title(Link link) {
    boolean convertible = link.getType().equals(Link.type.INTERNAL)
            || link.getType().equals(Link.type.UNKNOWN);
    if (!convertible) {
        return null;
    }
    return new Title(link.getTarget(), lang);
}
/**
 * Fetches the geometry for an article identified by its title.
 * <p>
 * The article is looked up in the {@link NameSpace#ARTICLE} namespace, mapped
 * to its Wikidata item id, and the geometry is then fetched by item id and
 * layer name. {@code refSysName} is not used by this implementation.
 *
 * @param articleName article title as a plain string
 * @param language    language of the article
 * @param layerName   layer to read the geometry from
 * @param refSysName  reference system name (ignored here)
 * @return the geometry, or {@code null} if no page with that title exists
 * @throws DaoException if the page exists but has no Wikidata item, or if a
 *                      DAO call fails
 */
@Override
public Geometry getGeometry(String articleName, Language language, String layerName, String refSysName) throws DaoException {
    Title articleTitle = new Title(articleName, language);
    LocalPage article = localPageDao.getByTitle(articleTitle, NameSpace.ARTICLE);
    if (article == null) {
        return null;
    }

    Integer itemId = wikidataDao.getItemId(article);
    if (itemId != null) {
        return getGeometry(itemId, layerName);
    }
    throw new DaoException("Could not find Wikidata item for \"" + article.toString() + "\"");
}
/**
 * Rebuilds the in-memory map from redirect page ids to target page ids for
 * one language.
 * <p>
 * {@code redirectIdsToPageIds} is replaced with a fresh map (the two
 * {@code -1} constructor arguments are presumably the "no entry" key and
 * value - confirm against the Trove API). Every raw page matching the
 * redirect filter is then visited: its redirect title is parsed into a
 * {@link Title}, resolved to a page id through {@code localPages}, and
 * stored under the redirect's own local id. Progress is logged on every
 * 100000th page, starting with the first.
 *
 * @param language language whose redirects are loaded
 * @throws DaoException if reading raw pages or resolving a title fails
 */
private void loadRedirectIdsIntoMemory(Language language) throws DaoException{
    redirectIdsToPageIds = new TIntIntHashMap(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1, -1);
    LOG.info("Begin loading redirects into memory: ");

    DaoFilter redirectsOnly = new DaoFilter().setLanguages(language).setRedirect(true);
    int processed = 0;
    for (RawPage redirectPage : rawPages.get(redirectsOnly)) {
        Title target = new Title(redirectPage.getRedirectTitle(), LanguageInfo.getByLanguage(language));
        redirectIdsToPageIds.put(
                redirectPage.getLocalId(),
                localPages.getIdByTitle(target.getCanonicalTitle(), language, target.getNamespace()));
        if (processed % 100000 == 0) {
            LOG.info("loading redirect # " + processed);
        }
        processed++;
    }

    LOG.info("End loading redirects into memory.");
}
/**
 * Rebuilds the in-memory map from redirect page ids to target page ids for
 * one language.
 * <p>
 * {@code redirectIdsToPageIds} is replaced with a fresh map (the two
 * {@code -1} constructor arguments are presumably the "no entry" key and
 * value - confirm against the Trove API). Every raw page matching the
 * redirect filter is then visited: its redirect title is parsed into a
 * {@link Title}, resolved to a page id through {@code localPages}, and
 * stored under the redirect's own local id. Progress is logged on every
 * 100000th page, starting with the first.
 *
 * @param language language whose redirects are loaded
 * @throws DaoException if reading raw pages or resolving a title fails
 */
private void loadRedirectIdsIntoMemory(Language language) throws DaoException{
    redirectIdsToPageIds = new TIntIntHashMap(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1, -1);
    LOG.info("Begin loading redirects into memory: ");

    DaoFilter redirectsOnly = new DaoFilter().setLanguages(language).setRedirect(true);
    int processed = 0;
    for (RawPage redirectPage : rawPages.get(redirectsOnly)) {
        Title target = new Title(redirectPage.getRedirectTitle(), LanguageInfo.getByLanguage(language));
        redirectIdsToPageIds.put(
                redirectPage.getLocalId(),
                localPages.getIdByTitle(target.getCanonicalTitle(), language, target.getNamespace()));
        if (processed % 100000 == 0) {
            LOG.info("loading redirect # " + processed);
        }
        processed++;
    }

    LOG.info("End loading redirects into memory.");
}
public static void main(String args[]) throws ConfigurationException, DaoException { // The following ten-line dance to get an env is awkward and repeated over and over. // Figure out a good way to consolidate it. Env env = EnvBuilder.envFromArgs(args); Configurator configurator = env.getConfigurator(); LocalPageDao lpDao = configurator.get(LocalPageDao.class); LocalLinkDao sqlDao = configurator.get(LocalLinkDao.class, "sql"); Language simple = env.getLanguages().getDefaultLanguage(); LocalPage page = lpDao.getByTitle(new Title("List of Soundgarden band members", simple), NameSpace.ARTICLE); System.out.println("page is " + page); DaoFilter filter = new DaoFilter().setSourceIds(page.getLocalId()).setLanguages(simple); for (LocalLink link : sqlDao.get(filter)) { System.out.println("link is: " + link); } } }
public static void main(String args[]) throws ConfigurationException, DaoException { // The following ten-line dance to get an env is awkward and repeated over and over. // Figure out a good way to consolidate it. Env env = EnvBuilder.envFromArgs(args); Configurator configurator = env.getConfigurator(); LocalPageDao lpDao = configurator.get(LocalPageDao.class); LocalLinkDao sqlDao = configurator.get(LocalLinkDao.class, "sql"); Language simple = env.getLanguages().getDefaultLanguage(); LocalPage page = lpDao.getByTitle(new Title("List of Soundgarden band members", simple), NameSpace.ARTICLE); System.out.println("page is " + page); DaoFilter filter = new DaoFilter().setSourceIds(page.getLocalId()).setLanguages(simple); for (LocalLink link : sqlDao.get(filter)) { System.out.println("link is: " + link); } } }
private void parseCategory(RawPage xml, ParsedPage pp){ // handle categories for (Link cat : pp.getCategories()){ Title destTitle = new Title(cat.getTarget(), lang); // TODO: ensure title is a category ParsedCategory pc = new ParsedCategory(); pc.location = new ParsedLocation(xml, -1, -1, cat.getSrcSpan().getStart()); pc.category = destTitle; visitCategory(pc); } // handle ILLs parseIlls(xml, pp); }