/**
 * Looks up the {@code LanguageInfo} record that corresponds to this object.
 *
 * @return whatever {@code LanguageInfo.getByLanguage} yields when given {@code this}
 */
public LanguageInfo getLanguageInfo() {
    final LanguageInfo info = LanguageInfo.getByLanguage(this);
    return info;
}
public RedirectParser(Language language) { this.language = LanguageInfo.getByLanguage(language); }
/**
 * Convenience constructor that accepts a {@code Language} instead of a
 * {@code LanguageInfo}. It resolves the language through
 * {@code LanguageInfo.getByLanguage} and delegates to the
 * {@code (String, LanguageInfo)} constructor.
 *
 * @param title    the title text, passed through unchanged
 * @param language the language used to look up the {@code LanguageInfo}
 */
public Title(String title, Language language) { this(title, LanguageInfo.getByLanguage(language)); }
public RawPage(int localId, int revisionId, String title, String body, Date lastEdit, Language lang, NameSpace namespace) { this.title = new Title(title, LanguageInfo.getByLanguage(lang)); this.body = body; this.lastEdit = lastEdit; this.namespace = namespace; this.lang = lang; this.revisionId = revisionId; this.localId = localId; isRedirect = false; isDisambig = false; }
public RawPage(int localId, int revisionId, String title, String body, Date lastEdit, Language lang, NameSpace namespace, boolean redirect, boolean disambig, String redirectTitle) { this.title = new Title(title, LanguageInfo.getByLanguage(lang)); this.body = body; this.lastEdit = lastEdit; this.lang = lang; this.revisionId = revisionId; this.localId = localId; this.namespace = namespace; isRedirect = redirect; isDisambig = disambig; this.redirectTitle = redirectTitle; }
/**
 * Parses one page and saves it when it is interesting.
 *
 * <p>Returns immediately when {@code keepProcessingArticles(lang)} is false.
 * Otherwise the page counter is incremented, a progress message is logged on
 * every 10000th page, the page text is parsed with a {@code PageXmlParser}
 * for {@code lang}, and, if {@code isInteresting} accepts the parsed page, it
 * is saved and the interesting-page and per-language counts are incremented.
 *
 * @param file passed through to {@code save}
 * @param lang language used for the parser and for the per-language bookkeeping
 * @param page text handed to {@code PageXmlParser.parse}
 * @throws WpParseException declared by the method; presumably raised by the parser
 */
private void processOnePage(File file, Language lang, String page) throws WpParseException {
    if (!keepProcessingArticles(lang)) {
        return;
    }

    // Log the value returned by this increment. Re-reading the counter with get()
    // may observe increments made by other callers in the meantime, so the logged
    // number would no longer be the multiple of 10000 that triggered the message.
    final long pageNumber = allPages.incrementAndGet();
    if (pageNumber % 10000 == 0) {
        LOG.info("processing article " + pageNumber +
                " found " + interestingPages.get() + " interesting articles");
    }

    PageXmlParser parser = new PageXmlParser(LanguageInfo.getByLanguage(lang));
    RawPage rp = parser.parse(page);
    if (isInteresting(rp)) {
        interestingPages.incrementAndGet();
        save(file, rp);
        incrementLangCount(lang);
    }
}
/**
 * Parses one page and saves it when it is interesting.
 *
 * <p>Returns immediately when {@code keepProcessingArticles(lang)} is false.
 * Otherwise the page counter is incremented, a progress message is logged on
 * every 10000th page, the page text is parsed with a {@code PageXmlParser}
 * for {@code lang}, and, if {@code isInteresting} accepts the parsed page, it
 * is saved and the interesting-page and per-language counts are incremented.
 *
 * @param file passed through to {@code save}
 * @param lang language used for the parser and for the per-language bookkeeping
 * @param page text handed to {@code PageXmlParser.parse}
 * @throws WpParseException declared by the method; presumably raised by the parser
 */
private void processOnePage(File file, Language lang, String page) throws WpParseException {
    if (!keepProcessingArticles(lang)) {
        return;
    }

    // Log the value returned by this increment. Re-reading the counter with get()
    // may observe increments made by other callers in the meantime, so the logged
    // number would no longer be the multiple of 10000 that triggered the message.
    final long pageNumber = allPages.incrementAndGet();
    if (pageNumber % 10000 == 0) {
        LOG.info("processing article " + pageNumber +
                " found " + interestingPages.get() + " interesting articles");
    }

    PageXmlParser parser = new PageXmlParser(LanguageInfo.getByLanguage(lang));
    RawPage rp = parser.parse(page);
    if (isInteresting(rp)) {
        interestingPages.incrementAndGet();
        save(file, rp);
        incrementLangCount(lang);
    }
}
/**
 * Evaluates {@code downloadEstimateEquation} for a set of languages.
 *
 * <p>The link and article counts of every language in {@code langs} are
 * summed and exposed to the equation as the variables {@code links} and
 * {@code articles}. The equation's numeric result is returned unchanged
 * (megabytes, going by the method name).
 *
 * @param langs languages whose link and article counts are totalled
 * @return the value of the download estimate equation
 * @throws RuntimeException wrapping the {@code EvaluationException} if the
 *         equation cannot be evaluated
 */
public double estimateDownloadMegabytes(LanguageSet langs) {
    // Accumulate in long: totals over many languages can exceed the int range,
    // and int arithmetic would wrap around silently.
    long numArticles = 0;
    long numLinks = 0;
    for (Language lang : langs) {
        LanguageInfo li = LanguageInfo.getByLanguage(lang);
        numLinks += li.getNumLinks();
        numArticles += li.getNumArticles();
    }

    Map<String, String> variables = new HashMap<String, String>();
    variables.put("links", String.valueOf(numLinks));
    variables.put("articles", String.valueOf(numArticles));

    Evaluator mathEvaluator = new Evaluator();
    mathEvaluator.setVariables(variables);
    try {
        return mathEvaluator.getNumberResult(downloadEstimateEquation);
    } catch (EvaluationException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Evaluates {@code diskEstimateEquation} for a set of languages.
 *
 * <p>The link and article counts of every language in {@code langs} are
 * summed and exposed to the equation as the variables {@code links} and
 * {@code articles}. The equation's numeric result is returned unchanged
 * (megabytes, going by the method name).
 *
 * @param langs languages whose link and article counts are totalled
 * @return the value of the disk estimate equation
 * @throws RuntimeException wrapping the {@code EvaluationException} if the
 *         equation cannot be evaluated
 */
public double estimateDiskMegabytes(LanguageSet langs) {
    // Accumulate in long: totals over many languages can exceed the int range,
    // and int arithmetic would wrap around silently.
    long numArticles = 0;
    long numLinks = 0;
    for (Language lang : langs) {
        LanguageInfo li = LanguageInfo.getByLanguage(lang);
        numLinks += li.getNumLinks();
        numArticles += li.getNumArticles();
    }

    Map<String, String> variables = new HashMap<String, String>();
    variables.put("links", String.valueOf(numLinks));
    variables.put("articles", String.valueOf(numArticles));

    Evaluator mathEvaluator = new Evaluator();
    mathEvaluator.setVariables(variables);
    try {
        return mathEvaluator.getNumberResult(diskEstimateEquation);
    } catch (EvaluationException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Evaluates {@code diskEstimateEquation} for a set of languages.
 *
 * <p>The link and article counts of every language in {@code langs} are
 * summed and exposed to the equation as the variables {@code links} and
 * {@code articles}. The equation's numeric result is returned unchanged
 * (megabytes, going by the method name).
 *
 * @param langs languages whose link and article counts are totalled
 * @return the value of the disk estimate equation
 * @throws RuntimeException wrapping the {@code EvaluationException} if the
 *         equation cannot be evaluated
 */
public double estimateDiskMegabytes(LanguageSet langs) {
    // Accumulate in long: totals over many languages can exceed the int range,
    // and int arithmetic would wrap around silently.
    long numArticles = 0;
    long numLinks = 0;
    for (Language lang : langs) {
        LanguageInfo li = LanguageInfo.getByLanguage(lang);
        numLinks += li.getNumLinks();
        numArticles += li.getNumArticles();
    }

    Map<String, String> variables = new HashMap<String, String>();
    variables.put("links", String.valueOf(numLinks));
    variables.put("articles", String.valueOf(numArticles));

    Evaluator mathEvaluator = new Evaluator();
    mathEvaluator.setVariables(variables);
    try {
        return mathEvaluator.getNumberResult(diskEstimateEquation);
    } catch (EvaluationException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Evaluates {@code downloadEstimateEquation} for a set of languages.
 *
 * <p>The link and article counts of every language in {@code langs} are
 * summed and exposed to the equation as the variables {@code links} and
 * {@code articles}. The equation's numeric result is returned unchanged
 * (megabytes, going by the method name).
 *
 * @param langs languages whose link and article counts are totalled
 * @return the value of the download estimate equation
 * @throws RuntimeException wrapping the {@code EvaluationException} if the
 *         equation cannot be evaluated
 */
public double estimateDownloadMegabytes(LanguageSet langs) {
    // Accumulate in long: totals over many languages can exceed the int range,
    // and int arithmetic would wrap around silently.
    long numArticles = 0;
    long numLinks = 0;
    for (Language lang : langs) {
        LanguageInfo li = LanguageInfo.getByLanguage(lang);
        numLinks += li.getNumLinks();
        numArticles += li.getNumArticles();
    }

    Map<String, String> variables = new HashMap<String, String>();
    variables.put("links", String.valueOf(numLinks));
    variables.put("articles", String.valueOf(numArticles));

    Evaluator mathEvaluator = new Evaluator();
    mathEvaluator.setVariables(variables);
    try {
        return mathEvaluator.getNumberResult(downloadEstimateEquation);
    } catch (EvaluationException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Evaluates {@code timeEstimateEquation} for a set of languages.
 *
 * <p>The equation sees four variables: {@code links} and {@code articles}
 * (the summed counts of every language in {@code langs}) plus
 * {@code singleCoreSpeed} and {@code multiCoreSpeed} as reported by
 * {@code CpuBenchmarker}. The equation's numeric result is returned
 * unchanged (seconds, going by the method name).
 *
 * @param langs languages whose link and article counts are totalled
 * @return the value of the time estimate equation
 * @throws RuntimeException wrapping the {@code EvaluationException} if the
 *         equation cannot be evaluated
 */
public double estimateSeconds(LanguageSet langs) {
    // Accumulate in long: totals over many languages can exceed the int range,
    // and int arithmetic would wrap around silently.
    long numArticles = 0;
    long numLinks = 0;
    for (Language lang : langs) {
        LanguageInfo li = LanguageInfo.getByLanguage(lang);
        numLinks += li.getNumLinks();
        numArticles += li.getNumArticles();
    }

    Map<String, String> variables = new HashMap<String, String>();
    variables.put("singleCoreSpeed", "" + CpuBenchmarker.getSingleCoreSpeed());
    variables.put("multiCoreSpeed", "" + CpuBenchmarker.getMultiCoreSpeed());
    variables.put("links", String.valueOf(numLinks));
    variables.put("articles", String.valueOf(numArticles));

    Evaluator mathEvaluator = new Evaluator();
    mathEvaluator.setVariables(variables);
    try {
        return mathEvaluator.getNumberResult(timeEstimateEquation);
    } catch (EvaluationException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Evaluates {@code timeEstimateEquation} for a set of languages.
 *
 * <p>The equation sees four variables: {@code links} and {@code articles}
 * (the summed counts of every language in {@code langs}) plus
 * {@code singleCoreSpeed} and {@code multiCoreSpeed} as reported by
 * {@code CpuBenchmarker}. The equation's numeric result is returned
 * unchanged (seconds, going by the method name).
 *
 * @param langs languages whose link and article counts are totalled
 * @return the value of the time estimate equation
 * @throws RuntimeException wrapping the {@code EvaluationException} if the
 *         equation cannot be evaluated
 */
public double estimateSeconds(LanguageSet langs) {
    // Accumulate in long: totals over many languages can exceed the int range,
    // and int arithmetic would wrap around silently.
    long numArticles = 0;
    long numLinks = 0;
    for (Language lang : langs) {
        LanguageInfo li = LanguageInfo.getByLanguage(lang);
        numLinks += li.getNumLinks();
        numArticles += li.getNumArticles();
    }

    Map<String, String> variables = new HashMap<String, String>();
    variables.put("singleCoreSpeed", "" + CpuBenchmarker.getSingleCoreSpeed());
    variables.put("multiCoreSpeed", "" + CpuBenchmarker.getMultiCoreSpeed());
    variables.put("links", String.valueOf(numLinks));
    variables.put("articles", String.valueOf(numArticles));

    Evaluator mathEvaluator = new Evaluator();
    mathEvaluator.setVariables(variables);
    try {
        return mathEvaluator.getNumberResult(timeEstimateEquation);
    } catch (EvaluationException e) {
        throw new RuntimeException(e);
    }
}
// Build a ParsedIll and hand it to visitIll:
//  - location: a ParsedLocation over `xml`, positioned at the start of `ill`'s source span
//    (NOTE(review): the two -1 arguments look like "unknown" placeholders; confirm
//    against the ParsedLocation constructor)
//  - title: `target` wrapped in a Title for the LanguageInfo of `l`
//    (NOTE(review): the meaning of the `false` flag is not visible here; check Title's
//    three-argument constructor)
ParsedIll pill = new ParsedIll(); pill.location = new ParsedLocation(xml, -1, -1, ill.getSrcSpan().getStart()); pill.title = new Title(target, false, LanguageInfo.getByLanguage(l)); visitIll(pill);
Title title = new Title(destTitle, LanguageInfo.getByLanguage(language)); int destId = pageDao.getIdByTitle(title.getTitleStringWithoutNamespace(), language, ns); if (destId < 0) {
/**
 * Rebuilds {@code redirectIdsToPageIds} for one language.
 *
 * <p>Every raw page returned for the filter "this language, redirects only"
 * contributes one entry: the redirect page's local id mapped to the id that
 * {@code localPages.getIdByTitle} returns for the redirect's target title.
 * Progress is logged every 100000 redirects, starting with redirect 0.
 *
 * @param language language whose redirects are loaded
 * @throws DaoException declared by the method; presumably raised by the DAO calls
 */
private void loadRedirectIdsIntoMemory(Language language) throws DaoException {
    // NOTE(review): the two -1 arguments are presumably the map's "no entry"
    // key and value sentinels; confirm against the TIntIntHashMap constructor.
    redirectIdsToPageIds = new TIntIntHashMap(
            Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1, -1);

    LOG.info("Begin loading redirects into memory: ");

    // The language is fixed for the whole load, so resolve its LanguageInfo once
    // rather than once per redirect.
    final LanguageInfo languageInfo = LanguageInfo.getByLanguage(language);

    int i = 0;
    for (RawPage p : rawPages.get(new DaoFilter().setLanguages(language).setRedirect(true))) {
        Title pTitle = new Title(p.getRedirectTitle(), languageInfo);
        int targetId = localPages.getIdByTitle(
                pTitle.getCanonicalTitle(), language, pTitle.getNamespace());
        redirectIdsToPageIds.put(p.getLocalId(), targetId);
        if (i % 100000 == 0) {
            LOG.info("loading redirect # " + i);
        }
        i++;
    }

    LOG.info("End loading redirects into memory.");
}
/**
 * Writes every stage diagnostic to a CSV file.
 *
 * <p>The first row is the header {@code stage, singleCoreSpeed,
 * multiCoreSpeed, numLinks, numArticles, elapsed}. Each following row
 * describes one entry of {@code diagnostics}; its {@code numLinks} and
 * {@code numArticles} columns are the totals over that diagnostic's languages.
 *
 * @param path destination handed to {@code WpIOUtils.openWriter}
 * @throws IOException if opening, writing or closing the file fails
 */
public void writeAllData(String path) throws IOException {
    CsvListWriter writer = new CsvListWriter(WpIOUtils.openWriter(path), CsvPreference.STANDARD_PREFERENCE);
    try {
        writer.write(Arrays.asList("stage", "singleCoreSpeed", "multiCoreSpeed", "numLinks", "numArticles", "elapsed"));
        for (StageDiagnostic diagnostic : diagnostics) {
            // long totals: sums over many languages can exceed the int range.
            long numArticles = 0;
            long numLinks = 0;
            for (Language l : diagnostic.getLangs()) {
                // One lookup per language serves both counters.
                LanguageInfo info = LanguageInfo.getByLanguage(l);
                numLinks += info.getNumLinks();
                numArticles += info.getNumArticles();
            }
            writer.write(Arrays.asList(
                    diagnostic.getStage(),
                    diagnostic.getSingleCoreSpeed(),
                    diagnostic.getMultiCoreSpeed(),
                    numLinks,
                    numArticles,
                    diagnostic.getElapsedSeconds()
            ));
        }
    } finally {
        // Close even when a write fails so the underlying file handle is not leaked.
        writer.close();
    }
}
/**
 * Rebuilds {@code redirectIdsToPageIds} for one language.
 *
 * <p>Every raw page returned for the filter "this language, redirects only"
 * contributes one entry: the redirect page's local id mapped to the id that
 * {@code localPages.getIdByTitle} returns for the redirect's target title.
 * Progress is logged every 100000 redirects, starting with redirect 0.
 *
 * @param language language whose redirects are loaded
 * @throws DaoException declared by the method; presumably raised by the DAO calls
 */
private void loadRedirectIdsIntoMemory(Language language) throws DaoException {
    // NOTE(review): the two -1 arguments are presumably the map's "no entry"
    // key and value sentinels; confirm against the TIntIntHashMap constructor.
    redirectIdsToPageIds = new TIntIntHashMap(
            Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1, -1);

    LOG.info("Begin loading redirects into memory: ");

    // The language is fixed for the whole load, so resolve its LanguageInfo once
    // rather than once per redirect.
    final LanguageInfo languageInfo = LanguageInfo.getByLanguage(language);

    int i = 0;
    for (RawPage p : rawPages.get(new DaoFilter().setLanguages(language).setRedirect(true))) {
        Title pTitle = new Title(p.getRedirectTitle(), languageInfo);
        int targetId = localPages.getIdByTitle(
                pTitle.getCanonicalTitle(), language, pTitle.getNamespace());
        redirectIdsToPageIds.put(p.getLocalId(), targetId);
        if (i % 100000 == 0) {
            LOG.info("loading redirect # " + i);
        }
        i++;
    }

    LOG.info("End loading redirects into memory.");
}
/**
 * Writes every stage diagnostic to a CSV file.
 *
 * <p>The first row is the header {@code stage, singleCoreSpeed,
 * multiCoreSpeed, numLinks, numArticles, elapsed}. Each following row
 * describes one entry of {@code diagnostics}; its {@code numLinks} and
 * {@code numArticles} columns are the totals over that diagnostic's languages.
 *
 * @param path destination handed to {@code WpIOUtils.openWriter}
 * @throws IOException if opening, writing or closing the file fails
 */
public void writeAllData(String path) throws IOException {
    CsvListWriter writer = new CsvListWriter(WpIOUtils.openWriter(path), CsvPreference.STANDARD_PREFERENCE);
    try {
        writer.write(Arrays.asList("stage", "singleCoreSpeed", "multiCoreSpeed", "numLinks", "numArticles", "elapsed"));
        for (StageDiagnostic diagnostic : diagnostics) {
            // long totals: sums over many languages can exceed the int range.
            long numArticles = 0;
            long numLinks = 0;
            for (Language l : diagnostic.getLangs()) {
                // One lookup per language serves both counters.
                LanguageInfo info = LanguageInfo.getByLanguage(l);
                numLinks += info.getNumLinks();
                numArticles += info.getNumArticles();
            }
            writer.write(Arrays.asList(
                    diagnostic.getStage(),
                    diagnostic.getSingleCoreSpeed(),
                    diagnostic.getMultiCoreSpeed(),
                    numLinks,
                    numArticles,
                    diagnostic.getElapsedSeconds()
            ));
        }
    } finally {
        // Close even when a write fails so the underlying file handle is not leaked.
        writer.close();
    }
}
@Override public void category(ParsedCategory cat) throws WikiBrainException { Language lang = cat.category.getLanguage(); try{ LanguageInfo langInfo = LanguageInfo.getByLanguage(lang); int c = counter.getAndIncrement(); if(c % 100000 == 0) LOG.info("Visited category #" + c); String catText = cat.category.getCanonicalTitle().split("\\|")[0]; //piped cat link catText = catText.split("#")[0]; //cat subsection Title catTitle = new Title(catText, langInfo); if(!isCategory(catText, langInfo) && !catTitle.getNamespace().equals(NameSpace.CATEGORY)) { throw new WikiBrainException("Thought it was a category, was not a category."); } int catId = pageDao.getIdByTitle(catTitle.getCanonicalTitle(), lang, NameSpace.CATEGORY); catMemDao.save( new LocalCategoryMember( catId, cat.location.getXml().getLocalId(), lang )); metaDao.incrementRecords(LocalCategoryMember.class, lang); } catch (DaoException e) { metaDao.incrementErrorsQuietly(LocalCategoryMember.class, lang); throw new WikiBrainException(e); } }