/**
 * Creates a loader for the links contained in one SQL dump file.
 *
 * <p>The language is resolved from the dump file's absolute path through
 * {@code FileMatcher.LINK_SQL}. The constructor also queries the link dao for the
 * number of links it currently reports for that language and logs a size guess
 * derived from it: the count (at least 10000) multiplied by six.
 *
 * @param dao      link dao; stored, and queried here for the current link count
 * @param pageDao  page dao; stored for later use
 * @param metaDao  meta-info dao; stored for later use
 * @param file     the SQL dump file; its absolute path determines the language
 * @param existing link set; stored as-is (NOTE(review): presumably the links that
 *                 are already known -- confirm against callers)
 * @throws DaoException declared by the constructor; the only dao call made here is
 *                      the link count query
 */
public SqlLinksLoader(LocalLinkDao dao, LocalPageDao pageDao, MetaInfoDao metaDao, File file, LocalLinkSet existing) throws DaoException {
    this.dao = dao;
    this.metaDao = metaDao;
    this.pageDao = pageDao;
    this.sqlDump = file;
    this.language = FileMatcher.LINK_SQL.getLanguage(file.getAbsolutePath());

    // Guess that there will be twice as many links as there are now; to be safe,
    // the array size should be 3 times as big. The product is computed as a long:
    // in int arithmetic, six times a count above ~358 million wraps around and the
    // logged guess becomes meaningless (possibly negative).
    long estimatedSize = 6L * Math.max(10000, dao.getCount(new DaoFilter().setLanguages(language)));
    LOG.info("guessing at size of array at " + estimatedSize);

    this.existing = existing;
}
/**
 * Creates a loader for the links contained in one SQL dump file.
 *
 * <p>The language is resolved from the dump file's absolute path through
 * {@code FileMatcher.LINK_SQL}. The constructor also queries the link dao for the
 * number of links it currently reports for that language and logs a size guess
 * derived from it: the count (at least 10000) multiplied by six.
 *
 * @param dao      link dao; stored, and queried here for the current link count
 * @param pageDao  page dao; stored for later use
 * @param metaDao  meta-info dao; stored for later use
 * @param file     the SQL dump file; its absolute path determines the language
 * @param existing link set; stored as-is (NOTE(review): presumably the links that
 *                 are already known -- confirm against callers)
 * @throws DaoException declared by the constructor; the only dao call made here is
 *                      the link count query
 */
public SqlLinksLoader(LocalLinkDao dao, LocalPageDao pageDao, MetaInfoDao metaDao, File file, LocalLinkSet existing) throws DaoException {
    this.dao = dao;
    this.metaDao = metaDao;
    this.pageDao = pageDao;
    this.sqlDump = file;
    this.language = FileMatcher.LINK_SQL.getLanguage(file.getAbsolutePath());

    // Guess that there will be twice as many links as there are now; to be safe,
    // the array size should be 3 times as big. The product is computed as a long:
    // in int arithmetic, six times a count above ~358 million wraps around and the
    // logged guess becomes meaningless (possibly negative).
    long estimatedSize = 6L * Math.max(10000, dao.getCount(new DaoFilter().setLanguages(language)));
    LOG.info("guessing at size of array at " + estimatedSize);

    this.existing = existing;
}
/**
 * Loads one dump file by handing every string produced by a {@code DumpSplitter}
 * over the file to {@code processOnePage}, driven by {@code ParallelForEach.iterate}.
 *
 * <p>The language is resolved from the file's absolute path, so the file name is
 * expected to start with lang + "wiki", for example "enwiki". Nothing is done when
 * {@code keepProcessingArticles} returns false for that language. A
 * {@code WpParseException} raised while processing a page is logged as a warning
 * and not rethrown by the per-page procedure.
 *
 * @param file the dump file to load
 */
public void load(final File file) {
    final Language dumpLanguage = FileMatcher.ARTICLES.getLanguage(file.getAbsolutePath());
    if (keepProcessingArticles(dumpLanguage)) {
        // Per-page work: parse failures are reported and skipped, anything else
        // propagates to ParallelForEach.
        final Procedure<String> pageHandler = new Procedure<String>() {
            @Override
            public void call(String rawPage) throws Exception {
                try {
                    processOnePage(file, dumpLanguage, rawPage);
                } catch (WpParseException parseFailure) {
                    LOG.warn("parsing of " + file.getPath() + " failed:", parseFailure);
                }
            }
        };
        ParallelForEach.iterate(
                new DumpSplitter(file).iterator(),
                WpThreadUtils.getMaxThreads(),
                1000,
                pageHandler,
                Integer.MAX_VALUE
        );
    }
}
/**
 * Loads one dump file by handing every string produced by a {@code DumpSplitter}
 * over the file to {@code processOnePage}, driven by {@code ParallelForEach.iterate}.
 *
 * <p>The language is resolved from the file's absolute path, so the file name is
 * expected to start with lang + "wiki", for example "enwiki". Nothing is done when
 * {@code keepProcessingArticles} returns false for that language. A
 * {@code WpParseException} raised while processing a page is logged as a warning
 * and not rethrown by the per-page procedure.
 *
 * @param file the dump file to load
 */
public void load(final File file) {
    final Language dumpLanguage = FileMatcher.ARTICLES.getLanguage(file.getAbsolutePath());
    if (keepProcessingArticles(dumpLanguage)) {
        // Per-page work: parse failures are reported and skipped, anything else
        // propagates to ParallelForEach.
        final Procedure<String> pageHandler = new Procedure<String>() {
            @Override
            public void call(String rawPage) throws Exception {
                try {
                    processOnePage(file, dumpLanguage, rawPage);
                } catch (WpParseException parseFailure) {
                    LOG.warn("parsing of " + file.getPath() + " failed:", parseFailure);
                }
            }
        };
        ParallelForEach.iterate(
                new DumpSplitter(file).iterator(),
                WpThreadUtils.getMaxThreads(),
                1000,
                pageHandler,
                Integer.MAX_VALUE
        );
    }
}