/**
 * Tells whether the given text names a namespace.
 *
 * @param s the candidate namespace name or alias
 * @return true when {@link #getNameSpaceByName(String)} resolves {@code s}
 *         to a namespace, false when it yields null
 */
public static boolean isNamespaceString(String s) {
    return getNameSpaceByName(s) != null;
}
/**
 * Resolves the stored numeric namespace value of this page into its
 * NameSpace constant.
 *
 * @return the NameSpace of this page, as looked up by
 *         {@code NameSpace.getNameSpaceByValue}
 */
public NameSpace getNameSpace() {
    final int rawValue = nameSpace.intValue();
    return NameSpace.getNameSpaceByValue(rawValue);
}
}
/**
 * Looks up a namespace by the numeric value Wikipedia assigns to it.
 * A value of 0 always maps to {@code ARTICLE}, so a disambiguation is
 * never returned.
 *
 * @param value the numeric value of the NameSpace as defined by Wikipedia
 * @return the matching NameSpace, or null when no constant has that value
 */
public static NameSpace getNameSpaceByValue(int value) {
    // Short-circuit the main namespace before scanning the constants.
    if (value == 0) {
        return NameSpace.ARTICLE;
    }
    final NameSpace[] candidates = NameSpace.values();
    for (int i = 0; i < candidates.length; i++) {
        if (candidates[i].getValue() == value) {
            return candidates[i];
        }
    }
    return null;
}
}
/**
 * Takes in a string and returns the corresponding NameSpace.
 * <p>
 * The comparison is case-insensitive. A string matches either the name of
 * a constant with underscores replaced by spaces (e.g. "file talk"), or
 * one of the aliases listed below (e.g. "WP", "Image", "Project").
 * The empty string maps to {@code ARTICLE}.
 *
 * @param s the namespace name or alias; may be null
 * @return the matching NameSpace, or null if the string is null or does
 *         not match a namespace
 */
public static NameSpace getNameSpaceByName(String s){
    if (s == null) {
        return null;
    }
    // Upper-case with a fixed locale: the default-locale variant maps
    // 'i' to a dotted capital I under Turkish/Azeri locales, which would
    // make names such as "wikipedia" or "image" fail to match.
    s = s.toUpperCase(java.util.Locale.ROOT);
    for (NameSpace ns : NameSpace.values()){
        if (ns.toString().replace("_"," ").equals(s)){
            return ns;
        }
    }
    // Aliases that are not spelled like a constant name.
    if (s.equals("")) return NameSpace.ARTICLE;
    else if (s.equals("WP")) return NameSpace.WIKIPEDIA;
    else if (s.equals("WT")) return NameSpace.WIKIPEDIA_TALK;
    else if (s.equals("IMAGE")) return NameSpace.FILE;
    else if (s.equals("IMAGE TALK")) return NameSpace.FILE_TALK;
    else if (s.equals("PROJECT")) return NameSpace.WIKIPEDIA;
    else if (s.equals("PROJECT TALK")) return NameSpace.WIKIPEDIA_TALK;
    else if (s.equals("MEDIAWIKI")) return NameSpace.MEDIA_WIKI;
    else if (s.equals("MEDIAWIKI TALK")) return NameSpace.MEDIA_WIKI_TALK;
    else if (s.equals("CAT")) return NameSpace.CATEGORY;
    else if (s.equals("MOS")) return NameSpace.WIKIPEDIA;
    else if (s.equals("H")) return NameSpace.HELP;
    else if (s.equals("P")) return NameSpace.PORTAL;
    else if (s.equals("T")) return NameSpace.TALK;
    else return null;
}
/**
 * Assembles a UniversalPage from the database records that describe it.
 * Subclasses are expected to override this method.
 * <p>
 * The universal id, algorithm id and namespace are taken from the first
 * record; every record contributes one (language, local page id) entry.
 *
 * @param result the database records belonging to one universal page
 * @return the UniversalPage built from the records, or null when
 *         {@code result} is null or empty
 * @throws DaoException if the record is not a Page
 */
protected UniversalPage buildUniversalPage(List<Record> result) throws DaoException {
    if (result == null || result.isEmpty()) {
        return null;
    }
    final int rowCount = result.size();
    final Record first = result.get(0);
    Multimap<Language, LocalId> idsByLanguage = HashMultimap.create(rowCount, rowCount);
    // NOTE(review): the namespace is read through the LOCAL_PAGE column
    // while every other field uses UNIVERSAL_PAGE — confirm this is intended.
    NameSpace pageNameSpace = NameSpace.getNameSpaceByArbitraryId(
            first.getValue(Tables.LOCAL_PAGE.NAME_SPACE));
    for (Record row : result) {
        Language rowLanguage = Language.getById(row.getValue(Tables.UNIVERSAL_PAGE.LANG_ID));
        int localPageId = row.getValue(Tables.UNIVERSAL_PAGE.PAGE_ID);
        idsByLanguage.put(rowLanguage, new LocalId(rowLanguage, localPageId));
    }
    return new UniversalPage(
            first.getValue(Tables.UNIVERSAL_PAGE.UNIV_ID),
            first.getValue(Tables.UNIVERSAL_PAGE.ALGORITHM_ID),
            pageNameSpace,
            idsByLanguage
    );
}
LocalPage parent = pageDao.getByTitle(lang, NameSpace.CATEGORY, TOP_LEVEL_PARENT); for (LocalPage page : catDao.getCategoryMembers(parent).values()) { if (page.getNameSpace().equals(NameSpace.CATEGORY)) { topLevelCategories.add(page);
/**
 * Restricts this filter to the given namespaces.
 * Used by LocalPage, RawPage, and UniversalPage.
 *
 * @param nameSpaces the namespaces to keep; null or an empty collection
 *                   clears the namespace filter (the stored ids become null)
 * @return this filter, to allow chained calls
 */
public DaoFilter setNameSpaces(Collection<NameSpace> nameSpaces) {
    if (nameSpaces == null || nameSpaces.isEmpty()) {
        this.nsIds = null;
        return this;
    }
    Collection<Short> ids = new ArrayList<Short>();
    for (NameSpace nameSpace : nameSpaces) {
        // The filter stores the enum's arbitrary short id of each namespace.
        ids.add(nameSpace.getArbitraryId());
    }
    this.nsIds = ids;
    return this;
}
/**
 * Strips a leading "Namespace:" prefix from a title, if there is one.
 *
 * @param text the full title text
 * @return the trimmed text after the first colon when the part before it
 *         is a namespace string; otherwise {@code text} unchanged
 */
private static String getTitleStringWithoutNamespace(String text) {
    // Split only at the first colon so later colons stay in the title.
    final String[] pieces = text.split(":", 2);
    if (pieces.length != 1 && NameSpace.isNamespaceString(pieces[0])) {
        return pieces[1].trim();
    }
    return text;
}
/**
 * Gives a short ID for this NameSpace.
 * The ID is simply the constant's ordinal narrowed to a short; it is
 * assigned arbitrarily by this enum, must not be tied to anything else,
 * and is unrelated to NameSpace.getValue().
 *
 * @return the ordinal of this constant as a short
 */
public short getArbitraryId() {
    final int position = ordinal();
    return (short) position;
}
/*
 * Ad-hoc console checks: looks up English pages by title ("Apple",
 * "University of Minnesota", "Apple Tree") in the namespace whose
 * arbitrary id is 0, looks up one page by local id 16308, and prints
 * each result. The "Apple Tree" lookup and its isRedirect() check are
 * printed twice. The last two statements print a page title together
 * with the namespace resolved from pages.get(pageId) and bump pageCount.
 * NOTE(review): testClass, pages, pageTitle, pageId and pageCount are
 * defined outside this span — confirm these statements belong together.
 */
Language lang = Language.getByLangCode("en"); System.out.println(testClass.getByTitle(new Title("Apple", Language.getByLangCode("en")), NameSpace.getNameSpaceByArbitraryId(0))); System.out.println(testClass.getByTitle(new Title("University of Minnesota", Language.getByLangCode("en")), NameSpace.getNameSpaceByArbitraryId(0))); System.out.println(testClass.getById(lang,16308)); System.out.println(testClass.getByTitle(new Title("Apple Tree", Language.getByLangCode("en")), NameSpace.getNameSpaceByArbitraryId(0))); System.out.println("isRedirect? "+testClass.getByTitle(new Title("Apple Tree", lang), NameSpace.getNameSpaceByArbitraryId(0)).isRedirect()); System.out.println(testClass.getByTitle(new Title("Apple Tree", Language.getByLangCode("en")), NameSpace.getNameSpaceByArbitraryId(0))); System.out.println("isRedirect? "+testClass.getByTitle(new Title("Apple Tree", lang), NameSpace.getNameSpaceByArbitraryId(0)).isRedirect()); System.out.println("\tPage: " + pageTitle + "; Namespace: " + NameSpace.getNameSpaceByArbitraryId(pages.get(pageId))); pageCount++;
/**
 * <p>
 * Reports the MediaWiki namespace id of the revised page, read from the
 * namespace of the wrapped raw page. What a given id means depends on how
 * the originating site is configured. Usually 0 is the main namespace,
 * even ids denote content namespaces, and the following odd id denotes
 * the matching talk namespace.
 * </p>
 * <p>
 * Within one MediaWiki site, title plus namespace identifies a page at a
 * given moment, but both can change when a page is moved. The page id is
 * the more reliable way to follow a page through its history.
 * </p>
 *
 * @return integer namespace id
 */
@Override
public int getNamespace() {
    final int namespaceId = raw.getNamespace().getValue();
    return namespaceId;
}
LocalPage parent = pageDao.getByTitle(lang, NameSpace.CATEGORY, TOP_LEVEL_PARENT); for (LocalPage page : catDao.getCategoryMembers(parent).values()) { if (page.getNameSpace().equals(NameSpace.CATEGORY)) { topLevelCategories.add(page);
/**
 * Computes the long hash code for a page identified by language, title
 * and namespace by delegating to the id-based overload.
 *
 * @param l     the language; its id is passed on
 * @param title the page title
 * @param ns    the namespace; its arbitrary short id is passed on
 * @return the value returned by the id-based {@code longHashCode} overload
 */
public static long longHashCode(Language l, String title, NameSpace ns) {
    final short nameSpaceId = ns.getArbitraryId();
    return longHashCode(l.getId(), title, nameSpaceId);
}
/**
 * Extracts the namespace prefix of a title, if it has one.
 *
 * @param text the full title text
 * @return the part before the first colon when the text contains a colon
 *         and that part is a namespace string; null otherwise (including
 *         when the text is exactly ":")
 */
private static String getNamespaceString(String text) {
    if (text.equals(":") || !text.contains(":")) {
        return null;
    }
    final String[] pieces = text.split(":");
    // A text made only of colons splits into an empty array.
    if (pieces.length == 0) {
        return null;
    }
    final String candidate = pieces[0];
    return NameSpace.isNamespaceString(candidate) ? candidate : null;
}
/** * Used by provider only. */ private LuceneOptions(String name, Configurator configurator, String matchVersion, String luceneRoot, List<String> namespaces, TokenizerOptions options, TextFieldElements elements) { this.name = name; this.configurator = configurator; this.matchVersion = Version.parseLeniently(matchVersion); this.luceneRoot = new File(luceneRoot); this.namespaces = new ArrayList<NameSpace>(); for (String s : namespaces) { this.namespaces.add(NameSpace.getNameSpaceByName(s)); } this.options = options; this.elements = elements; }
/**
 * Converts one RAW_PAGE database record into a RawPage.
 *
 * @param record the database record to read the columns from
 * @return a RawPage populated from the record's columns
 */
private RawPage buildRawPage(Record record) {
    // The stored SQL timestamp is converted to a plain java.util.Date.
    final Timestamp lastEditStamp = record.getValue(Tables.RAW_PAGE.LASTEDIT);
    final Date lastEdit = new Date(lastEditStamp.getTime());
    final Language pageLanguage = Language.getById(record.getValue(Tables.RAW_PAGE.LANG_ID));
    final NameSpace pageNameSpace =
            NameSpace.getNameSpaceByArbitraryId(record.getValue(Tables.RAW_PAGE.NAME_SPACE));

    return new RawPage(
            record.getValue(Tables.RAW_PAGE.PAGE_ID),
            record.getValue(Tables.RAW_PAGE.REVISION_ID),
            record.getValue(Tables.RAW_PAGE.TITLE),
            record.getValue(Tables.RAW_PAGE.BODY),
            lastEdit,
            pageLanguage,
            pageNameSpace,
            record.getValue(Tables.RAW_PAGE.IS_REDIRECT),
            record.getValue(Tables.RAW_PAGE.IS_DISAMBIG),
            record.getValue(Tables.RAW_PAGE.REDIRECT_TITLE)
    );
}
/**
 * <p>
 * Reports the MediaWiki namespace id of the revised page, read from the
 * namespace of the wrapped raw page. What a given id means depends on how
 * the originating site is configured. Usually 0 is the main namespace,
 * even ids denote content namespaces, and the following odd id denotes
 * the matching talk namespace.
 * </p>
 * <p>
 * Within one MediaWiki site, title plus namespace identifies a page at a
 * given moment, but both can change when a page is moved. The page id is
 * the more reliable way to follow a page through its history.
 * </p>
 *
 * @return integer namespace id
 */
@Override
public int getNamespace() {
    final int namespaceId = raw.getNamespace().getValue();
    return namespaceId;
}
/*
 * Reads the destination namespace value (row[1]) and title (row[2]) from
 * the row and resolves the NameSpace from the numeric value.
 * NOTE(review): destNamespace is a boxed Integer; if getNameSpaceByValue
 * takes a primitive int, a null row[1] would fail on unboxing — confirm
 * the column is never null.
 */
Integer destNamespace = (Integer) row[1]; String destTitle = (String) row[2]; NameSpace ns = NameSpace.getNameSpaceByValue(destNamespace);
@Override public void category(ParsedCategory cat) throws WikiBrainException { Language lang = cat.category.getLanguage(); try{ LanguageInfo langInfo = LanguageInfo.getByLanguage(lang); int c = counter.getAndIncrement(); if(c % 100000 == 0) LOG.info("Visited category #" + c); String catText = cat.category.getCanonicalTitle().split("\\|")[0]; //piped cat link catText = catText.split("#")[0]; //cat subsection Title catTitle = new Title(catText, langInfo); if(!isCategory(catText, langInfo) && !catTitle.getNamespace().equals(NameSpace.CATEGORY)) { throw new WikiBrainException("Thought it was a category, was not a category."); } int catId = pageDao.getIdByTitle(catTitle.getCanonicalTitle(), lang, NameSpace.CATEGORY); catMemDao.save( new LocalCategoryMember( catId, cat.location.getXml().getLocalId(), lang )); metaDao.incrementRecords(LocalCategoryMember.class, lang); } catch (DaoException e) { metaDao.incrementErrorsQuietly(LocalCategoryMember.class, lang); throw new WikiBrainException(e); } }