/**
 * Returns the page array stored for a category.
 *
 * @param wpId category id, translated to an internal index via {@code catIdToIndex}
 * @return the {@code catPages} entry for that index (the stored array itself, not a copy),
 *         or an empty array when {@code catIdToIndex} yields a negative index
 */
public int[] getCategoryPages(int wpId) {
    final int denseIndex = catIdToIndex(wpId);
    return (denseIndex >= 0) ? catPages[denseIndex] : new int[0];
}
/** * * @param wpId * @return */ public String getCategoryName(int wpId){ int cid = catIdToIndex(wpId); //sparse to dense if(cid >=0) { String cname = cats[cid]; return cname; }else{ return ""; } }
/**
 * Picks, among the parents of a category, the one whose {@code catCosts} entry is the
 * largest or the smallest.
 *
 * Parents for which {@code catIdToIndex} yields a negative index are ignored. On equal
 * values the parent encountered first is kept, because only a strictly better value
 * replaces the current choice.
 *
 * @param wpId category id whose parents are examined
 * @param max  {@code true} to select the largest {@code catCosts} value,
 *             {@code false} to select the smallest
 * @return the id of the selected parent, or {@code -1} when no parent qualifies
 */
public Integer getMaxMinParentPageRank(int wpId, boolean max) {
    int chosenId = -1;
    int chosenIndex = -1; // negative means "nothing chosen yet"
    for (int parentId : getFamilyMembersCategories(wpId, "parent")) {
        final int parentIndex = catIdToIndex(parentId);
        if (parentIndex < 0) {
            continue;
        }
        final boolean replace;
        if (chosenIndex < 0) {
            replace = true;
        } else if (max) {
            replace = catCosts[chosenIndex] < catCosts[parentIndex];
        } else {
            replace = catCosts[chosenIndex] > catCosts[parentIndex];
        }
        if (replace) {
            chosenId = parentId;
            chosenIndex = parentIndex;
        }
    }
    return chosenId;
}
}
/**
 * Looks up the distance stored in {@code catDistances} under the index that
 * {@code graph.catIdToIndex} returns for the given category id.
 *
 * NOTE(review): the index is not checked before the lookup; what {@code get} returns
 * for an id that has no index depends on the type of {@code catDistances} - confirm.
 *
 * @param categoryId id handed to {@code graph.catIdToIndex}
 * @return the value {@code catDistances.get} yields for the resulting index
 */
public double getCategoryDistance(int categoryId) { return catDistances.get(graph.catIdToIndex(categoryId)); } public double getCategoryDistanceForIndex(int catIndex) {
/**
 * Reports whether {@code catDistances} contains a key equal to the index that
 * {@code graph.catIdToIndex} returns for the given id.
 *
 * NOTE(review): the parameter is named {@code pageId} but is passed to
 * {@code catIdToIndex} - confirm that a category id is what callers supply.
 *
 * @param pageId id handed to {@code graph.catIdToIndex}
 * @return the result of {@code catDistances.containsKey} for the resulting index
 */
public boolean hasCategoryDistance(int pageId) { return catDistances.containsKey(graph.catIdToIndex(pageId)); } public double getCategoryDistance(int categoryId) {
/** * Return the wikipedia page ids for child of the specified category * @param wpId * @return */ public int[] getFamilyMembersCategories(int wpId, String familyMember) { int index = catIdToIndex(wpId); if (index < 0) { return new int[0]; } int[] denseIds; if(familyMember == "child") { denseIds = catChildren[index]; //gets the children of index }else if(familyMember == "parent"){ denseIds = catParents[index]; //gets theparents of index }else{ return new int[0]; } int famMembersIds[] = new int[denseIds.length]; for (int i = 0; i < denseIds.length; i++) { famMembersIds[i] = catIndexToId(denseIds[i]); } return famMembersIds; }
public CategoryBfs(CategoryGraph graph, int startId, NameSpace startNamespace, Language language, int maxResults, TIntSet validWpIds, LocalCategoryMemberDao categoryMemberDao, int direction) throws DaoException { this.startPage = startId; this.maxResults = maxResults; this.graph = graph; this.validWpIds = validWpIds; this.categoryMemberDao = categoryMemberDao; this.language = language; pageDistances.put(startPage, 0.000000); if (startNamespace == NameSpace.ARTICLE) { Collection<Integer> cats = categoryMemberDao.getCategoryIds(language, startId); if (cats!=null){ for (int catId : cats) { int ci = graph.catIdToIndex(catId); if (ci >= 0) { openCats.add(new CategoryDistance(ci, graph.cats[ci], graph.catCosts[ci], (byte)direction)); } } } } else if (startNamespace == NameSpace.CATEGORY) { int ci = graph.catIdToIndex(startId); if (ci >= 0) { openCats.add(new CategoryDistance(ci, graph.cats[ci], 0.000000001, (byte)direction)); } } else { throw new IllegalArgumentException(); } }
/**
 * Finds a candidate category reached first by a category search started from the page.
 *
 * The search is stepped until one step visits at least one candidate or until it has no
 * more results. When a step visits several candidates, one of them is picked at random.
 *
 * @param page             page the search starts from; its language selects the graph
 * @param candidates       categories that are acceptable answers
 * @param weightedDistance not read by this implementation
 * @return a candidate visited in the first matching step, or {@code null} when the search
 *         ends without visiting any candidate
 * @throws DaoException declared for the graph lookup and the search
 */
@Override
public LocalPage getClosestCategory(LocalPage page, Set<LocalPage> candidates, boolean weightedDistance) throws DaoException {
    CategoryGraph graph = getGraph(page.getLanguage());
    CategoryBfs bfs = new CategoryBfs(graph, page.getLocalId(), page.getLanguage(), Integer.MAX_VALUE, null, this);
    bfs.setAddPages(false);
    bfs.setExploreChildren(false);

    // Key the candidates by graph index, the same key space the search reports visits in.
    Map<Integer, LocalPage> candidateByIndex = new HashMap<Integer, LocalPage>();
    for (LocalPage candidate : candidates) {
        candidateByIndex.put(graph.catIdToIndex(candidate.getLocalId()), candidate);
    }

    List<LocalPage> hits = new ArrayList<LocalPage>();
    while (bfs.hasMoreResults() && hits.isEmpty()) {
        CategoryBfs.BfsVisited step = bfs.step();
        for (int visitedIndex : step.cats.keys()) {
            LocalPage hit = candidateByIndex.get(visitedIndex);
            if (hit != null) {
                hits.add(hit);
            }
        }
    }

    return hits.isEmpty() ? null : hits.get(new Random().nextInt(hits.size()));
}
int catIndex1 = graph.catIdToIndex(lcm.getArticleId()); // cat index for page (probably -1) int catIndex2 = graph.catIdToIndex(lcm.getCategoryId()); // cat index for cat if (catIndex1 >= 0 && catIndex2 >= 0) { numCatChildren[catIndex2]++; int catIndex1 = graph.catIdToIndex(lcm.getArticleId()); // cat index for page (probably -1) int catIndex2 = graph.catIdToIndex(lcm.getCategoryId()); // cat index for cat if (catIndex1 >= 0 && catIndex2 >= 0) { graph.catChildren[catIndex2][--numCatChildren[catIndex2]] = catIndex1;
int i = graph.catIdToIndex(p.getLocalId()); if (i >= 0) goalIndexes.add(i); TIntDoubleMap distances = new TIntDoubleHashMap(); for (int catId : getCategoryIds(language, pageId)) { int i = graph.catIdToIndex(catId); if (i >= 0) frontier.add(new CatCost(null, -1, i, graph.catCosts[i]));
for (LocalPage p : candidateCategories) { if (p.getLanguage() != language) throw new IllegalStateException("Category languages must be identitical"); CatCost cc = new CatCost(p, p.getLocalId(), graph.catIdToIndex(p.getLocalId()), 0.0); if (cc.catIndex >= 0) { frontier.add(cc);