private TIntSet getActualLinks(int wpId) throws DaoException { TIntSet existingIds = new TIntHashSet(); for (LocalLink ll : linkDao.getLinks(language, wpId, true)) { if (ll.getDestId() >= 0) { existingIds.add(ll.getDestId()); } } // hack: add the link itself existingIds.add(wpId); return existingIds; }
public LinkInfo(LocalLink link) { this.startChar = link.getLocation(); this.endChar = startChar + link.getAnchorText().length(); this.anchortext = link.getAnchorText(); this.knownDest = link.getDestId(); }
/**
 * Loads the next entry into {@code buffer} when nothing is buffered yet and
 * the iteration has not finished.
 *
 * When the underlying iterator is exhausted, or hands back {@code null},
 * {@code finished} is set instead and {@code buffer} is left untouched.
 */
private void fillBuffer() {
    if (!finished && buffer == null) {
        LocalLink next = iter.hasNext() ? iter.next() : null;
        if (next == null) {
            finished = true;
        } else {
            // The trailing 1 is passed through as the entry's last argument
            // (presumably an occurrence count - confirm against Entry).
            buffer = new BasePhraseAnalyzer.Entry(
                    next.getLanguage(),
                    next.getDestId(),
                    next.getAnchorText(),
                    1
            );
        }
    }
}
} // closes the enclosing scope that opens outside this excerpt
/**
 * Collects the destination id of every link returned by
 * {@code linkDao.getLinks(language, pageId1, true)}.
 *
 * Destination ids are added as-is; no filtering is applied.
 *
 * @param pageId1 id of the page whose links are looked up
 * @return set of destination ids
 * @throws DaoException declared for the link lookup
 */
private TIntSet getOutlinks(int pageId1) throws DaoException {
    TIntSet destinations = new TIntHashSet();
    for (LocalLink outgoing : linkDao.getLinks(language, pageId1, true)) {
        int destId = outgoing.getDestId();
        destinations.add(destId);
    }
    return destinations;
}
/**
 * Copies destinations from the page's links onto candidates that share the
 * same anchor text.
 *
 * Each distinct anchor text is handled once. For that anchor, candidates are
 * scanned in list order: a matching candidate that already has a known
 * destination is logged as a conflict and the scan continues; the first
 * matching candidate without one receives the link's destination id and the
 * scan stops. Links with a negative destination id or no anchor text are
 * ignored.
 *
 * @param wpId       id of the page whose links are consulted
 * @param candidates candidate links to update in place
 * @throws DaoException declared for the link lookup
 */
private void identifyKnownCandidates(int wpId, List<LinkInfo> candidates) throws DaoException {
    Set<String> seenAnchors = new HashSet<String>();

    // Hack: Mark the FIRST POSSIBLE of each candidate link as verified.
    for (LocalLink link : lld.getLinks(language, wpId, true)) {
        boolean resolved = link.getDestId() >= 0;
        String anchor = link.getAnchorText();
        if (!resolved || anchor == null || seenAnchors.contains(anchor)) {
            continue;
        }

        for (LinkInfo candidate : candidates) {
            if (!anchor.equals(candidate.getAnchortext())) {
                continue;
            }
            if (candidate.getKnownDest() == null) {
                candidate.setKnownDest(link.getDestId());
                break;
            }
            LOG.info("conflict for link info " + candidate.getAnchortext() + " between " + candidate.getKnownDest() + " and " + link.getDestId());
        }

        seenAnchors.add(anchor);
    }
}
// Appends the token of word w to phrase, followed by ":" and the mention URL
// looked up from the destination id of mention m.
phrase += words.get(w).getToken(); phrase += ":" + getMentionUrl(mentions.get(m).getDestId());
/**
 * Handles a wikify request: runs the "websail" wikifier for the request's
 * language over the {@code text} parameter and writes the detected links
 * back as JSON.
 *
 * Each detected link becomes one object with the keys {@code index},
 * {@code text}, {@code lang}, {@code articleId} and {@code title}. When the
 * destination page cannot be found the title is reported as "Unknown".
 *
 * @param req incoming request; must carry a {@code text} parameter
 * @throws ConfigurationException declared for the wikifier lookup
 * @throws DaoException declared for the wikifier and page lookups
 */
private void doWikify(WikiBrainWebRequest req) throws ConfigurationException, DaoException {
    Language lang = req.getLanguage();
    Wikifier wf = env.getConfigurator().get(Wikifier.class, "websail", "language", lang.getLangCode());
    String text = req.getParamOrDie("text");

    // Parameterized instead of raw collections so the compiler checks what
    // goes into the JSON payload; the runtime contents are unchanged.
    List<Map<String, Object>> jsonConcepts = new ArrayList<Map<String, Object>>();
    for (LocalLink ll : wf.wikify(text)) {
        LocalPage page = pageDao.getById(lang, ll.getDestId());
        Map<String, Object> obj = new HashMap<String, Object>();
        obj.put("index", ll.getLocation());
        obj.put("text", ll.getAnchorText());
        obj.put("lang", lang.getLangCode());
        obj.put("articleId", ll.getDestId());
        // The page lookup may come back empty; fall back to a placeholder title.
        obj.put("title", page == null ? "Unknown" : page.getTitle().getCanonicalTitle());
        jsonConcepts.add(obj);
    }
    req.writeJsonResponse("text", text, "references", jsonConcepts);
}
// NOTE(review): this excerpt has lines elided (braces are unbalanced and `src` is not declared here).
// Visible steps: iterate the links from sqlDao.get(new DaoFilter()), skipping any link whose source
// or destination id is negative; build a LocalId for the destination; give `src` an empty set in
// outGraph when it has no entry yet. Then collect into `actual` the destination LocalIds of the links
// from matrixDao.getLinks(src.getLanguage(), src.getId(), true), applying the same negative-id skip.
int i = 0; for (LocalLink ll : sqlDao.get(new DaoFilter())) { if (ll.getSourceId() < 0 || ll.getDestId() < 0) { continue; LocalId dest = new LocalId(ll.getLanguage(), ll.getDestId()); if (!outGraph.containsKey(src)) { outGraph.put(src, new HashSet<LocalId>()); Set<LocalId> actual = new HashSet<LocalId>(); for (LocalLink ll : matrixDao.getLinks(src.getLanguage(), src.getId(), true)) { if (ll.getSourceId() < 0 || ll.getDestId() < 0) { continue; actual.add(new LocalId(ll.getLanguage(), ll.getDestId()));
/**
 * Wikifies the "Barack Obama" article and prints every detected link
 * together with the title of the page it points to.
 *
 * @throws DaoException declared for the page lookups and the wikify call
 */
public void testWikify() throws DaoException {
    int barackId = lpd.getIdByTitle("Barack Obama", language, NameSpace.ARTICLE);
    RawPage rp = rpd.getById(language, barackId);

    // Single pass; the pass number is still echoed in the output.
    final int passes = 1;
    for (int pass = 0; pass < passes; pass++) {
        List<LocalLink> found = wikify(rp.getLocalId());
        System.out.println("Links detected for " + rp.getTitle() + " (" + pass + ")");
        for (LocalLink link : found) {
            System.out.println("\t" + link + " page " + lpd.getById(language, link.getDestId()).getTitle());
        }
    }
}
// NOTE(review): this excerpt has lines elided (braces are unbalanced and `src` is not declared here).
// Visible steps: iterate the links from sqlDao.get(new DaoFilter()), skipping any link whose source
// or destination id is negative; build a LocalId for the destination; give `src` an empty set in
// outGraph when it has no entry yet. Then collect into `actual` the destination LocalIds of the links
// from matrixDao.getLinks(src.getLanguage(), src.getId(), true), applying the same negative-id skip.
int i = 0; for (LocalLink ll : sqlDao.get(new DaoFilter())) { if (ll.getSourceId() < 0 || ll.getDestId() < 0) { continue; LocalId dest = new LocalId(ll.getLanguage(), ll.getDestId()); if (!outGraph.containsKey(src)) { outGraph.put(src, new HashSet<LocalId>()); Set<LocalId> actual = new HashSet<LocalId>(); for (LocalLink ll : matrixDao.getLinks(src.getLanguage(), src.getId(), true)) { if (ll.getSourceId() < 0 || ll.getDestId() < 0) { continue; actual.add(new LocalId(ll.getLanguage(), ll.getDestId()));
/**
 * Stores one link row: the link's language id, source id and destination id,
 * followed by the supplied universal ids and algorithm id.
 *
 * @param localLink    link supplying the language, source id and destination id
 * @param sourceUnivId universal id recorded for the source
 * @param destUnivId   universal id recorded for the destination
 * @param algorithmId  algorithm id recorded with the row
 * @throws DaoException declared for the insert
 */
public void save(LocalLink localLink, int sourceUnivId, int destUnivId, int algorithmId) throws DaoException {
    int sourceId = localLink.getSourceId();
    int destId = localLink.getDestId();
    insert(localLink.getLanguage().getId(), sourceId, destId, sourceUnivId, destUnivId, algorithmId);
}
// For each link of the page, the column id is the destination id when outLinks is true and the
// source id otherwise; links whose column id is negative are skipped.
// NOTE(review): the rest of the loop body lies outside this excerpt.
for (LocalLink link : linkDao.getLinks(language, pageId, outLinks)) { int columnId = outLinks ? link.getDestId() : link.getSourceId(); if (columnId < 0) { continue;
@Override public void save(LocalLink item) throws DaoException { if (delegate != null) delegate.save(item); // skip red links if (item.getDestId() < 0 || item.getSourceId() < 0) { return; } LocalId src = new LocalId(item.getLanguage(), item.getSourceId()); LocalId dest = new LocalId(item.getLanguage(), item.getDestId()); if (!src.canPackInInt() || !dest.canPackInInt()) { return; } try { BufferedWriter writer = getSortingWriter(); writer.write(src.toInt() + " " + dest.toInt() + "\n"); } catch (IOException e) { throw new DaoException(e); } }
/**
 * Builds a new link that copies every property of {@code ll} except the
 * location, which is replaced by the supplied value.
 *
 * @param ll       link to copy
 * @param location location to use for the copy
 * @return a new link with the given location
 */
private LocalLink cloneLinkWithLocation(LocalLink ll, int location) {
    String anchor = ll.getAnchorText();
    int sourceId = ll.getSourceId();
    int destId = ll.getDestId();
    boolean outlink = ll.isOutlink();
    boolean parseable = ll.isParseable();
    LocalLink copy = new LocalLink(
            ll.getLanguage(), anchor, sourceId, destId,
            outlink, location, parseable, ll.getLocType());
    return copy;
}
/**
 * Collects the destination ids of all links matched by a filter on the given
 * source id and this object's language.
 *
 * @param pageId source id to filter on
 * @return set of destination ids, unfiltered
 * @throws DaoException declared for the link lookup
 */
private TIntSet getLinksFrom(int pageId) throws DaoException {
    DaoFilter bySource = new DaoFilter().setSourceIds(pageId).setLanguages(getLanguage());
    TIntSet destinations = new TIntHashSet();
    for (LocalLink link : linkDao.get(bySource)) {
        int destId = link.getDestId();
        destinations.add(destId);
    }
    return destinations;
}
public static void main(String args[]) throws ConfigurationException, DaoException { Env env = EnvBuilder.envFromArgs(args); LocalPageDao pageDao = env.getConfigurator().get(LocalPageDao.class); // Get the default wikifier in the default language. Wikifier wikifier = env.getConfigurator().get( Wikifier.class, "websail", "language", env.getDefaultLanguage().getLangCode()); for (LocalLink link : wikifier.wikify(CORPUS)) { System.out.println("link is " + link.getAnchorText() + " is for " + pageDao.getById(link.getLanguage(), link.getDestId())); } } }
public static void main(String args[]) throws ConfigurationException, DaoException { Env env = EnvBuilder.envFromArgs(args); LocalPageDao pageDao = env.getConfigurator().get(LocalPageDao.class); // Get the default wikifier in the default language. Wikifier wikifier = env.getConfigurator().get( Wikifier.class, "websail", "language", env.getDefaultLanguage().getLangCode()); for (LocalLink link : wikifier.wikify(CORPUS)) { System.out.println("link is " + link.getAnchorText() + " is for " + pageDao.getById(link.getLanguage(), link.getDestId())); } } }
/**
 * Stores one link row made of the link's language id, anchor text, source
 * id, destination id, location, parseable flag and the ordinal of its
 * location type.
 *
 * @param localLink link to store
 * @throws DaoException declared for the insert
 */
@Override
public void save(LocalLink localLink) throws DaoException {
    String anchor = localLink.getAnchorText();
    int sourceId = localLink.getSourceId();
    int destId = localLink.getDestId();
    int location = localLink.getLocation();
    boolean parseable = localLink.isParseable();
    // The location type is stored by its ordinal, exactly as before.
    int locTypeOrdinal = localLink.getLocType().ordinal();
    insert(localLink.getLanguage().getId(), anchor, sourceId, destId, location, parseable, locTypeOrdinal);
}