public LinkInfo(LocalLink link) { this.startChar = link.getLocation(); this.endChar = startChar + link.getAnchorText().length(); this.anchortext = link.getAnchorText(); this.knownDest = link.getDestId(); }
while (iter.hasNext()) { LocalLink ll = iter.next(); String a = ll.getAnchorText(); int j = text.indexOf(a, i); String a = ll.getAnchorText(); i = findNextUnused(text, a, i, used); if (i >= 0) {
/**
 * Returns the links already recorded for a page, aligned against the given text.
 *
 * Links are fetched from {@code linkDao}, filtered down to those with a
 * non-negative location, the parseable flag set, and a non-empty anchor
 * text, sorted by their natural ordering, and then handed to
 * {@code align(...)} together with the text.
 *
 * @param wpId id of the page whose links are looked up
 * @param text the text to align links against; {@code null} or empty yields an empty list
 * @return the result of {@code align(...)}, or a new empty list when there is no text
 * @throws DaoException if the link lookup fails
 */
@Override
public List<LocalLink> wikify(int wpId, String text) throws DaoException {
    List<LocalLink> usable = new ArrayList<LocalLink>();
    if (text == null || text.isEmpty()) {
        // Nothing to align against; skip the DAO lookup entirely.
        return usable;
    }

    for (LocalLink candidate : linkDao.getLinks(language, wpId, true)) {
        if (candidate.getLocation() < 0) {
            continue;
        }
        if (!candidate.isParseable()) {
            continue;
        }
        if (StringUtils.isNullOrEmpty(candidate.getAnchorText())) {
            continue;
        }
        usable.add(candidate);
    }

    Collections.sort(usable);
    return align(usable, text);
}
/**
 * Marks candidates whose anchor text matches a link already stored for the page.
 *
 * Hack: for each distinct anchor text among the stored links, only the FIRST
 * candidate that has no known destination yet is marked as verified.
 * Candidates with the same anchor text that already carry a known
 * destination are reported as conflicts in the log and left untouched.
 *
 * @param wpId       id of the page whose stored links are consulted
 * @param candidates candidate links to annotate; modified in place
 * @throws DaoException if the link lookup fails
 */
private void identifyKnownCandidates(int wpId, List<LinkInfo> candidates) throws DaoException {
    // Anchor texts that have already been processed once; later links with
    // the same anchor text are ignored.
    Set<String> seenAnchors = new HashSet<String>();

    for (LocalLink stored : lld.getLinks(language, wpId, true)) {
        boolean unusable = stored.getDestId() < 0
                || stored.getAnchorText() == null
                || seenAnchors.contains(stored.getAnchorText());
        if (unusable) {
            continue;
        }

        for (LinkInfo candidate : candidates) {
            if (!stored.getAnchorText().equals(candidate.getAnchortext())) {
                continue;
            }
            if (candidate.getKnownDest() == null) {
                candidate.setKnownDest(stored.getDestId());
                break;
            }
            // Already verified: report and keep scanning for an unmarked candidate.
            LOG.info("conflict for link info " + candidate.getAnchortext() + " between " + candidate.getKnownDest() + " and " + stored.getDestId());
        }

        seenAnchors.add(stored.getAnchorText());
    }
}
/**
 * Ensures {@code buffer} holds the next entry, if one is available.
 *
 * Does nothing when iteration has already finished or an entry is still
 * buffered. Otherwise pulls the next link from {@code iter}; an exhausted
 * iterator or a {@code null} element marks the iteration as finished.
 */
private void fillBuffer() {
    if (finished || buffer != null) {
        return;
    }

    LocalLink next = iter.hasNext() ? iter.next() : null;
    if (next == null) {
        finished = true;
        return;
    }

    // The trailing 1 is passed through unchanged (presumably an occurrence count — confirm).
    buffer = new BasePhraseAnalyzer.Entry(
            next.getLanguage(),
            next.getDestId(),
            next.getAnchorText(),
            1
    );
}
}
public static void main(String args[]) throws ConfigurationException, DaoException, IOException { //LocalLinkDao ldao = new Configurator(new Configuration()).get(LocalLinkDao.class, "live"); //try to change from "live" to "dao" LocalLinkLiveDao ldao = new LocalLinkLiveDao(); Language lang = Language.getByLangCode("simple"); int sourceId = 10983; //Minnesota int destId = 3009; //California LocalLink link = ldao.getLink(lang, sourceId, destId); if(link != null) System.out.println("Got link \"" + link.getAnchorText() + "\" from " + sourceId + " to " + destId); Iterable<LocalLink> inlinks = ldao.getLinks(lang, sourceId, false); System.out.println("\nLinks into page " + sourceId + ":"); for (LocalLink inlink : inlinks) { System.out.println(inlink); } Iterable<LocalLink> outlinks = ldao.getLinks(lang, sourceId, true); System.out.println("\nLinks out of page " + sourceId + ":"); for (LocalLink outlink : outlinks) { System.out.println(outlink); } }
public static void main(String args[]) throws ConfigurationException, DaoException, IOException { //LocalLinkDao ldao = new Configurator(new Configuration()).get(LocalLinkDao.class, "live"); //try to change from "live" to "dao" LocalLinkLiveDao ldao = new LocalLinkLiveDao(); Language lang = Language.getByLangCode("simple"); int sourceId = 10983; //Minnesota int destId = 3009; //California LocalLink link = ldao.getLink(lang, sourceId, destId); if(link != null) System.out.println("Got link \"" + link.getAnchorText() + "\" from " + sourceId + " to " + destId); Iterable<LocalLink> inlinks = ldao.getLinks(lang, sourceId, false); System.out.println("\nLinks into page " + sourceId + ":"); for (LocalLink inlink : inlinks) { System.out.println(inlink); } Iterable<LocalLink> outlinks = ldao.getLinks(lang, sourceId, true); System.out.println("\nLinks out of page " + sourceId + ":"); for (LocalLink outlink : outlinks) { System.out.println(outlink); } }
/**
 * Combines the links found by {@code identityWikifier} with the accepted
 * candidate links, avoiding overlapping character spans.
 *
 * The spans of the identity wikifier's links are reserved first. Candidates
 * are then visited in their natural ordering (defined by {@code LinkInfo};
 * presumably best-first — confirm) and a candidate is added only when it has
 * a destination, its score exceeds {@code minFinalScore}, and none of its
 * characters are already covered by a previously accepted link.
 *
 * Note that {@code infos} is sorted in place, and the list returned by the
 * identity wikifier is the one that gets extended and returned.
 *
 * @param wpId  id of the page the text belongs to
 * @param text  the text being wikified; must be non-null
 * @param infos candidate links; sorted in place as a side effect
 * @return all accepted links ordered by ascending location
 * @throws DaoException if the identity wikifier's lookup fails
 */
private List<LocalLink> link(int wpId, String text, List<LinkInfo> infos) throws DaoException {
    // One bit per character of text: set means "already inside a link".
    BitSet used = new BitSet(text.length());

    List<LocalLink> results = identityWikifier.wikify(wpId, text);
    for (LocalLink existing : results) {
        int start = existing.getLocation();
        used.set(start, start + existing.getAnchorText().length());
    }

    Collections.sort(infos);
    for (LinkInfo li : infos) {
        if (li.getDest() != null
                && li.getScore() > minFinalScore
                && used.get(li.getStartChar(), li.getEndChar()).isEmpty()) {
            results.add(li.toLocalLink(language, wpId));
            used.set(li.getStartChar(), li.getEndChar());
        }
    }

    Collections.sort(results, new Comparator<LocalLink>() {
        @Override
        public int compare(LocalLink o1, LocalLink o2) {
            // Explicit three-way comparison: subtracting the two ints can
            // overflow and report the wrong sign for extreme values.
            int a = o1.getLocation();
            int b = o2.getLocation();
            if (a < b) {
                return -1;
            }
            return (a > b) ? 1 : 0;
        }
    });
    return results;
}
// Offset just past mention m: its location plus the length of its anchor text
// (presumably used as an exclusive end index — confirm against later uses of `end`).
int end = mentions.get(m).getLocation() + mentions.get(m).getAnchorText().length();
/**
 * Handles a wikify request: runs the "websail" wikifier for the request's
 * language over the {@code text} parameter and writes the detected links
 * back as JSON.
 *
 * Each detected link becomes one object with the keys {@code index},
 * {@code text}, {@code lang}, {@code articleId} and {@code title}. When the
 * destination page cannot be found, the title is reported as "Unknown".
 *
 * @param req the incoming request; must carry a {@code text} parameter
 * @throws ConfigurationException if the wikifier cannot be obtained from the configurator
 * @throws DaoException if wikification or a page lookup fails
 */
private void doWikify(WikiBrainWebRequest req) throws ConfigurationException, DaoException {
    Language lang = req.getLanguage();
    Wikifier wf = env.getConfigurator().get(Wikifier.class, "websail", "language", lang.getLangCode());
    String text = req.getParamOrDie("text");

    // Parameterized instead of raw collections so the compiler checks what goes in.
    List<Map<String, Object>> jsonConcepts = new ArrayList<Map<String, Object>>();
    for (LocalLink ll : wf.wikify(text)) {
        LocalPage page = pageDao.getById(lang, ll.getDestId());

        Map<String, Object> obj = new HashMap<String, Object>();
        obj.put("index", ll.getLocation());
        obj.put("text", ll.getAnchorText());
        obj.put("lang", lang.getLangCode());
        obj.put("articleId", ll.getDestId());
        obj.put("title", page == null ? "Unknown" : page.getTitle().getCanonicalTitle());
        jsonConcepts.add(obj);
    }

    req.writeJsonResponse("text", text, "references", jsonConcepts);
}
/**
 * Creates a copy of a link that differs only in its location.
 *
 * @param ll       the link whose other fields are copied
 * @param location the location to store in the copy
 * @return a new {@code LocalLink}; {@code ll} itself is not modified
 */
private LocalLink cloneLinkWithLocation(LocalLink ll, int location) {
    LocalLink relocated = new LocalLink(
            ll.getLanguage(),
            ll.getAnchorText(),
            ll.getSourceId(),
            ll.getDestId(),
            ll.isOutlink(),
            location,           // the only field not taken from ll
            ll.isParseable(),
            ll.getLocType()
    );
    return relocated;
}
public static void main(String args[]) throws ConfigurationException, DaoException { Env env = EnvBuilder.envFromArgs(args); LocalPageDao pageDao = env.getConfigurator().get(LocalPageDao.class); // Get the default wikifier in the default language. Wikifier wikifier = env.getConfigurator().get( Wikifier.class, "websail", "language", env.getDefaultLanguage().getLangCode()); for (LocalLink link : wikifier.wikify(CORPUS)) { System.out.println("link is " + link.getAnchorText() + " is for " + pageDao.getById(link.getLanguage(), link.getDestId())); } } }
public static void main(String args[]) throws ConfigurationException, DaoException { Env env = EnvBuilder.envFromArgs(args); LocalPageDao pageDao = env.getConfigurator().get(LocalPageDao.class); // Get the default wikifier in the default language. Wikifier wikifier = env.getConfigurator().get( Wikifier.class, "websail", "language", env.getDefaultLanguage().getLangCode()); for (LocalLink link : wikifier.wikify(CORPUS)) { System.out.println("link is " + link.getAnchorText() + " is for " + pageDao.getById(link.getLanguage(), link.getDestId())); } } }
/**
 * Saves a single link by forwarding its fields to {@code insert(...)}.
 *
 * The values are passed in this order: language id, anchor text, source id,
 * destination id, location, parseable flag, and the ordinal of the link's
 * location type. The link's outlink flag is not forwarded.
 *
 * NOTE(review): the location type is stored via {@code ordinal()}, so the
 * persisted value depends on the declaration order of that type's constants;
 * reordering them would change the meaning of stored rows — confirm this is intended.
 *
 * @param localLink the link to save; it, its language and its location type
 *                  must be non-null because they are dereferenced here
 * @throws DaoException declared by this method; presumably raised by {@code insert} (not visible here)
 */
@Override public void save(LocalLink localLink) throws DaoException { insert( localLink.getLanguage().getId(), localLink.getAnchorText(), localLink.getSourceId(), localLink.getDestId(), localLink.getLocation(), localLink.isParseable(), localLink.getLocType().ordinal() ); }