/**
 * Builds a new link that carries over every attribute of {@code ll}
 * except the location, which is replaced by {@code location}.
 *
 * @param ll       the link to copy from
 * @param location the location to store in the copy
 * @return a freshly constructed link; {@code ll} itself is not modified here
 */
private LocalLink cloneLinkWithLocation(LocalLink ll, int location) {
    LocalLink relocated = new LocalLink(
            ll.getLanguage(),
            ll.getAnchorText(),
            ll.getSourceId(),
            ll.getDestId(),
            ll.isOutlink(),
            location,           // the only attribute that differs from the original
            ll.isParseable(),
            ll.getLocType());
    return relocated;
}
/**
 * Persists a link by handing its fields to {@code insert}.
 *
 * The values passed, in order, are: language id, anchor text, source id,
 * destination id, location, the parseable flag and the ordinal of the
 * location type.
 *
 * @param localLink the link to store
 * @throws DaoException declared by this method; presumably raised by the insert
 */
@Override
public void save(LocalLink localLink) throws DaoException {
    insert(
            localLink.getLanguage().getId(),
            localLink.getAnchorText(),
            localLink.getSourceId(),
            localLink.getDestId(),
            localLink.getLocation(),
            localLink.isParseable(),
            // the location type is stored by its enum ordinal
            localLink.getLocType().ordinal());
}
public LinkInfo(LocalLink link) { this.startChar = link.getLocation(); this.endChar = startChar + link.getAnchorText().length(); this.anchortext = link.getAnchorText(); this.knownDest = link.getDestId(); }
/**
 * Persists a link together with the supplied universal ids and algorithm id.
 *
 * The values passed to {@code insert}, in order, are: language id, local
 * source id, local destination id, {@code sourceUnivId}, {@code destUnivId}
 * and {@code algorithmId}.
 *
 * @param localLink    the local link supplying language, source id and destination id
 * @param sourceUnivId universal id stored for the source
 * @param destUnivId   universal id stored for the destination
 * @param algorithmId  algorithm id stored with the row
 * @throws DaoException declared by this method; presumably raised by the insert
 */
public void save(LocalLink localLink, int sourceUnivId, int destUnivId, int algorithmId)
        throws DaoException {
    insert(
            localLink.getLanguage().getId(),
            localLink.getSourceId(),
            localLink.getDestId(),
            sourceUnivId,
            destUnivId,
            algorithmId);
}
/**
 * Loads the next entry into {@code buffer} if none is pending.
 *
 * Does nothing when iteration has already finished or an entry is still
 * buffered. Iteration is marked finished when the underlying iterator is
 * exhausted or yields {@code null}.
 */
private void fillBuffer() {
    boolean nothingToDo = finished || buffer != null;
    if (nothingToDo) {
        return;
    }

    // An exhausted iterator and a null element are handled the same way.
    LocalLink next = iter.hasNext() ? iter.next() : null;
    if (next == null) {
        finished = true;
        return;
    }

    // Each link becomes one entry with a count of 1.
    buffer = new BasePhraseAnalyzer.Entry(
            next.getLanguage(),
            next.getDestId(),
            next.getAnchorText(),
            1);
}
}
/**
 * Marks candidates whose anchor text matches an existing outlink of the page
 * with that link's destination.
 *
 * Hack: only the FIRST candidate without a known destination is marked for
 * each anchor text; later links with the same anchor text are ignored.
 *
 * @param wpId       id of the page whose links are consulted
 * @param candidates candidate link infos; matching ones are updated in place
 * @throws DaoException declared by this method; presumably raised by the link lookup
 */
private void identifyKnownCandidates(int wpId, List<LinkInfo> candidates) throws DaoException {
    Set<String> seenAnchors = new HashSet<String>();

    for (LocalLink existing : lld.getLinks(language, wpId, true)) {
        if (existing.getDestId() < 0) {
            continue;
        }
        String anchor = existing.getAnchorText();
        if (anchor == null || seenAnchors.contains(anchor)) {
            continue;
        }

        for (LinkInfo candidate : candidates) {
            if (!anchor.equals(candidate.getAnchortext())) {
                continue;
            }
            if (candidate.getKnownDest() == null) {
                candidate.setKnownDest(existing.getDestId());
                break;
            }
            // Already has a known destination: report it and keep searching.
            LOG.info("conflict for link info " + candidate.getAnchortext() + " between " + candidate.getKnownDest() + " and " + existing.getDestId());
        }

        seenAnchors.add(anchor);
    }
}
/**
 * Converts this info into a link originating from the given page.
 *
 * The link uses this info's anchor text, {@code dest} and {@code startChar};
 * it is created as a parseable outlink with location type {@code NONE}.
 *
 * @param language language passed to the new link
 * @param wpId     id used as the source of the new link
 * @return the newly constructed link
 */
public LocalLink toLocalLink(Language language, int wpId) {
    final boolean outlink = true;
    final boolean parseable = true;
    return new LocalLink(
            language,
            anchortext,
            wpId,
            dest,
            outlink,
            startChar,
            parseable,
            LocalLink.LocationType.NONE);
}
}
private TIntSet getActualLinks(int wpId) throws DaoException { TIntSet existingIds = new TIntHashSet(); for (LocalLink ll : linkDao.getLinks(language, wpId, true)) { if (ll.getDestId() >= 0) { existingIds.add(ll.getDestId()); } } // hack: add the link itself existingIds.add(wpId); return existingIds; }
/**
 * Returns the page's existing outlinks aligned against the given text.
 *
 * Only links with a non-negative location, the parseable flag set and a
 * non-empty anchor text are kept. They are sorted in their natural order
 * and then passed to {@code align}.
 *
 * @param wpId id of the page whose outlinks are read
 * @param text the text to align against; null or empty yields an empty list
 * @return the result of {@code align}, or a new empty list for null/empty text
 * @throws DaoException declared by this method; presumably raised by the link lookup
 */
@Override
public List<LocalLink> wikify(int wpId, String text) throws DaoException {
    List<LocalLink> kept = new ArrayList<LocalLink>();
    if (text == null || text.isEmpty()) {
        return kept;
    }

    for (LocalLink candidate : linkDao.getLinks(language, wpId, true)) {
        if (candidate.getLocation() < 0
                || !candidate.isParseable()
                || StringUtils.isNullOrEmpty(candidate.getAnchorText())) {
            continue;
        }
        kept.add(candidate);
    }

    Collections.sort(kept);
    return align(kept, text);
}
// Visit each link of pageId in the direction selected by outLinks. The id of
// interest is the destination for outlinks and the source for inlinks; links
// whose id of interest is negative are skipped.
for (LocalLink link : linkDao.getLinks(language, pageId, outLinks)) { int columnId = outLinks ? link.getDestId() : link.getSourceId(); if (columnId < 0) { continue;
/**
 * Combines the links found by {@code identityWikifier} with the proposed
 * links in {@code infos} and returns them ordered by location.
 *
 * The character ranges of the identity wikifier's links are marked as used
 * first. A proposed link is then added only if it has a destination, its
 * score exceeds {@code minFinalScore}, and no character in its range is
 * already used.
 *
 * Note that {@code infos} is sorted in place (natural ordering of
 * {@code LinkInfo}), and that new links are appended to the list returned
 * by the identity wikifier.
 *
 * @param wpId  id of the page being wikified
 * @param text  the page text; its length sizes the usage bitmap
 * @param infos proposed links; sorted in place by this method
 * @return the combined links, sorted by ascending location
 * @throws DaoException declared by this method; presumably raised by the identity wikifier
 */
private List<LocalLink> link(int wpId, String text, List<LinkInfo> infos) throws DaoException {
    BitSet used = new BitSet(text.length());

    // Links found by the identity wikifier claim their character ranges first.
    List<LocalLink> results = identityWikifier.wikify(wpId, text);
    for (LocalLink existing : results) {
        int start = existing.getLocation();
        used.set(start, start + existing.getAnchorText().length());
    }

    Collections.sort(infos);
    for (LinkInfo li : infos) {
        if (li.getDest() != null
                && li.getScore() > minFinalScore
                && used.get(li.getStartChar(), li.getEndChar()).isEmpty()) {
            results.add(li.toLocalLink(language, wpId));
            used.set(li.getStartChar(), li.getEndChar());
        }
    }

    Collections.sort(results, new Comparator<LocalLink>() {
        @Override
        public int compare(LocalLink o1, LocalLink o2) {
            // Integer.compare cannot overflow, unlike subtracting the two locations.
            return Integer.compare(o1.getLocation(), o2.getLocation());
        }
    });
    return results;
}
// Consume the remaining links from iter. For each link, findNextUnused(...) is
// asked for a position of its anchor text in text starting from i; the result
// is stored back into i and treated as a hit when it is non-negative.
// NOTE(review): `a` is declared twice in this loop body as shown, and `j` is not
// used in the visible portion. This looks like leftover code from an earlier
// indexOf-based search; confirm and remove the stale declarations.
while (iter.hasNext()) { LocalLink ll = iter.next(); String a = ll.getAnchorText(); int j = text.indexOf(a, i); String a = ll.getAnchorText(); i = findNextUnused(text, a, i, used); if (i >= 0) {
/**
 * Collects the source ids of all links pointing into the given page.
 *
 * @param pageId1 id of the page whose inlinks are read
 * @return a new set holding the source id of every inlink
 * @throws DaoException declared by this method; presumably raised by the link lookup
 */
private TIntSet getInlinks(int pageId1) throws DaoException {
    final boolean outlinks = false;   // request inlinks
    TIntSet sources = new TIntHashSet();
    for (LocalLink incoming : linkDao.getLinks(language, pageId1, outlinks)) {
        sources.add(incoming.getSourceId());
    }
    return sources;
}
private TIntSet getOutlinks(int pageId1) throws DaoException {
/**
 * Orders two links by ascending location.
 *
 * @param l1 first link
 * @param l2 second link
 * @return negative, zero or positive as {@code l1}'s location is less than,
 *         equal to or greater than {@code l2}'s
 */
@Override
public int compare(LocalLink l1, LocalLink l2) {
    // Integer.compare cannot overflow, unlike subtracting the two locations.
    return Integer.compare(l1.getLocation(), l2.getLocation());
}
});
@Override public void save(LocalLink item) throws DaoException { if (delegate != null) delegate.save(item); // skip red links if (item.getDestId() < 0 || item.getSourceId() < 0) { return; } LocalId src = new LocalId(item.getLanguage(), item.getSourceId()); LocalId dest = new LocalId(item.getLanguage(), item.getDestId()); if (!src.canPackInInt() || !dest.canPackInInt()) { return; } try { BufferedWriter writer = getSortingWriter(); writer.write(src.toInt() + " " + dest.toInt() + "\n"); } catch (IOException e) { throw new DaoException(e); } }
public static void main(String args[]) throws ConfigurationException, DaoException { Env env = EnvBuilder.envFromArgs(args); LocalPageDao pageDao = env.getConfigurator().get(LocalPageDao.class); // Get the default wikifier in the default language. Wikifier wikifier = env.getConfigurator().get( Wikifier.class, "websail", "language", env.getDefaultLanguage().getLangCode()); for (LocalLink link : wikifier.wikify(CORPUS)) { System.out.println("link is " + link.getAnchorText() + " is for " + pageDao.getById(link.getLanguage(), link.getDestId())); } } }
// Advance the mention cursor m while the mention at m starts before the current
// token begins. When the mention at m starts before the token ends, compute the
// mention's end offset (location + anchor text length) and append the word's
// token, then ":" and the mention URL for its destination id, to phrase.
// NOTE(review): as shown, no closing brace follows m++, so the if sits inside
// the while body. Confirm this nesting is intended.
while (m < mentions.size() && mentions.get(m).getLocation() < token.getBegin()) { m++; if (m < mentions.size() && mentions.get(m).getLocation() < token.getEnd()) { int end = mentions.get(m).getLocation() + mentions.get(m).getAnchorText().length(); phrase += words.get(w).getToken(); phrase += ":" + getMentionUrl(mentions.get(m).getDestId());
/**
 * Builds an outlink from {@code sourceId} to this object's {@code pageId}.
 *
 * The link is created without anchor text, with location {@code -1}, and
 * with {@code null} for the last two constructor arguments.
 *
 * @param lang     language passed to the new link
 * @param sourceId id used as the source of the new link
 * @return the newly constructed link
 */
public LocalLink getLocalOutLink(Language lang, int sourceId) {
    final boolean outlink = true;
    final int noLocation = -1;
    return new LocalLink(
            lang,
            null,
            sourceId,
            pageId,
            outlink,
            noLocation,
            null,
            null);
}
/**
 * Collects the destination ids of all links leaving the given page.
 *
 * @param pageId1 id of the page whose outlinks are read
 * @return a new set holding the destination id of every outlink
 * @throws DaoException declared by this method; presumably raised by the link lookup
 */
private TIntSet getOutlinks(int pageId1) throws DaoException {
    final boolean outgoing = true;   // request outlinks
    TIntSet destinations = new TIntHashSet();
    for (LocalLink leaving : linkDao.getLinks(language, pageId1, outgoing)) {
        destinations.add(leaving.getDestId());
    }
    return destinations;
}
public static void main(String args[]) throws ConfigurationException, DaoException, IOException { //LocalLinkDao ldao = new Configurator(new Configuration()).get(LocalLinkDao.class, "live"); //try to change from "live" to "dao" LocalLinkLiveDao ldao = new LocalLinkLiveDao(); Language lang = Language.getByLangCode("simple"); int sourceId = 10983; //Minnesota int destId = 3009; //California LocalLink link = ldao.getLink(lang, sourceId, destId); if(link != null) System.out.println("Got link \"" + link.getAnchorText() + "\" from " + sourceId + " to " + destId); Iterable<LocalLink> inlinks = ldao.getLinks(lang, sourceId, false); System.out.println("\nLinks into page " + sourceId + ":"); for (LocalLink inlink : inlinks) { System.out.println(inlink); } Iterable<LocalLink> outlinks = ldao.getLinks(lang, sourceId, true); System.out.println("\nLinks out of page " + sourceId + ":"); for (LocalLink outlink : outlinks) { System.out.println(outlink); } }