/**
 * Forwards a page-id query to the wrapped {@code delegate}.
 *
 * @param pageId     page id, forwarded unchanged
 * @param maxResults result limit, forwarded unchanged
 * @return exactly what {@code delegate.mostSimilar(pageId, maxResults)} returns
 * @throws DaoException if the delegate throws it
 */
@Override
public SRResultList mostSimilar(int pageId, int maxResults) throws DaoException {
    final SRResultList delegated = delegate.mostSimilar(pageId, maxResults);
    return delegated;
}
/**
 * Forwards a phrase query restricted to {@code validIds} to the wrapped {@code delegate}.
 *
 * @param phrase     phrase, forwarded unchanged
 * @param maxResults result limit, forwarded unchanged
 * @param validIds   id set, forwarded unchanged
 * @return exactly what {@code delegate.mostSimilar(phrase, maxResults, validIds)} returns
 * @throws DaoException if the delegate throws it
 */
@Override
public SRResultList mostSimilar(String phrase, int maxResults, TIntSet validIds)
        throws DaoException {
    final SRResultList delegated = delegate.mostSimilar(phrase, maxResults, validIds);
    return delegated;
}
/**
 * Forwards a page-id query restricted to {@code validIds} to the wrapped {@code delegate}.
 *
 * @param pageId     page id, forwarded unchanged
 * @param maxResults result limit, forwarded unchanged
 * @param validIds   id set, forwarded unchanged
 * @return exactly what {@code delegate.mostSimilar(pageId, maxResults, validIds)} returns
 * @throws DaoException if the delegate throws it
 */
@Override
public SRResultList mostSimilar(int pageId, int maxResults, TIntSet validIds)
        throws DaoException {
    final SRResultList delegated = delegate.mostSimilar(pageId, maxResults, validIds);
    return delegated;
}
/**
 * Forwards a phrase query to the wrapped {@code delegate}.
 *
 * @param phrase     phrase, forwarded unchanged
 * @param maxResults result limit, forwarded unchanged
 * @return exactly what {@code delegate.mostSimilar(phrase, maxResults)} returns
 * @throws DaoException if the delegate throws it
 */
@Override
public SRResultList mostSimilar(String phrase, int maxResults) throws DaoException {
    final SRResultList delegated = delegate.mostSimilar(phrase, maxResults);
    return delegated;
}
/**
 * Builds the vector for a page from the base metric's most-similar list.
 *
 * <p>Queries {@code baseMetric} for up to {@code numConcepts} results drawn
 * from {@code conceptIds} and converts the list with {@code asTroveMap()}.
 *
 * @param pageId page whose vector is requested
 * @return the result list as a Trove map, or {@code null} when the base
 *         metric returns {@code null}
 * @throws DaoException if the base metric throws it
 */
@Override
public TIntFloatMap getVector(int pageId) throws DaoException {
    SRResultList neighbors = baseMetric.mostSimilar(pageId, numConcepts, conceptIds);
    return (neighbors == null) ? null : neighbors.asTroveMap();
}
/**
 * Finds the most similar items for a phrase by combining every member metric.
 *
 * <p>When {@code resolvePhrases} is set the superclass implementation is used.
 * Otherwise each metric in {@code metrics} is queried with
 * {@code getMaxResults(maxResults)} as its limit, the per-metric lists are
 * merged by {@code ensemble.predictMostSimilar}, and the merged list is
 * passed through {@code normalize}.
 *
 * @param phrase     phrase to query
 * @param maxResults limit handed to the ensemble predictor
 * @param validIds   id set forwarded to every metric and to the predictor
 * @return the normalized combined result list
 * @throws DaoException if a member metric or the superclass throws it
 */
@Override
public SRResultList mostSimilar(String phrase, int maxResults, TIntSet validIds)
        throws DaoException {
    if (!resolvePhrases) {
        List<SRResultList> perMetric = new ArrayList<SRResultList>();
        for (SRMetric member : metrics) {
            SRResultList partial = member.mostSimilar(phrase, getMaxResults(maxResults), validIds);
            perMetric.add(partial);
        }
        SRResultList combined = ensemble.predictMostSimilar(perMetric, maxResults, validIds);
        return normalize(combined);
    }
    return super.mostSimilar(phrase, maxResults, validIds);
}
/**
 * Finds the most similar items for a page by combining every member metric.
 *
 * <p>A non-null value from {@code getCachedMostSimilar} is returned as is.
 * Otherwise each metric in {@code metrics} is queried with
 * {@code getMaxResults(maxResults)} as its limit, the per-metric lists are
 * merged by {@code ensemble.predictMostSimilar}, and the merged list is
 * passed through {@code normalize}.
 *
 * @param pageId     page to query
 * @param maxResults limit handed to the cache lookup and the ensemble predictor
 * @param validIds   id set forwarded to the cache lookup, metrics and predictor
 * @return the cached list if present, else the normalized combined list
 * @throws DaoException if the cache lookup or a member metric throws it
 */
@Override
public SRResultList mostSimilar(int pageId, int maxResults, TIntSet validIds)
        throws DaoException {
    SRResultList cached = getCachedMostSimilar(pageId, maxResults, validIds);
    if (cached != null) {
        return cached;
    }

    List<SRResultList> perMetric = new ArrayList<SRResultList>();
    for (SRMetric member : metrics) {
        SRResultList partial = member.mostSimilar(pageId, getMaxResults(maxResults), validIds);
        perMetric.add(partial);
    }
    return normalize(ensemble.predictMostSimilar(perMetric, maxResults, validIds));
}
/**
 * Feeds one known similarity pair to the trainee.
 *
 * <p>Both phrases are disambiguated together. Nothing is observed unless
 * exactly two non-null ids come back and the metric returns a non-null
 * most-similar list for the first id.
 *
 * @param ks known similarity; {@code maybeSwap()} is invoked on it first
 * @throws IOException  declared by the callback signature
 * @throws DaoException if disambiguation or the metric throws it
 */
public void call(KnownSim ks) throws IOException, DaoException {
    ks.maybeSwap();

    List<LocalString> phrases = new ArrayList<LocalString>();
    phrases.add(new LocalString(ks.language, ks.phrase1));
    phrases.add(new LocalString(ks.language, ks.phrase2));

    List<LocalId> resolved = disambiguator.disambiguateTop(phrases, null);
    if (resolved == null || resolved.size() != 2) {
        return;
    }
    LocalId source = resolved.get(0);
    LocalId target = resolved.get(1);
    if (source == null || target == null) {
        return;
    }

    SRResultList neighbors = metric.mostSimilar(source.getId(), maxResults, validIds);
    if (neighbors == null) {
        return;
    }
    // The index of the second id within the first id's list is what gets observed.
    trainee.observe(neighbors, neighbors.getIndexForId(target.getId()), ks.similarity);
}
// Closes the enclosing anonymous callback and the call that received it
// (both opened before this excerpt).
}, 100);
/**
 * Evaluates most-similar for one phrase and records the outcome.
 *
 * <p>The known similarities are looked up first. The metric is then queried
 * by page id when {@code shouldResolvePhrases()} is true and by phrase
 * otherwise. Any exception from the query or from recording is logged,
 * recorded as a failure, and written with its stack frames to
 * {@code errFile} while holding that object's monitor.
 *
 * @param phrase phrase to evaluate
 * @throws Exception if the initial lookup fails or writing the error report fails
 */
@Override
public void call(String phrase) throws Exception {
    KnownMostSim known = msd.getSimilarities(phrase);
    try {
        SRResultList result = shouldResolvePhrases()
                ? metric.mostSimilar(known.getPageId(), numMostSimilarResults, mostSimilarIds)
                : metric.mostSimilar(phrase, numMostSimilarResults, mostSimilarIds);
        splitEval.record(known, result);
    } catch (Exception failure) {
        LOG.warn("Similarity of " + known.getPhrase() + ", id=" + known.getPageId()
                + " failed. Logging error to " + err);
        splitEval.recordFailed(known);
        // Keep each failure report contiguous in the shared error file.
        synchronized (errFile) {
            errFile.write("KnownSim failed: " + phrase + "\n");
            errFile.write("\t" + failure.getMessage() + "\n");
            for (String frame : ExceptionUtils.getStackFrames(failure)) {
                errFile.write("\t" + frame + "\n");
            }
            errFile.write("\n");
            errFile.flush();
        }
    }
}
// Closes the enclosing anonymous callback and the call that received it
// (both opened before this excerpt).
}, 100);
public static void main(String[] args) throws Exception{ // Initialize the WikiBrain environment and get the local page dao Env env = EnvBuilder.envFromArgs(args); Configurator conf = env.getConfigurator(); LocalPageDao lpDao = conf.get(LocalPageDao.class); Language simple = env.getDefaultLanguage(); // Retrieve the "milnewitten" sr metric for simple english SRMetric sr = conf.get( SRMetric.class, "prebuiltword2vec", "language", simple.getLangCode()); //Similarity between strings for (String phrase : Arrays.asList("Barack Obama", "US", "Canada", "vim")) { SRResultList similar = sr.mostSimilar(phrase, 3); List<String> pages = new ArrayList<String>(); for (int i = 0; i < similar.numDocs(); i++) { LocalPage page = lpDao.getById(simple, similar.getId(i)); pages.add((i+1) + ") " + page.getTitle()); } System.out.println("'" + phrase + "' is similar to " + StringUtils.join(pages, ", ")); } } }
public static void main(String[] args) throws Exception{ // Initialize the WikiBrain environment and get the local page dao Env env = EnvBuilder.envFromArgs(args); Configurator conf = env.getConfigurator(); LocalPageDao lpDao = conf.get(LocalPageDao.class); Language simple = env.getDefaultLanguage(); // Retrieve the "milnewitten" sr metric for simple english SRMetric sr = conf.get( SRMetric.class, "prebuiltword2vec", "language", simple.getLangCode()); //Similarity between strings for (String phrase : Arrays.asList("Barack Obama", "US", "Canada", "vim")) { SRResultList similar = sr.mostSimilar(phrase, 3); List<String> pages = new ArrayList<String>(); for (int i = 0; i < similar.numDocs(); i++) { LocalPage page = lpDao.getById(simple, similar.getId(i)); pages.add((i+1) + ") " + page.getTitle()); } System.out.println("'" + phrase + "' is similar to " + StringUtils.join(pages, ", ")); } } }
/**
 * Handles a most-similar web request and writes the matches as JSON.
 *
 * <p>The entity is parsed from the request. Article ids and titles are
 * queried by article id, phrases by text. Each hit is emitted as an object
 * with {@code articleId}, {@code score}, {@code lang} and {@code title};
 * the title is {@code "Unknown"} when the page cannot be loaded. A null
 * result list from the metric produces an empty {@code results} array.
 *
 * @param req request carrying the language, the entity and the optional
 *            {@code n} parameter (defaults to {@code "10"})
 * @throws DaoException           if the metric or page lookup throws it
 * @throws ConfigurationException if obtaining the metric throws it
 * @throws WikiBrainWebException  if the entity type is not supported
 * @throws NumberFormatException  if {@code n} is not a valid integer
 */
private void doMostSimilar(WikiBrainWebRequest req) throws DaoException, ConfigurationException {
    Language lang = req.getLanguage();
    WebEntity entity = entityParser.extractEntity(req);
    // parseInt yields the primitive directly; valueOf would box and then unbox.
    int n = Integer.parseInt(req.getParam("n", "10"));
    SRMetric sr = getSr(lang);

    SRResultList results;
    switch (entity.getType()) {
        case ARTICLE_ID:
        case TITLE:
            results = sr.mostSimilar(entity.getArticleId(), n);
            break;
        case PHRASE:
            results = sr.mostSimilar(entity.getPhrase(), n);
            break;
        default:
            throw new WikiBrainWebException("Unsupported entity type: " + entity.getType());
    }

    List<Map<String, Object>> jsonResults = new ArrayList<Map<String, Object>>();
    // Guard against a null result list instead of failing with a NullPointerException.
    if (results != null) {
        for (SRResult r : results) {
            LocalPage page = pageDao.getById(lang, r.getId());
            Map<String, Object> obj = new HashMap<String, Object>();
            obj.put("articleId", r.getId());
            obj.put("score", r.getScore());
            obj.put("lang", lang.getLangCode());
            obj.put("title", page == null ? "Unknown" : page.getTitle().getCanonicalTitle());
            jsonResults.add(obj);
        }
    }
    req.writeJsonResponse("results", jsonResults);
}
/**
 * Builds the per-metric score/rank record for one known similarity pair.
 *
 * <p>Both phrases are disambiguated together. For every metric, the
 * most-similar list of the first page is searched for the second page:
 * its score and index are recorded when found, otherwise
 * {@code Double.NaN} and {@code -1}. A metric that throws is logged and
 * also recorded as {@code NaN}/{@code -1}.
 *
 * @param ks known similarity whose phrases are evaluated
 * @return the populated record, or {@code null} when the first phrase
 *         cannot be resolved to a positive page id
 * @throws DaoException if disambiguation throws it
 */
public EnsembleSim call(KnownSim ks) throws DaoException {
    List<LocalString> localStrings = Arrays.asList(
            new LocalString(ks.language, ks.phrase1),
            new LocalString(ks.language, ks.phrase2)
    );
    List<LocalId> ids = getDisambiguator().disambiguateTop(localStrings, null);
    if (ids == null || ids.isEmpty() || ids.get(0) == null || ids.get(0).getId() <= 0) {
        return null;
    }
    int pageId = ids.get(0).getId();
    // Resolve the second id once, up front. A missing entry used to surface as an
    // exception swallowed inside the loop, producing one misleading warning per metric.
    LocalId target = (ids.size() > 1) ? ids.get(1) : null;

    EnsembleSim es = new EnsembleSim(ks);
    for (SRMetric metric : metrics) {
        double score = Double.NaN;
        int rank = -1;
        try {
            if (target != null) {
                SRResultList dsl = metric.mostSimilar(pageId, getMaxResults(numResults), validIds);
                if (dsl != null) {
                    // Look the index up a single time and reuse it for score and rank.
                    int index = dsl.getIndexForId(target.getId());
                    if (index >= 0) {
                        score = dsl.getScore(index);
                        rank = index;
                    }
                }
            }
        } catch (Exception e) {
            LOG.warn("Local sr metric " + metric.getName() + " failed for " + pageId, e);
        } finally {
            // Every metric contributes exactly one entry, even on failure.
            es.add(score, rank);
        }
    }
    return es;
}
// Closes the enclosing anonymous callback and the call that received it
// (both opened before this excerpt).
}, 100);
TIntSet candidateSet = new TIntHashSet(); for (SubMetric m : metrics) { SRResultList rl = m.metric.mostSimilar(phrase, (int) (maxResults * numCandidateMultiplier), validIds); if (rl != null) { for (SRResult r : rl) { for (SubMetric m : metrics) { SRResultList rl = m.metric.mostSimilar(phrase, (int) Math.ceil(candidateSet.size() * 0.8), candidateSet);
/**
 * Finds the most similar pages by pooling candidates from every sub-metric
 * and re-scoring them with {@code cosimilarity}.
 *
 * <p>Each sub-metric is asked for {@code (int) (maxResults * numCandidateMultiplier)}
 * results; the ids of all non-null lists are merged into one set. The pooled
 * ids are scored against {@code pageId} and the best {@code maxResults} are
 * kept on a {@code Leaderboard}.
 *
 * @param pageId     page to query
 * @param maxResults capacity of the leaderboard
 * @param validIds   id set forwarded to every sub-metric
 * @return the leaderboard's top entries
 * @throws DaoException if a sub-metric or the cosimilarity computation throws it
 */
@Override
public SRResultList mostSimilar(int pageId, int maxResults, TIntSet validIds)
        throws DaoException {
    TIntSet pool = new TIntHashSet();
    for (SubMetric sub : metrics) {
        SRResultList hits =
                sub.metric.mostSimilar(pageId, (int) (maxResults * numCandidateMultiplier), validIds);
        if (hits == null) {
            continue;
        }
        for (SRResult hit : hits) {
            pool.add(hit.getId());
        }
    }

    int[] candidateIds = pool.toArray();
    double[][] similarities = cosimilarity(new int[] { pageId }, candidateIds);

    Leaderboard board = new Leaderboard(maxResults);
    int position = 0;
    while (position < candidateIds.length) {
        // Row 0 holds the scores of the single query page against each candidate.
        board.tallyScore(candidateIds[position], similarities[0][position]);
        position++;
    }
    return board.getTop();
}
SRResultList results = sr.mostSimilar(phrase, 10, null); results.sortDescending(); for (SRResult hit : results) { results = sr.mostSimilar(pageId, 10, candidates); results.sortDescending(); for (SRResult hit : results) {
SRResultList results = sr.mostSimilar(phrase, 10, null); results.sortDescending(); for (SRResult hit : results) { results = sr.mostSimilar(pageId, 10, candidates); results.sortDescending(); for (SRResult hit : results) {
@Override public SRResultList mostSimilar(int pageId, int maxResults, TIntSet validIds) throws DaoException { SRResultList l1 = inlink.mostSimilar(pageId, maxResults * 2, validIds); TIntDoubleMap scores = new TIntDoubleHashMap(maxResults * 4); SRResultList l2 = outlink.mostSimilar(pageId, maxResults * 2, validIds); TIntSet inList2 = new TIntHashSet(); if (l2 != null) {