/**
 * Finds the position of the first result whose id equals the given id.
 * Results are scanned in order, from index 0 up to {@code numDocs()}.
 *
 * @param id the id to search for
 * @return the index of the first matching result, or -1 if none matches
 */
public int getIndexForId(int id) {
    int pos = 0;
    while (pos < numDocs()) {
        if (id == results[pos].id) {
            return pos;
        }
        pos++;
    }
    return -1;
}
/**
 * Looks up the score of the first result carrying the given id.
 * Results are scanned in order, from index 0 up to {@code numDocs()}.
 *
 * @param id the id to search for
 * @return the matching result's score, or {@code Double.NaN} if no result has that id
 */
public double getScoreForId(int id) {
    int pos = 0;
    while (pos < numDocs()) {
        if (results[pos].id != id) {
            pos++;
            continue;
        }
        return results[pos].getScore();
    }
    return Double.NaN;
}
/**
 * Renders the results as space-separated entries of the form
 * {@code rank. id=score}, where rank is 1-based and the score is printed
 * with three decimal places.
 *
 * @return the formatted listing; an empty string when there are no results
 */
public String toString() {
    StringBuilder out = new StringBuilder();
    int pos = 0;
    while (pos < numDocs()) {
        // Entries after the first are preceded by a single space.
        if (pos != 0) {
            out.append(" ");
        }
        String entry = String.format("%d. %d=%.3f", pos + 1, results[pos].getId(), results[pos].getScore());
        out.append(entry);
        pos++;
    }
    return out.toString();
}
/**
 * Builds a new result list whose score at each position is
 * {@code intercept + rankCoeff * ln(rank) + scoreCoeff * s}, where rank is
 * the 1-based position and {@code s} is the input score after it has been
 * passed through {@code logIfNecessary}. Ids keep their positions, and the
 * new list's missing score is set to {@code missingMean}.
 *
 * @param list the results to normalize
 * @return a new list, sized to {@code list.numDocs()}, holding the recomputed scores
 */
@Override
public SRResultList normalize(SRResultList list) {
    SRResultList out = new SRResultList(list.numDocs());
    out.setMissingScore(missingMean);
    int pos = 0;
    while (pos < list.numDocs()) {
        double transformed = logIfNecessary(list.getScore(pos));
        // Rank contribution uses the natural log of the 1-based rank.
        double rankTerm = rankCoeff * Math.log(pos + 1);
        out.set(pos, list.getId(pos), intercept + rankTerm + scoreCoeff * transformed);
        pos++;
    }
    return out;
}
for (LocalId id1 : candidates.keySet()) { SRResultList sr = metric.mostSimilar(id1.getId(), numCands * 2); if (sr != null && sr.numDocs() > 0) { for (int j = 0; j < numPerCand && j < sr.numDocs(); j++) { expanded.put(new LocalId(language, sr.getId(j)), (float)(sr.getScore(j) * candidates.get(id1)));
/**
 * A basic implementation of normalize: builds a new result list with the
 * same ids at the same positions, where each score is the input score passed
 * through the single-score {@code normalize} overload.
 *
 * @param list the results to normalize; this method does not write to it
 * @return a new list, sized to {@code list.numDocs()}, whose missing score
 *         is set to {@code missingMean}
 */
@Override
public SRResultList normalize(SRResultList list) {
    SRResultList dsl = new SRResultList(list.numDocs());
    // The missing score belongs on the list being returned. Setting it on the
    // input would mutate the caller's data and leave the result without it.
    dsl.setMissingScore(missingMean);
    for (int i = 0; i < list.numDocs(); i++) {
        dsl.set(i, list.getId(i), normalize(list.getScore(i)));
    }
    return dsl;
}
public static void main(String[] args) throws Exception{ // Initialize the WikiBrain environment and get the local page dao Env env = EnvBuilder.envFromArgs(args); Configurator conf = env.getConfigurator(); LocalPageDao lpDao = conf.get(LocalPageDao.class); Language simple = env.getDefaultLanguage(); // Retrieve the "milnewitten" sr metric for simple english SRMetric sr = conf.get( SRMetric.class, "prebuiltword2vec", "language", simple.getLangCode()); //Similarity between strings for (String phrase : Arrays.asList("Barack Obama", "US", "Canada", "vim")) { SRResultList similar = sr.mostSimilar(phrase, 3); List<String> pages = new ArrayList<String>(); for (int i = 0; i < similar.numDocs(); i++) { LocalPage page = lpDao.getById(simple, similar.getId(i)); pages.add((i+1) + ") " + page.getTitle()); } System.out.println("'" + phrase + "' is similar to " + StringUtils.join(pages, ", ")); } } }
public static void main(String[] args) throws Exception{ // Initialize the WikiBrain environment and get the local page dao Env env = EnvBuilder.envFromArgs(args); Configurator conf = env.getConfigurator(); LocalPageDao lpDao = conf.get(LocalPageDao.class); Language simple = env.getDefaultLanguage(); // Retrieve the "milnewitten" sr metric for simple english SRMetric sr = conf.get( SRMetric.class, "prebuiltword2vec", "language", simple.getLangCode()); //Similarity between strings for (String phrase : Arrays.asList("Barack Obama", "US", "Canada", "vim")) { SRResultList similar = sr.mostSimilar(phrase, 3); List<String> pages = new ArrayList<String>(); for (int i = 0; i < similar.numDocs(); i++) { LocalPage page = lpDao.getById(simple, similar.getId(i)); pages.add((i+1) + ") " + page.getTitle()); } System.out.println("'" + phrase + "' is similar to " + StringUtils.join(pages, ", ")); } } }
@Override public List<Explanation> getExplanations(String phrase1, String phrase2, TIntFloatMap vector1, TIntFloatMap vector2, SRResult result) throws DaoException { Leaderboard lb = new Leaderboard(5); // TODO: make 5 configurable for (int id : vector1.keys()) { if (vector2.containsKey(id)) { lb.tallyScore(id, vector1.get(id) * vector2.get(id)); } } SRResultList top = lb.getTop(); if (top.numDocs() == 0) { return Arrays.asList(new Explanation("? and ? share no tags", phrase1, phrase2)); } List<Explanation> explanations = new ArrayList<Explanation>(); for (int i = 0; i < top.numDocs(); i++) { LocalPage p = pageDao.getById(language, searcher.getLocalIdFromDocId(top.getId(i), language)); if (p != null) { explanations.add(new Explanation("Both ? and ? have similar text to ?", phrase1, phrase2, p)); } } return explanations; }
@Override public List<Explanation> getExplanations(int pageID1, int pageID2, TIntFloatMap vector1, TIntFloatMap vector2, SRResult result) throws DaoException { LocalPage page1=pageDao.getById(language,pageID1); LocalPage page2=pageDao.getById(language,pageID2); Leaderboard lb = new Leaderboard(5); // TODO: make 5 configurable for (int id : vector1.keys()) { if (vector2.containsKey(id)) { lb.tallyScore(id, vector1.get(id) * vector2.get(id)); } } SRResultList top = lb.getTop(); if (top.numDocs() == 0) { return Arrays.asList(new Explanation("? and ? share no links", page1, page2)); } List<Explanation> explanations = new ArrayList<Explanation>(); for (int i = 0; i < top.numDocs(); i++) { LocalPage p = pageDao.getById(language, top.getId(i)); if (p != null) { explanations.add(new Explanation("Both ? and ? have similar text to ?", page1, page2, p)); } } return explanations; }
@Override public List<Explanation> getExplanations(int pageID1, int pageID2, TIntFloatMap vector1, TIntFloatMap vector2, SRResult result) throws DaoException { LocalPage page1=pageDao.getById(language,pageID1); LocalPage page2=pageDao.getById(language,pageID2); Leaderboard lb = new Leaderboard(5); // TODO: make 5 configurable for (int id : vector1.keys()) { if (vector2.containsKey(id)) { lb.tallyScore(id, vector1.get(id) * vector2.get(id)); } } SRResultList top = lb.getTop(); if (top.numDocs() == 0) { return Arrays.asList(new Explanation("? and ? share no similar pages", page1, page2)); } List<Explanation> explanations = new ArrayList<Explanation>(); for (int i = 0; i < top.numDocs(); i++) { LocalPage p = pageDao.getById(language, top.getId(i)); if (p != null) { explanations.add(new Explanation("Both ? and ? are similar to ?", page1, page2, p)); } } return explanations; }
@Override public List<Explanation> getExplanations(int pageID1, int pageID2, TIntFloatMap vector1, TIntFloatMap vector2, SRResult result) throws DaoException { LocalPage page1=pageDao.getById(language,pageID1); LocalPage page2=pageDao.getById(language,pageID2); Leaderboard lb = new Leaderboard(5); // TODO: make 5 configurable for (int id : vector1.keys()) { if (vector2.containsKey(id)) { lb.tallyScore(id, vector1.get(id) * vector2.get(id)); } } SRResultList top = lb.getTop(); if (top.numDocs() == 0) { return Arrays.asList(new Explanation("? and ? share no links", page1, page2)); } List<Explanation> explanations = new ArrayList<Explanation>(); for (int i = 0; i < top.numDocs(); i++) { LocalPage p = pageDao.getById(language, top.getId(i)); if (p == null) { continue; } if (outLinks) { explanations.add(new Explanation("Both ? and ? link to ?", page1, page2, p)); } else { explanations.add(new Explanation("? links to both ? and ?", p, page1, page2)); } } return explanations; }
/**
 * Captures how a guessed result list lines up with a known most-similar list.
 * Stores the guess's size and its min/max scores, then records one
 * observation for every guessed result whose id also appears among the known
 * similarities. Each observation holds the 1-based rank in the guess, the id,
 * the guessed score, and the known similarity.
 *
 * @param known the known similarities; its entries are keyed by {@code wpId2}
 * @param guess the guessed results to compare against {@code known}
 */
public MostSimilarGuess(KnownMostSim known, SRResultList guess) {
    this.known = known;
    length = guess.numDocs();
    minScore = guess.minScore();
    maxScore = guess.maxScore();

    // Index the known similarities by id for constant-time lookup below.
    TIntDoubleMap knownSims = new TIntDoubleHashMap();
    for (KnownSim ks : known.getMostSimilar()) {
        knownSims.put(ks.wpId2, ks.similarity);
    }

    int pos = 0;
    while (pos < guess.numDocs()) {
        SRResult hit = guess.get(pos);
        int id = hit.getId();
        if (knownSims.containsKey(id)) {
            observations.add(new Observation(pos + 1, id, hit.getScore(), knownSims.get(id)));
        }
        pos++;
    }
}
for (int i = 0; i < results.numDocs(); i++) { results.setId(i, dense2Sparse[results.getId(i)]);
if (results.numDocs() < numResults) { return null;
for (int i = 0; i < sorted.numDocs(); i++) { if (i < numStopArticles) {