/**
 * Looks up the score held by the result stored at the given position.
 *
 * @param i zero-based position of the result; asserted to be below {@code numDocs}
 * @return the score reported by the result at position {@code i}
 */
public double getScore(int i) {
    assert i < numDocs;
    double score = results[i].getScore();
    return score;
}
/**
 * Computes the similarity of two pages as the unweighted mean of the scores
 * produced by the {@code inlink} and {@code outlink} metrics.
 *
 * @param pageId1 id of the first page
 * @param pageId2 id of the second page
 * @param explanations whether explanations are requested from both sub-metrics
 *                     and merged into the returned result
 * @return the normalized averaged result, or a result holding {@code Double.NaN}
 *         when either sub-metric returns {@code null} or an invalid result
 * @throws DaoException declared by the sub-metric similarity calls
 */
@Override
public SRResult similarity(int pageId1, int pageId2, boolean explanations) throws DaoException {
    SRResult r1 = inlink.similarity(pageId1, pageId2, explanations);
    SRResult r2 = outlink.similarity(pageId1, pageId2, explanations);
    if (r1 == null || r2 == null || !r1.isValid() || !r2.isValid()) {
        return new SRResult(Double.NaN);
    }

    SRResult finalResult = new SRResult(0.5 * r1.getScore() + 0.5 * r2.getScore());
    if (explanations) {
        List<Explanation> explanationList = new ArrayList<Explanation>();
        // A sub-result may carry no explanation list; addAll(null) would throw
        // a NullPointerException, so each list is merged only when present.
        if (r1.getExplanations() != null) {
            explanationList.addAll(r1.getExplanations());
        }
        if (r2.getExplanations() != null) {
            explanationList.addAll(r2.getExplanations());
        }
        finalResult.setExplanations(explanationList);
    }
    return normalize(finalResult);
}
/**
 * Combines several results into one by taking the arithmetic mean of their
 * scores and concatenating every non-empty explanation list.
 *
 * @param scores the individual results to combine
 * @return a result created with id {@code -2}, the mean score and the merged explanations
 */
@Override
public SRResult predictSimilarity(List<SRResult> scores) {
    List<Explanation> merged = new ArrayList<Explanation>();
    double total = 0.0;
    for (SRResult each : scores) {
        total += each.getScore();
        // Skip results that have nothing to contribute to the explanations.
        if (each.getExplanations() == null || each.getExplanations().isEmpty()) {
            continue;
        }
        merged.addAll(each.getExplanations());
    }
    double mean = total / scores.size();
    return new SRResult(-2, mean, merged);
}
/**
 * Computes the coefficient-weighted mean of the phrase similarities reported
 * by every sub-metric that yields a valid result.
 *
 * @param phrase1 the first phrase
 * @param phrase2 the second phrase
 * @param explanations not consulted here; sub-metrics are always queried without explanations
 * @return the weighted mean, or a result holding {@code Double.NaN} when the
 *         accumulated coefficient weight is not positive
 * @throws DaoException declared by the sub-metric similarity calls
 */
@Override
public SRResult similarity(String phrase1, String phrase2, boolean explanations) throws DaoException {
    double weightedTotal = 0.0;
    double weightTotal = 0.0;
    for (SubMetric sub : metrics) {
        SRResult partial = sub.metric.similarity(phrase1, phrase2, false);
        if (partial == null || !partial.isValid()) {
            continue;
        }
        weightedTotal += sub.coefficient * partial.getScore();
        weightTotal += sub.coefficient;
    }
    if (weightTotal > 0) {
        return new SRResult(weightedTotal / weightTotal);
    }
    return new SRResult(Double.NaN);
}
/**
 * Merges the per-metric result lists into a single ranking. Each id receives
 * the sum of its scores divided by the number of metrics, and the ids with the
 * highest combined scores are returned.
 *
 * @param scores one result list per metric; {@code null} lists contribute nothing
 *               but still count towards the number of metrics used as divisor
 * @param maxResults maximum number of results to return
 * @param validIds not consulted by this implementation
 * @return the top results in descending order, holding at most {@code maxResults}
 *         entries and never more entries than there are distinct ids
 */
@Override
public SRResultList predictMostSimilar(List<SRResultList> scores, int maxResults, TIntSet validIds) {
    int numMetrics = scores.size();
    TIntDoubleHashMap scoreMap = new TIntDoubleHashMap();
    for (SRResultList metricResults : scores) {
        // A metric may have produced no list at all; skip it instead of failing.
        if (metricResults == null) {
            continue;
        }
        for (SRResult result : metricResults) {
            double value = result.getScore() / numMetrics;
            scoreMap.adjustOrPutValue(result.getId(), value, value);
        }
    }

    List<SRResult> ranked = new ArrayList<SRResult>();
    for (int id : scoreMap.keys()) {
        ranked.add(new SRResult(id, scoreMap.get(id)));
    }
    // Ascending sort followed by a reversal yields descending order.
    Collections.sort(ranked);
    Collections.reverse(ranked);

    // Size the list to what is actually available: the SRResultList constructor
    // pre-fills every slot with a placeholder result, so allocating maxResults
    // slots would leave trailing placeholders when fewer candidates exist.
    int numResults = Math.min(maxResults, ranked.size());
    SRResultList top = new SRResultList(numResults);
    for (int i = 0; i < numResults; i++) {
        top.set(i, ranked.get(i));
    }
    return top;
}
/**
 * Predicts a similarity as a linear combination of the given scores: the
 * first coefficient is used as the starting value and coefficient
 * {@code i + 1} weights score {@code i}.
 *
 * @param scores one result per coefficient after the first; {@code null}
 *               entries and NaN or infinite scores are replaced by the
 *               interpolated score for that position
 * @return a result holding the weighted sum
 * @throws IllegalStateException if the number of scores is not exactly one
 *                               less than the number of coefficients
 */
@Override
public SRResult predictSimilarity(List<SRResult> scores) {
    if (scores.size() + 1 != simlarityCoefficients.size()) {
        throw new IllegalStateException(
                "Expected " + (simlarityCoefficients.size() - 1)
                        + " scores but received " + scores.size());
    }
    double weightedScore = simlarityCoefficients.get(0);
    for (int i = 0; i < scores.size(); i++) {
        SRResult score = scores.get(i);
        double s = (score == null) ? Double.NaN : score.getScore();
        // Missing or unusable scores fall back to the interpolated value.
        if (Double.isNaN(s) || Double.isInfinite(s)) {
            s = similarityInterpolator.getInterpolatedScore(i);
        }
        weightedScore += s * simlarityCoefficients.get(i + 1);
    }
    return new SRResult(weightedScore);
}
public static void main(String args[]) throws ConfigurationException, DaoException { // Initialize the WikiBrain environment and get the local page dao Env env = EnvBuilder.envFromArgs(args); Configurator conf = env.getConfigurator(); LocalPageDao lpDao = conf.get(LocalPageDao.class); // Retrieve the "ensemble" sr metric for simple english SRMetric sr = conf.get( SRMetric.class, "milnewitten", "language", env.getDefaultLanguage().getLangCode()); //Similarity between strings String pairs[][] = new String[][] { { "cat", "kitty" }, { "Obama", "president" }, { "tires", "car" }, { "java", "computer" }, { "dog", "computer" }, }; ExplanationFormatter formatter= new ExplanationFormatter(lpDao); for (String pair[] : pairs) { SRResult s = sr.similarity(pair[0], pair[1], true); System.out.println(s.getScore() + ": '" + pair[0] + "', '" + pair[1] + "'"); for (Explanation e:s.getExplanations()) { // System.out.println(formatter.formatExplanation(e)); } } } }
private void doSimilarity(WikiBrainWebRequest req) throws ConfigurationException, DaoException { // TODO: support explanations Language lang = req.getLanguage(); List<WebEntity> entities = entityParser.extractEntityList(req); if (entities.size() != 2) { throw new WikiBrainWebException("Similarity requires exactly two entities"); } WebEntity entity1 = entities.get(0); WebEntity entity2 = entities.get(1); SRMetric sr = getSr(lang); SRResult r = null; switch (entity1.getType()) { case ARTICLE_ID: case TITLE: r = sr.similarity(entity1.getArticleId(), entity2.getArticleId(), false); break; case PHRASE: r = sr.similarity(entity1.getPhrase(), entity2.getPhrase(), false); break; default: throw new WikiBrainWebException("Unsupported entity type: " + entity1.getType()); } Double sim = (r != null && r.isValid()) ? r.getScore() : null; req.writeJsonResponse("score", sim, "entity1", entity1.toJson(), "entity2", entity2.toJson()); }
/**
 * Scores two pages by the normalized cosine similarity of their page vectors.
 *
 * @param pageId1 id of the first page
 * @param pageId2 id of the second page
 * @param explanations whether explanations from the generator are attached to the result
 * @return the result holding the normalized cosine similarity
 * @throws DaoException wrapping any {@link IOException} raised inside the computation
 */
@Override
public SRResult similarity(int pageId1, int pageId2, boolean explanations) throws DaoException {
    try {
        float[] first = getPageVector(pageId1);
        float[] second = getPageVector(pageId2);
        double cosine = SimUtils.cosineSimilarity(first, second);
        SRResult result = new SRResult(normalize(cosine));
        if (!explanations) {
            return result;
        }
        result.setExplanations(generator.getExplanations(pageId1, pageId2, first, second, result));
        return result;
    } catch (IOException e) {
        throw new DaoException(e);
    }
}
public SRResultList(int maxNumDocs) { this.results = new SRResult[maxNumDocs]; for (int i = 0; i < this.results.length; i++) { results[i] = new SRResult(); } numDocs = maxNumDocs; }
if (resultList != null) { for (SRResult result : resultList){ allIds.add(result.getId()); int rank = (int) ((j + 1) * k); SRResult result = resultList.get(j); unknownIds.remove(result.getId()); double value = c1 * result.getScore() + c2 * Math.log(rank); if (debug) { System.err.format("%s %d. %.3f (id=%d), computing %.3f * %.3f + %.3f * (log(%d) = %.3f)\n", "m" + i, j, value, result.getId(), c1, result.getScore(), c2, rank, Math.log(rank)); scoreMap.adjustOrPutValue(result.getId(), value, value); resultList.add(new SRResult(id,scoreMap.get(id)));
@Override public SRResult similarity(int pageId1, int pageId2, boolean explanations) throws DaoException { // TODO: Handle explanations double sum = 0.0; double divisor = 0.0; for (SubMetric m : metrics) { SRResult r = m.metric.similarity(pageId1, pageId2, false); if (r != null && r.isValid()) { sum += m.coefficient * r.getScore(); divisor += m.coefficient; } } return new SRResult((divisor > 0) ? (sum / divisor) : Double.NaN); }
/**
 * Predicts a similarity as a linear combination of the given scores: the
 * first coefficient is used as the starting value and coefficient
 * {@code i + 1} weights score {@code i}.
 *
 * @param scores one result per coefficient after the first; {@code null}
 *               entries and NaN or infinite scores are replaced by the
 *               interpolated score for that position
 * @return a result holding the weighted sum
 * @throws IllegalStateException if the number of scores is not exactly one
 *                               less than the number of coefficients
 */
@Override
public SRResult predictSimilarity(List<SRResult> scores) {
    if (scores.size() + 1 != simlarityCoefficients.size()) {
        throw new IllegalStateException(
                "Expected " + (simlarityCoefficients.size() - 1)
                        + " scores but received " + scores.size());
    }
    double weightedScore = simlarityCoefficients.get(0);
    for (int i = 0; i < scores.size(); i++) {
        SRResult score = scores.get(i);
        double s = (score == null) ? Double.NaN : score.getScore();
        // Missing or unusable scores fall back to the interpolated value.
        if (Double.isNaN(s) || Double.isInfinite(s)) {
            s = similarityInterpolator.getInterpolatedScore(i);
        }
        weightedScore += s * simlarityCoefficients.get(i + 1);
    }
    return new SRResult(weightedScore);
}
public static void main(String args[]) throws ConfigurationException, DaoException { // Initialize the WikiBrain environment and get the local page dao Env env = EnvBuilder.envFromArgs(args); Configurator conf = env.getConfigurator(); LocalPageDao lpDao = conf.get(LocalPageDao.class); // Retrieve the "ensemble" sr metric for simple english SRMetric sr = conf.get( SRMetric.class, "milnewitten", "language", env.getDefaultLanguage().getLangCode()); //Similarity between strings String pairs[][] = new String[][] { { "cat", "kitty" }, { "Obama", "president" }, { "tires", "car" }, { "java", "computer" }, { "dog", "computer" }, }; ExplanationFormatter formatter= new ExplanationFormatter(lpDao); for (String pair[] : pairs) { SRResult s = sr.similarity(pair[0], pair[1], true); System.out.println(s.getScore() + ": '" + pair[0] + "', '" + pair[1] + "'"); for (Explanation e:s.getExplanations()) { // System.out.println(formatter.formatExplanation(e)); } } } }
/**
 * Finds the pages most similar to the given page. Every sub-metric proposes
 * candidates, the union of their ids is re-scored against the page via
 * {@code cosimilarity}, and the best-scoring candidates are returned.
 *
 * @param pageId id of the query page
 * @param maxResults capacity of the leaderboard that collects the results
 * @param validIds passed through to each sub-metric's {@code mostSimilar} call
 * @return the leaderboard's top results
 * @throws DaoException declared by the sub-metric and cosimilarity calls
 */
@Override
public SRResultList mostSimilar(int pageId, int maxResults, TIntSet validIds) throws DaoException {
    // Gather the union of candidate ids proposed by the sub-metrics.
    TIntSet pool = new TIntHashSet();
    for (SubMetric sub : metrics) {
        int limit = (int) (maxResults * numCandidateMultiplier);
        SRResultList proposed = sub.metric.mostSimilar(pageId, limit, validIds);
        if (proposed == null) {
            continue;
        }
        for (SRResult hit : proposed) {
            pool.add(hit.getId());
        }
    }

    // Score each candidate against the query page; only row 0 is read.
    int[] ids = pool.toArray();
    int[] query = {pageId};
    double[][] matrix = cosimilarity(query, ids);

    Leaderboard board = new Leaderboard(maxResults);
    int column = 0;
    for (int id : ids) {
        board.tallyScore(id, matrix[0][column]);
        column++;
    }
    return board.getTop();
}
@Override public SRResult similarity(String phrase1, String phrase2, boolean explanations) throws DaoException { if (featureFilter != null) { throw new UnsupportedOperationException(); } TIntFloatMap vector1 = null; TIntFloatMap vector2 = null; // try using phrases directly try { vector1 = generator.getVector(phrase1); vector2 = generator.getVector(phrase2); } catch (UnsupportedOperationException e) { // try using other methods } if (vector1 == null || vector2 == null) { return super.similarity(phrase1, phrase2, explanations); } else { SRResult result= new SRResult(similarity.similarity(vector1, vector2)); if(explanations) { result.setExplanations(generator.getExplanations(phrase1, phrase2, vector1, vector2, result)); } return normalize(result); } }
/**
 * Stores a newly created SRResult, built from the given id, score and
 * explanations, at the given index.
 *
 * @param i index at which the result is stored
 * @param id id of the new result
 * @param score score of the new result
 * @param explanationList explanations of the new result
 */
public void set(int i, int id, double score, List<Explanation> explanationList) {
    SRResult replacement = new SRResult(id, score, explanationList);
    set(i, replacement);
}
/**
 * Returns the score of the last counted result (index {@code numDocs - 1}),
 * or {@code 0.0} when the list holds no documents.
 * NOTE(review): the name suggests results are kept in descending score order,
 * which would make the last entry the minimum; confirm against callers.
 *
 * @return the score of the last counted result, or {@code 0.0} if there is none
 */
public double minScore() {
    if (numDocs == 0) {
        return 0.0;
    }
    return this.results[numDocs - 1].getScore();
}