/**
 * Forwards a phrase-to-phrase similarity request to {@code delegate}.
 *
 * @param phrase1      first phrase, passed through unchanged
 * @param phrase2      second phrase, passed through unchanged
 * @param explanations flag passed through unchanged to the delegate
 * @return whatever {@code delegate.similarity(phrase1, phrase2, explanations)} returns
 * @throws DaoException if the delegate throws it
 */
@Override
public SRResult similarity(String phrase1, String phrase2, boolean explanations) throws DaoException {
    final SRResult delegated = delegate.similarity(phrase1, phrase2, explanations);
    return delegated;
}
/**
 * Forwards a most-similar query for a page id to {@code delegate}.
 *
 * @param pageId     page id, passed through unchanged
 * @param maxResults result limit, passed through unchanged
 * @return whatever {@code delegate.mostSimilar(pageId, maxResults)} returns
 * @throws DaoException if the delegate throws it
 */
@Override
public SRResultList mostSimilar(int pageId, int maxResults) throws DaoException {
    final SRResultList delegated = delegate.mostSimilar(pageId, maxResults);
    return delegated;
}
/**
 * Forwards a cosimilarity request over page ids to {@code delegate}.
 *
 * @param ids page ids, passed through unchanged
 * @return the matrix returned by {@code delegate.cosimilarity(ids)}
 * @throws DaoException if the delegate throws it
 */
@Override
public double[][] cosimilarity(int[] ids) throws DaoException {
    final double[][] matrix = delegate.cosimilarity(ids);
    return matrix;
}
if (skipBuiltMetrics && metric.similarityIsTrained()) { LOG.info("metric " + name + " similarity() is already trained... skipping"); } else { metric.trainSimilarity(ds); if (skipBuiltMetrics && metric.mostSimilarIsTrained()) { LOG.info("metric " + name + " mostSimilar() is already trained... skipping"); } else { String path = String.format("%s/%s.txt", config.getString("mostSimilarConcepts"), metric.getLanguage().getLangCode()); validIds = readIds(path); metric.trainMostSimilar(ds, n, validIds); metric.write();
/**
 * Scores one known similarity pair with every metric in {@code metrics} and
 * appends the collected scores to {@code ensembleSims}.
 *
 * A metric that throws, or that returns a null result, contributes
 * {@code Double.NaN}; the failure is logged and the remaining metrics still run.
 * Every score is added with a rank argument of 0.
 *
 * @param ks the known similarity whose two phrases are compared
 * @throws Exception declared by the signature; failures of
 *         {@code metric.similarity} itself are caught and logged here
 */
@Override
public void call(KnownSim ks) throws Exception {
    EnsembleSim ensembleSim = new EnsembleSim(ks);
    for (SRMetric metric : metrics) {
        double similarityScore;
        try {
            SRResult srResult = metric.similarity(ks.phrase1, ks.phrase2, false);
            similarityScore = (srResult == null) ? Double.NaN : srResult.getScore();
        } catch (Exception e) {
            LOG.warn("Local sr metric " + metric.getName() + " failed for " + ks, e);
            // Best effort: a failing metric is recorded as "no score".
            similarityScore = Double.NaN;
        }
        ensembleSim.add(similarityScore, 0);
    }
    ensembleSims.add(ensembleSim);
}
},
@Override protected SimilarityEvaluationLog evaluateSplit(MonolingualSRFactory factory, Split split, File log, File err, Map<String, String> config) throws DaoException, IOException { SRMetric metric = factory.create(); metric.trainSimilarity(split.getTrain()); SimilarityEvaluationLog splitEval = new SimilarityEvaluationLog(config, log); BufferedWriter errFile = new BufferedWriter(new FileWriter(err)); SRResult result; if (shouldResolvePhrases()) { result = metric.similarity(ks.wpId1, ks.wpId2, false); } else { result = metric.similarity(ks.phrase1, ks.phrase2, false);
/**
 * Builds the ensemble row for one known similarity pair from each metric's
 * {@code mostSimilar()} results.
 *
 * Both phrases are disambiguated together. The first resolved id is used as
 * the query page; the second is the target whose position and score are
 * looked up in each metric's result list.
 *
 * @param ks the known similarity providing the language and the two phrases
 * @return the populated {@code EnsembleSim}, or {@code null} when
 *         disambiguation returns an empty list or the first id is not positive
 * @throws DaoException if disambiguation fails
 */
public EnsembleSim call(KnownSim ks) throws DaoException {
    List<LocalString> localStrings = Arrays.asList(
            new LocalString(ks.language, ks.phrase1),
            new LocalString(ks.language, ks.phrase2)
    );
    List<LocalId> ids = getDisambiguator().disambiguateTop(localStrings, null);
    if (ids.isEmpty() || ids.get(0).getId() <= 0) {
        return null;
    }
    int pageId = ids.get(0).getId();
    EnsembleSim es = new EnsembleSim(ks);
    for (SRMetric metric : metrics) {
        // Defaults recorded when the metric fails or the target is not in its list.
        double score = Double.NaN;
        int rank = -1;
        try {
            SRResultList dsl = metric.mostSimilar(pageId, getMaxResults(numResults), validIds);
            if (dsl != null) {
                // Look the target up once and reuse the index for both score and rank.
                // NOTE(review): ids.get(1) is neither size- nor null-checked; any failure
                // is caught below and leaves the NaN / -1 defaults in place.
                int targetIndex = dsl.getIndexForId(ids.get(1).getId());
                if (targetIndex >= 0) {
                    score = dsl.getScore(targetIndex);
                    rank = targetIndex;
                }
            }
        } catch (Exception e) {
            LOG.warn("Local sr metric " + metric.getName() + " failed for " + pageId, e);
        } finally {
            // Always add one entry per metric so positions line up with the metrics.
            es.add(score, rank);
        }
    }
    return es;
}
}, 100);
/**
 * Creates a disambiguator backed by the given phrase analyzer and metric.
 * The language is taken from {@code metric.getLanguage()}.
 *
 * @param phraseAnalyzer phrase analyzer stored on this instance
 * @param metric         metric stored on this instance; also supplies the language
 */
public SimilarityDisambiguator(PhraseAnalyzer phraseAnalyzer, SRMetric metric) {
    this.metric = metric;
    this.phraseAnalyzer = phraseAnalyzer;
    this.language = metric.getLanguage();
}
return; if (!dataset.getLanguage().equals(metric.getLanguage())) { throw new IllegalArgumentException("SR metric has language " + metric.getLanguage() + " but dataset has language " + dataset.getLanguage()); LOG.info("trained most similar normalizer for " + metric.getName() + ": " + trainee.dump()); } finally { mostSimilarNormalizer = trainee;
/**
 * Reports the name of the underlying {@code metric}.
 *
 * @return the value of {@code metric.getName()}
 */
@Override
public String getName() {
    final String metricName = metric.getName();
    return metricName;
}
/**
 * Exposes the similarity normalizer held by {@code delegate}.
 *
 * @return the value of {@code delegate.getSimilarityNormalizer()}
 */
@Override
public Normalizer getSimilarityNormalizer() {
    final Normalizer normalizer = delegate.getSimilarityNormalizer();
    return normalizer;
}
/**
 * Exposes the most-similar normalizer held by {@code delegate}.
 *
 * @return the value of {@code delegate.getMostSimilarNormalizer()}
 */
@Override
public Normalizer getMostSimilarNormalizer() {
    final Normalizer normalizer = delegate.getMostSimilarNormalizer();
    return normalizer;
}
/**
 * Reports the language of {@code delegate}.
 *
 * @return the value of {@code delegate.getLanguage()}
 */
@Override
public Language getLanguage() {
    final Language delegateLanguage = delegate.getLanguage();
    return delegateLanguage;
}
/**
 * Describes this metric as the underlying metric's name with a
 * {@code "pretrained-"} prefix.
 *
 * @return {@code "pretrained-"} followed by {@code metric.getName()}
 */
@Override
public String describeMetric() {
    final String baseName = metric.getName();
    return "pretrained-" + baseName;
}
/**
 * Forwards a page-to-page similarity request to {@code delegate}.
 *
 * @param pageId1      first page id, passed through unchanged
 * @param pageId2      second page id, passed through unchanged
 * @param explanations flag passed through unchanged to the delegate
 * @return whatever {@code delegate.similarity(pageId1, pageId2, explanations)} returns
 * @throws DaoException if the delegate throws it
 */
@Override
public SRResult similarity(int pageId1, int pageId2, boolean explanations) throws DaoException {
    final SRResult delegated = delegate.similarity(pageId1, pageId2, explanations);
    return delegated;
}
/**
 * Forwards a most-similar query for a phrase to {@code delegate}.
 *
 * @param phrase     query phrase, passed through unchanged
 * @param maxResults result limit, passed through unchanged
 * @return whatever {@code delegate.mostSimilar(phrase, maxResults)} returns
 * @throws DaoException if the delegate throws it
 */
@Override
public SRResultList mostSimilar(String phrase, int maxResults) throws DaoException {
    final SRResultList delegated = delegate.mostSimilar(phrase, maxResults);
    return delegated;
}
/**
 * Forwards a phrase-by-phrase cosimilarity request to {@code delegate}.
 *
 * @param rowPhrases row phrases, passed through unchanged
 * @param colPhrases column phrases, passed through unchanged
 * @return the matrix returned by {@code delegate.cosimilarity(rowPhrases, colPhrases)}
 * @throws DaoException if the delegate throws it
 */
@Override
public double[][] cosimilarity(String[] rowPhrases, String[] colPhrases) throws DaoException {
    final double[][] matrix = delegate.cosimilarity(rowPhrases, colPhrases);
    return matrix;
}
/**
 * Creates a wikifier from its collaborators.
 *
 * The phrase DAO is obtained from {@code pa.getDao()} and the language from
 * {@code metric.getLanguage()}; all other arguments are stored as given.
 *
 * @param metric      metric stored on this instance; also supplies the language
 * @param pa          phrase analyzer whose DAO is stored as {@code phraseDao}
 * @param lpd         local page DAO
 * @param rpd         raw page DAO
 * @param lld         local link DAO
 * @param linkProbDao link probability DAO
 */
public MilneWittenWikifier(SRMetric metric, AnchorTextPhraseAnalyzer pa, LocalPageDao lpd, RawPageDao rpd, LocalLinkDao lld, LinkProbabilityDao linkProbDao) {
    // Plain collaborators first.
    this.metric = metric;
    this.lpd = lpd;
    this.rpd = rpd;
    this.lld = lld;
    this.linkProbDao = linkProbDao;

    // Values derived from the arguments.
    this.phraseDao = pa.getDao();
    this.language = metric.getLanguage();
}
/**
 * Reports the name of {@code delegate}.
 *
 * @return the value of {@code delegate.getName()}
 */
@Override
public String getName() {
    final String delegateName = delegate.getName();
    return delegateName;
}
/**
 * Picks two entries of {@code concepts} at random, asks the metric of every
 * language in {@code langs} for the similarity of their local ids, and hands
 * the pair plus the per-language results to {@code writeRow}.
 *
 * @throws DaoException       if a similarity computation throws it
 * @throws WikiBrainException declared by the signature
 * @throws IOException        declared by the signature
 */
public void evaluateOneSample() throws DaoException, WikiBrainException, IOException {
    // The two draws are made in this order so the random sequence is consumed as before.
    int firstPick = random.nextInt(concepts.size());
    UniversalPage first = concepts.get(firstPick);
    int secondPick = random.nextInt(concepts.size());
    UniversalPage second = concepts.get(secondPick);

    List<SRResult> perLanguage = new ArrayList<SRResult>();
    for (Language lang : langs) {
        SRResult pairResult = metrics.get(lang)
                .similarity(first.getLocalId(lang), second.getLocalId(lang), false);
        perLanguage.add(pairResult);
    }
    writeRow(first, second, perLanguage);
}