/**
 * Executes a Lucene query against the index registered for one language and
 * converts the raw hits into {@link WikiBrainScoreDoc}s.
 *
 * As a side effect, the requested hit count is stored in this object's
 * {@code hitCount} field before the search runs.
 *
 * @param query        the Lucene query to run
 * @param language     the language whose searcher is used
 * @param hitCount     the hit limit handed to the underlying searcher
 * @param filter       the Lucene filter passed through to the underlying searcher
 * @param resolveWpIds if true, each hit's Wikipedia id is looked up from its Lucene doc id;
 *                     if false, the Wikipedia id of every hit is set to -1.
 *                     The Lucene doc id and score are always included.
 * @return one entry per hit, in the order the searcher returned them
 * @throws IllegalArgumentException if no searcher is registered for {@code language}
 * @throws RuntimeException         wrapping any {@link IOException} raised while searching
 */
public WikiBrainScoreDoc[] search(Query query, Language language, int hitCount, Filter filter, boolean resolveWpIds) {
    if (!searchers.containsKey(language)) {
        throw new IllegalArgumentException("Unknown language: " + language);
    }
    try {
        this.hitCount = hitCount;
        ScoreDoc[] hits = searchers.get(language).search(query, filter, hitCount).scoreDocs;
        WikiBrainScoreDoc[] converted = new WikiBrainScoreDoc[hits.length];
        int slot = 0;
        for (ScoreDoc hit : hits) {
            // -1 marks "not resolved" when the caller did not ask for Wikipedia ids.
            int wpId = -1;
            if (resolveWpIds) {
                wpId = getLocalIdFromDocId(hit.doc, language);
            }
            converted[slot] = new WikiBrainScoreDoc(hit.doc, wpId, hit.score);
            slot++;
        }
        return converted;
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Resolves a phrase to candidate local pages by running it as a phrase query
 * and normalizing the hit scores so that they sum to one.
 *
 * If the phrase query yields no hits and the phrase contains no space, a
 * fallback query is issued. It consists of every prefix and every suffix of
 * the phrase that is at least three characters long, separated by spaces
 * (presumably to match the parts of an unsegmented compound word).
 *
 * @param language the language whose index is searched
 * @param phrase   the phrase to resolve
 * @param maxPages the maximum number of pages to return; a non-positive value
 *                 yields an empty result
 * @return candidate pages mapped to their normalized score, in the order the
 *         searcher returned them; empty if nothing matched
 * @throws DaoException declared by the overridden method
 */
@Override
public LinkedHashMap<LocalId, Float> resolve(Language language, String phrase, int maxPages) throws DaoException {
    LinkedHashMap<LocalId, Float> result = new LinkedHashMap<LocalId, Float>();
    // Lucene rejects a non-positive hit count, so there is nothing to search for.
    if (maxPages <= 0) {
        return result;
    }

    WikiBrainScoreDoc[] wikibrainScoreDocs = searcher.getQueryBuilderByLanguage(language)
            .setPhraseQuery(phrase)
            .setNumHits(maxPages)
            .search();

    if (wikibrainScoreDocs.length == 0 && phrase.indexOf(' ') < 0) {
        // Build the fallback query from all prefixes/suffixes longer than two characters.
        StringBuilder phraseMultiVersion = new StringBuilder();
        int length = phrase.length();
        for (int i = 1; i < length; i++) {
            if (i > 2) {
                phraseMultiVersion.append(phrase, 0, i).append(' ');
            }
            if (length - i > 2) {
                phraseMultiVersion.append(phrase, i, length).append(' ');
            }
        }
        wikibrainScoreDocs = searcher.getQueryBuilderByLanguage(language)
                .setPhraseQuery(phraseMultiVersion.toString())
                .setNumHits(maxPages)
                .search();
    }

    float totalScore = 0;
    for (WikiBrainScoreDoc wikibrainScoreDoc : wikibrainScoreDocs) {
        totalScore += wikibrainScoreDoc.score;
    }

    for (WikiBrainScoreDoc wikibrainScoreDoc : wikibrainScoreDocs) {
        int localPageId = searcher.getLocalIdFromDocId(wikibrainScoreDoc.luceneId, language);
        LocalId localId = new LocalId(language, localPageId);
        // Guard against 0/0 (NaN) when every hit has a zero score: fall back to uniform weights.
        float normalized = totalScore > 0
                ? wikibrainScoreDoc.score / totalScore
                : 1.0f / wikibrainScoreDocs.length;
        result.put(localId, normalized);
    }
    return result;
}
@Override public List<Explanation> getExplanations(String phrase1, String phrase2, TIntFloatMap vector1, TIntFloatMap vector2, SRResult result) throws DaoException { Leaderboard lb = new Leaderboard(5); // TODO: make 5 configurable for (int id : vector1.keys()) { if (vector2.containsKey(id)) { lb.tallyScore(id, vector1.get(id) * vector2.get(id)); } } SRResultList top = lb.getTop(); if (top.numDocs() == 0) { return Arrays.asList(new Explanation("? and ? share no tags", phrase1, phrase2)); } List<Explanation> explanations = new ArrayList<Explanation>(); for (int i = 0; i < top.numDocs(); i++) { LocalPage p = pageDao.getById(language, searcher.getLocalIdFromDocId(top.getId(i), language)); if (p != null) { explanations.add(new Explanation("Both ? and ? have similar text to ?", phrase1, phrase2, p)); } } return explanations; }