/**
 * Resolves a phrase to candidate local pages through the language-specific phrase search.
 *
 * <p>When the phrase produces no hits and contains no space, a second query is issued whose
 * text is the space-separated list of every prefix and every suffix of the phrase that is
 * longer than two characters.
 *
 * <p>Each hit's score is divided by the sum of all hit scores, so the returned values sum
 * to 1. If the scores sum to zero, every hit receives an equal share instead of NaN.
 *
 * @param language language whose index is searched
 * @param phrase   phrase to resolve
 * @param maxPages number of hits requested from the searcher; a non-positive value yields
 *                 an empty result without searching
 * @return hits in the order returned by the searcher, keyed by local id, with normalized scores
 * @throws DaoException propagated from the underlying lookups
 */
@Override
public LinkedHashMap<LocalId, Float> resolve(Language language, String phrase, int maxPages) throws DaoException {
    LinkedHashMap<LocalId, Float> result = new LinkedHashMap<LocalId, Float>();
    if (maxPages <= 0) {
        return result;
    }

    // Honor the caller's limit instead of a hard-coded hit count.
    WikiBrainScoreDoc[] hits = searcher.getQueryBuilderByLanguage(language)
            .setPhraseQuery(phrase)
            .setNumHits(maxPages)
            .search();

    if (hits.length == 0 && phrase.indexOf(' ') < 0) {
        // Single token with no match: retry with all prefixes/suffixes longer than two chars.
        StringBuilder fragments = new StringBuilder();
        int length = phrase.length();
        for (int i = 1; i < length; i++) {
            if (i > 2) {
                fragments.append(phrase, 0, i).append(' ');
            }
            if (length - i > 2) {
                fragments.append(phrase, i, length).append(' ');
            }
        }
        hits = searcher.getQueryBuilderByLanguage(language)
                .setPhraseQuery(fragments.toString())
                .setNumHits(maxPages)
                .search();
    }

    float totalScore = 0;
    for (WikiBrainScoreDoc hit : hits) {
        totalScore += hit.score;
    }

    for (WikiBrainScoreDoc hit : hits) {
        int localPageId = searcher.getLocalIdFromDocId(hit.luceneId, language);
        // Avoid 0/0 = NaN when every hit scored zero: fall back to a uniform split.
        float share = totalScore > 0 ? hit.score / totalScore : 1.0f / hits.length;
        result.put(new LocalId(language, localPageId), share);
    }
    return result;
}
/**
 * Resolves a phrase against the plain-text field and returns up to {@code n} candidates.
 *
 * <p>The search requests {@code n * 2} hits; scores are normalized by the sum over all
 * retrieved hits, but only the first {@code n} are returned, so the returned values may
 * sum to less than 1. If the scores sum to zero, each returned hit gets an equal share
 * of the retrieved set instead of NaN.
 *
 * @param phrase phrase to search for
 * @param n      maximum number of entries to return; a non-positive value yields an empty
 *               map without searching
 * @return up to {@code n} hits in the order returned by the searcher, keyed by local id
 */
private LinkedHashMap<LocalId, Float> resolveTextual(String phrase, int n) {
    LinkedHashMap<LocalId, Float> expanded = new LinkedHashMap<LocalId, Float>();
    // Also covers negative n, which would otherwise be forwarded as a negative hit count.
    if (n <= 0) {
        return expanded;
    }

    WikiBrainScoreDoc[] results = searcher.getQueryBuilderByLanguage(language)
            .setPhraseQuery(new TextFieldElements().addPlainText(), phrase)
            .setNumHits(n * 2)
            .search();

    double total = 0.0;
    for (WikiBrainScoreDoc doc : results) {
        total += doc.score;
    }

    int limit = Math.min(n, results.length);
    for (int i = 0; i < limit; i++) {
        // Avoid 0/0 = NaN when every hit scored zero: fall back to a uniform split.
        double share = total > 0 ? results[i].score / total : 1.0 / results.length;
        expanded.put(new LocalId(language, results[i].wpId), (float) share);
    }
    return expanded;
}
/**
 * Builds the vector for a phrase from the hits of a phrase query.
 *
 * <p>The hits are passed through {@code SimUtils.pruneSimilar}, expanded with
 * {@code expandScores}, and normalized with {@code SimUtils.normalizeVector}.
 *
 * @param phrase phrase to turn into a query
 * @return the normalized vector, or {@code null} (after logging a warning) when the
 *         query builder reports that it has no query for the phrase
 */
@Override
public TIntFloatMap getVector(String phrase) {
    QueryBuilder phraseQuery = getQueryBuilder().setPhraseQuery(phrase);

    // Bail out early when the phrase did not produce a usable query.
    if (!phraseQuery.hasQuery()) {
        LOG.warn("Phrase cannot be parsed to get a query. "+phrase);
        return null;
    }

    WikiBrainScoreDoc[] hits = phraseQuery.search();
    WikiBrainScoreDoc[] pruned = SimUtils.pruneSimilar(hits);
    return SimUtils.normalizeVector(expandScores(pruned));
}
/**
 * Builds the vector for a page from a more-like-this query on its Lucene document.
 *
 * <p>The hits are passed through {@code pruneSimilar}, expanded with {@code expandScores},
 * and normalized with {@code SimUtils.normalizeVector}.
 *
 * @param pageId local page id, mapped to a Lucene document id for this instance's language
 * @return the normalized vector, or an empty map (after logging a warning) when the
 *         searcher returns a negative Lucene document id for the page
 * @throws DaoException propagated from the underlying calls
 */
@Override
public TIntFloatMap getVector(int pageId) throws DaoException {
    final int docId = searcher.getDocIdFromLocalId(pageId, language);

    if (docId >= 0) {
        WikiBrainScoreDoc[] similar = getQueryBuilder()
                .setMoreLikeThisQuery(docId)
                .search();
        WikiBrainScoreDoc[] pruned = pruneSimilar(similar);
        return SimUtils.normalizeVector(expandScores(pruned));
    }

    // A negative id means the page has no document in the index.
    LOG.warn("Unindexed document " + pageId + " in " + language.getEnLangName());
    return new TIntFloatHashMap();
}