/**
 * Sets a phrase query against the text field described by {@code elements}.
 * The field name is obtained from {@code elements.getTextFieldName()} and the
 * work is handed to the overload that accepts a field name.
 *
 * @param elements     describes the text field to search in
 * @param searchString the phrase to search for
 * @return the value returned by the field-name overload of this method
 */
public QueryBuilder setPhraseQuery(TextFieldElements elements, String searchString) {
    return this.setPhraseQuery(
            elements.getTextFieldName(),
            searchString);
}
/**
 * Runs the currently configured query through the searcher, passing along the
 * language, hit limit, filters from {@code getFilters()} and the
 * resolve-Wikipedia-ids flag held by this builder.
 *
 * @return the score docs returned by the searcher
 * @throws IllegalArgumentException if {@code hasQuery()} reports that no query is set
 */
public WikiBrainScoreDoc[] search() {
    if (hasQuery()) {
        return searcher.search(query, language, numHits, getFilters(), resolveWikipediaIds);
    }
    // Nothing to execute: none of the set* methods produced a query.
    throw new IllegalArgumentException("no query specified. call one of the QueryBuilder.set* methods to specify a query");
}
/**
 * Sets a MoreLikeThis query for the document {@code luceneId}, restricted to the
 * text field described by {@code elements}. The field name is taken from
 * {@code elements.getTextFieldName()} and the work is handed to the overload
 * that accepts a field name.
 *
 * @param elements describes the text field to compare on
 * @param luceneId lucene document id of the reference document
 * @return the value returned by the field-name overload of this method
 * @throws DaoException propagated from the field-name overload
 */
public QueryBuilder setMoreLikeThisQuery(TextFieldElements elements, int luceneId) throws DaoException {
    return this.setMoreLikeThisQuery(
            elements.getTextFieldName(),
            luceneId);
}
/**
 * Creates a query builder for this instance's language with Wikipedia id
 * resolution switched off, adding {@code conceptFilter} when one is set.
 *
 * @return the configured builder
 */
private QueryBuilder getQueryBuilder() {
    QueryBuilder qb = searcher.getQueryBuilderByLanguage(language);
    qb.setResolveWikipediaIds(false);
    if (conceptFilter == null) {
        return qb;
    }
    qb.addFilter(conceptFilter);
    return qb;
}
/**
 * Resolves a phrase to candidate local pages using a phrase query against the
 * index for {@code language}.
 *
 * <p>If the phrase yields no hits and contains no space, a second query is run
 * over a space-separated list of prefixes and suffixes of the phrase (each at
 * least three characters long). When the phrase is too short to produce any
 * such piece, the second query is skipped instead of searching for an empty
 * string.
 *
 * <p>Each hit's score is divided by the sum of all hit scores, and hits are
 * inserted in the order returned by the searcher.
 *
 * @param language language whose index is searched
 * @param phrase   phrase to resolve
 * @param maxPages NOTE(review): currently unused; the hit limit is fixed at 10 — confirm intent
 * @return map from local id to normalized score, in searcher order; empty when nothing matched
 * @throws DaoException propagated from the calls this method makes
 */
@Override
public LinkedHashMap<LocalId, Float> resolve(Language language, String phrase, int maxPages) throws DaoException {
    LinkedHashMap<LocalId, Float> result = new LinkedHashMap<LocalId, Float>();

    WikiBrainScoreDoc[] wikibrainScoreDocs = searcher.getQueryBuilderByLanguage(language)
            .setPhraseQuery(phrase)
            .setNumHits(10)
            .search();

    if (wikibrainScoreDocs.length == 0 && phrase.indexOf(' ') < 0) {
        String phraseMultiVersion = buildSplitVariants(phrase);
        // Tokens of three characters or fewer yield no pieces; do not query with an empty string.
        if (phraseMultiVersion.length() > 0) {
            wikibrainScoreDocs = searcher.getQueryBuilderByLanguage(language)
                    .setPhraseQuery(phraseMultiVersion)
                    .setNumHits(10)
                    .search();
        }
    }

    float totalScore = 0;
    for (WikiBrainScoreDoc wikibrainScoreDoc : wikibrainScoreDocs) {
        totalScore += wikibrainScoreDoc.score;
    }

    // NOTE(review): if every hit scored 0 the division below produces NaN — confirm scores are always positive.
    for (WikiBrainScoreDoc wikibrainScoreDoc : wikibrainScoreDocs) {
        int localPageId = searcher.getLocalIdFromDocId(wikibrainScoreDoc.luceneId, language);
        LocalId localId = new LocalId(language, localPageId);
        result.put(localId, wikibrainScoreDoc.score / totalScore);
    }
    return result;
}

/**
 * Builds the fallback query text for a single token: for every split point, the
 * prefix (when longer than two characters) followed by the suffix (when longer
 * than two characters), each terminated by a single space.
 */
private String buildSplitVariants(String phrase) {
    StringBuilder variants = new StringBuilder();
    int length = phrase.length();
    for (int i = 1; i < length; i++) {
        if (i > 2) {
            variants.append(phrase, 0, i).append(' ');
        }
        if (length - i > 2) {
            variants.append(phrase, i, length).append(' ');
        }
    }
    return variants.toString();
}
/**
 * Builds a vector for a phrase: runs a phrase query, prunes the hits with
 * {@code SimUtils.pruneSimilar}, expands the scores and normalizes the result.
 *
 * @param phrase phrase to build the vector for
 * @return the normalized vector, or {@code null} when no query could be built
 *         from the phrase (a warning is logged in that case)
 */
@Override
public TIntFloatMap getVector(String phrase) {
    QueryBuilder phraseBuilder = getQueryBuilder().setPhraseQuery(phrase);
    if (!phraseBuilder.hasQuery()) {
        LOG.warn("Phrase cannot be parsed to get a query. "+phrase);
        return null;
    }
    WikiBrainScoreDoc[] hits = phraseBuilder.search();
    WikiBrainScoreDoc[] pruned = SimUtils.pruneSimilar(hits);
    return SimUtils.normalizeVector(expandScores(pruned));
}
/**
 * Builds a vector for a page: looks up the page's lucene document id, runs a
 * MoreLikeThis query for it, prunes the hits, expands the scores and
 * normalizes the result.
 *
 * @param pageId local page id
 * @return the normalized vector, or an empty map when the lookup returns a
 *         negative lucene id (a warning is logged in that case)
 * @throws DaoException propagated from the calls this method makes
 */
@Override
public TIntFloatMap getVector(int pageId) throws DaoException {
    int docId = searcher.getDocIdFromLocalId(pageId, language);
    if (docId >= 0) {
        WikiBrainScoreDoc[] hits = getQueryBuilder()
                .setMoreLikeThisQuery(docId)
                .search();
        WikiBrainScoreDoc[] pruned = pruneSimilar(hits);
        return SimUtils.normalizeVector(expandScores(pruned));
    }
    // Negative id: the page has no document in the index.
    LOG.warn("Unindexed document " + pageId + " in " + language.getEnLangName());
    return new TIntFloatHashMap();
}
/**
 * Creates a new query builder bound to this searcher and the given language.
 *
 * @param language language the builder should query
 * @return a fresh builder for {@code language}
 * @throws IllegalArgumentException if {@code analyzers} has no entry for {@code language}
 */
public QueryBuilder getQueryBuilderByLanguage(Language language) {
    if (analyzers.containsKey(language)) {
        return new QueryBuilder(this, language);
    }
    throw new IllegalArgumentException("Unknown language: " + language);
}
/**
 * Resolves a phrase against the plain-text field and returns at most {@code n}
 * candidates. The query requests {@code n * 2} hits; each returned score is
 * divided by the sum of the scores of all hits retrieved, not only the
 * {@code n} that are kept.
 *
 * @param phrase phrase to search for
 * @param n      maximum number of candidates to return; 0 returns an empty map
 *               without querying
 * @return map from local id (built from each hit's {@code wpId}) to its share
 *         of the total score, in searcher order
 */
private LinkedHashMap<LocalId, Float> resolveTextual(String phrase, int n) {
    LinkedHashMap<LocalId, Float> candidates = new LinkedHashMap<LocalId, Float>();
    if (n == 0) {
        return candidates;
    }

    WikiBrainScoreDoc[] hits = searcher.getQueryBuilderByLanguage(language)
            .setPhraseQuery(new TextFieldElements().addPlainText(), phrase)
            .setNumHits(n*2)
            .search();

    double scoreSum = 0.0;
    for (int i = 0; i < hits.length; i++) {
        scoreSum += hits[i].score;
    }

    int limit = Math.min(n, hits.length);
    for (int i = 0; i < limit; i++) {
        WikiBrainScoreDoc hit = hits[i];
        candidates.put(new LocalId(language, hit.wpId), (float)(hit.score / scoreSum));
    }
    return candidates;
}
.setPhraseQuery(new TextFieldElements().addTitle(), phrase) .setNumHits(maxPages * DOC_MULTIPLIER) .search(); .setPhraseQuery(new TextFieldElements().addPlainText(), phrase) .setNumHits(maxPages * DOC_MULTIPLIER) .search(); .setPhraseQuery(phraseMultiVersion) .setNumHits(10) .search();
/**
 * Sets a phrase query using the text field elements configured in the
 * searcher's options ({@code searcher.getOptions().elements}).
 *
 * @param searchString the phrase to search for
 * @return the value returned by the elements-based overload of this method
 */
public QueryBuilder setPhraseQuery(String searchString) {
    return this.setPhraseQuery(
            searcher.getOptions().elements,
            searchString);
}
/**
 * Sets a MoreLikeThis query for the document {@code luceneId} using the text
 * field elements configured in the searcher's options
 * ({@code searcher.getOptions().elements}).
 *
 * @param luceneId lucene document id of the reference document
 * @return the value returned by the elements-based overload of this method
 * @throws DaoException propagated from the elements-based overload
 */
public QueryBuilder setMoreLikeThisQuery(int luceneId) throws DaoException {
    return this.setMoreLikeThisQuery(
            searcher.getOptions().elements,
            luceneId);
}