/**
 * Sets a phrase query for {@code searchString} on the text field named by
 * {@code elements}.
 *
 * @param elements     supplies the text field name through {@code getTextFieldName()}
 * @param searchString the phrase to search for
 * @return the result of the field-name based {@code setPhraseQuery} overload
 */
public QueryBuilder setPhraseQuery(TextFieldElements elements, String searchString) {
    final String fieldName = elements.getTextFieldName();
    return setPhraseQuery(fieldName, searchString);
}
/**
 * Obtains a query builder for {@code language} from the searcher, calls
 * {@code setResolveWikipediaIds(false)} on it, and adds {@code conceptFilter}
 * when that filter is non-null.
 *
 * @return the configured builder
 */
private QueryBuilder getQueryBuilder() {
    final QueryBuilder configured = searcher.getQueryBuilderByLanguage(language);
    configured.setResolveWikipediaIds(false);
    if (conceptFilter == null) {
        // Nothing to restrict on; hand the builder back as-is.
        return configured;
    }
    configured.addFilter(conceptFilter);
    return configured;
}
/**
 * Sets a phrase query for {@code searchString} on the text field described
 * by the {@code elements} of the searcher's options.
 *
 * @param searchString the phrase to search for
 * @return the result of the {@code TextFieldElements} based overload
 */
public QueryBuilder setPhraseQuery(String searchString) {
    final TextFieldElements defaultElements = searcher.getOptions().elements;
    return setPhraseQuery(defaultElements, searchString);
}
/**
 * Assembles a {@code TextFieldElements} from the given switches.
 *
 * @param title     value forwarded to {@code addTitle(int)}
 *                  (presumably how many times the title is included; confirm
 *                  against TextFieldElements)
 * @param redirects when true, {@code addRedirects()} is called
 * @param plainText when true, {@code addPlainText()} is called
 * @return the populated elements object
 */
private static TextFieldElements buildElements(int title, boolean redirects, boolean plainText) {
    final TextFieldElements result = new TextFieldElements();
    result.addTitle(title);
    if (redirects) {
        result.addRedirects();
    }
    if (plainText) {
        result.addPlainText();
    }
    return result;
}
/**
 * Computes the text field name produced by a fresh {@code TextFieldElements}
 * on which only {@code addPlainText()} has been called.
 *
 * @return the field name for the plain-text-only configuration
 */
public static String getPlainTextFieldName() {
    final String plainTextField = new TextFieldElements()
            .addPlainText()
            .getTextFieldName();
    return plainTextField;
}
/**
 * Computes the text field name produced by a fresh {@code TextFieldElements}
 * on which only the no-argument {@code addTitle()} has been called.
 *
 * @return the field name for the title-only configuration
 */
public static String getTitleFieldName() {
    final String titleField = new TextFieldElements()
            .addTitle()
            .getTextFieldName();
    return titleField;
}
/**
 * Computes the text field name produced by a fresh {@code TextFieldElements}
 * on which only {@code addRedirects()} has been called.
 *
 * @return the field name for the redirects-only configuration
 */
public static String getRedirectsFieldName() {
    final String redirectsField = new TextFieldElements()
            .addRedirects()
            .getTextFieldName();
    return redirectsField;
}
/**
 * Creates a new {@code TokenizerOptions} mirroring this object's
 * {@code caseInsensitive}, {@code useStopWords} and {@code useStem} flags.
 * Each flag that is set triggers the matching call on the new options object.
 *
 * @return a freshly created options object
 */
public TokenizerOptions getTokenizerOptions() {
    final TokenizerOptions snapshot = new TokenizerOptions();
    if (caseInsensitive) {
        snapshot.caseInsensitive();
    }
    if (useStopWords) {
        snapshot.useStopWords();
    }
    if (useStem) {
        snapshot.useStem();
    }
    return snapshot;
}
/**
 * Runs the query held by this builder through the searcher, passing along
 * the language, hit limit, filters and the resolve-Wikipedia-ids flag.
 *
 * @return the scored documents returned by the searcher
 * @throws IllegalArgumentException if {@code hasQuery()} reports that no query has been set
 */
public WikiBrainScoreDoc[] search() {
    if (hasQuery()) {
        return searcher.search(query, language, numHits, getFilters(), resolveWikipediaIds);
    }
    throw new IllegalArgumentException("no query specified. call one of the QueryBuilder.set* methods to specify a query");
}
/**
 * Sets a MoreLikeThis query for the document {@code luceneId} on the text
 * field named by {@code elements}.
 *
 * @param elements supplies the text field name through {@code getTextFieldName()}
 * @param luceneId the Lucene document id to find similar documents for
 * @return the result of the field-name based {@code setMoreLikeThisQuery} overload
 * @throws DaoException propagated from the delegated overload
 */
public QueryBuilder setMoreLikeThisQuery(TextFieldElements elements, int luceneId) throws DaoException {
    final String fieldName = elements.getTextFieldName();
    return setMoreLikeThisQuery(fieldName, luceneId);
}
/**
 * Sets a MoreLikeThis query for the document {@code luceneId} on the text
 * field described by the {@code elements} of the searcher's options.
 *
 * @param luceneId the Lucene document id to find similar documents for
 * @return the result of the {@code TextFieldElements} based overload
 * @throws DaoException propagated from the delegated overload
 */
public QueryBuilder setMoreLikeThisQuery(int luceneId) throws DaoException {
    final TextFieldElements defaultElements = searcher.getOptions().elements;
    return setMoreLikeThisQuery(defaultElements, luceneId);
}
/**
 * Initializes the tokenizer from a version, a language and the three
 * switches read from {@code tokenizerOptions}.
 *
 * @param version          stored as {@code matchVersion}
 * @param tokenizerOptions source of the case-insensitivity, stop-word and stemming flags
 * @param language         the language this tokenizer is associated with
 */
protected LanguageTokenizer(Version version, TokenizerOptions tokenizerOptions, Language language) {
    // Values stored directly from the arguments.
    this.matchVersion = version;
    this.language = language;

    // Flags copied out of the options object.
    this.caseInsensitive = tokenizerOptions.isCaseInsensitive();
    this.useStopWords = tokenizerOptions.doesUseStopWords();
    this.useStem = tokenizerOptions.doesUseStem();
}
/**
 * Constructs a WikiBrainAnalyzer for the specified language using the
 * options returned by {@code LuceneOptions.getDefaultOptions()}.
 * Delegates to the (language, options) constructor.
 *
 * @param language the language this analyzer analyzes
 */
public WikiBrainAnalyzer(Language language) { this(language, LuceneOptions.getDefaultOptions()); }
/**
 * Runs a Lucene query in the given language, using this object's
 * {@code hitCount} as the hit limit and {@code null} as the filter argument.
 *
 * @param query    the Lucene query to run
 * @param language the language to search in
 * @return the scored documents produced by the four-argument {@code search} overload
 */
public WikiBrainScoreDoc[] search(Query query, Language language) {
    final WikiBrainScoreDoc[] hits = search(query, language, this.hitCount, null);
    return hits;
}
/**
 * Closes {@code luceneIndexer} if one is present; otherwise does nothing.
 */
public void endLoad() {
    if (luceneIndexer == null) {
        return;
    }
    luceneIndexer.close();
}
/**
 * Creates a {@code TokenizerOptions} with the requested switches applied.
 *
 * @param caseInsensitive when true, {@code caseInsensitive()} is called on the new options
 * @param useStopWords    when true, {@code useStopWords()} is called on the new options
 * @param useStem         when true, {@code useStem()} is called on the new options
 * @return the newly created options object
 */
private static TokenizerOptions buildOptions(boolean caseInsensitive, boolean useStopWords, boolean useStem) {
    final TokenizerOptions result = new TokenizerOptions();
    if (caseInsensitive) {
        result.caseInsensitive();
    }
    if (useStopWords) {
        result.useStopWords();
    }
    if (useStem) {
        result.useStem();
    }
    return result;
}
/**
 * Constructs a LuceneIndexer that will index any RawPage in a
 * specified Language. Indexes are then placed in language-specific
 * subdirectories in the specified file.
 * Delegates to the (language, root, options) constructor with
 * {@code LuceneOptions.getDefaultOptions()}.
 *
 * @param language the language in which this indexer can operate
 * @param root the root directory in which to save all the lucene directories
 * @throws ConfigurationException propagated from the delegated constructor
 */
public LuceneIndexer(Language language, File root) throws ConfigurationException { this(language, root, LuceneOptions.getDefaultOptions()); }
/**
 * Runs a Lucene query in the given language with an explicit hit limit and
 * filter, passing {@code true} as the final argument of the five-argument
 * {@code search} overload (presumably the resolve-Wikipedia-ids flag; confirm
 * against that overload).
 *
 * @param query    the Lucene query to run
 * @param language the language to search in
 * @param hitCount the hit limit forwarded to the delegated overload
 * @param filter   the filter forwarded to the delegated overload
 * @return the scored documents produced by the delegated overload
 */
public WikiBrainScoreDoc[] search(Query query, Language language, int hitCount, Filter filter) {
    final WikiBrainScoreDoc[] hits = search(query, language, hitCount, filter, true);
    return hits;
}
/**
 * Closes {@code luceneIndexer} if one is present; otherwise does nothing.
 */
public void endLoad() {
    if (luceneIndexer == null) {
        return;
    }
    luceneIndexer.close();
}
/**
 * Constructs a LuceneSearcher that will run lucene queries on sets of articles
 * in any language in the LanguageSet. Note that root is the parent directory
 * of the directory where lucene indexes are stored, though it is the same
 * directory as was passed to the LuceneIndexer.
 * Delegates to the (languages, root, options) constructor with
 * {@code LuceneOptions.getDefaultOptions()}.
 *
 * @param languages the language set in which this searcher can operate
 * @param root the root directory in which each language contains its own lucene directory
 */
public LuceneSearcher(LanguageSet languages, File root) { this(languages, root, LuceneOptions.getDefaultOptions()); }