private LuceneIndexer(Language language, File root, LuceneOptions... options) throws ConfigurationException { try { this.root = root; this.language = language; this.options = options; this.mainOptions = options[0]; this.builder = new TextFieldBuilder( mainOptions.configurator.get(LocalPageDao.class), mainOptions.configurator.get(RawPageDao.class), mainOptions.configurator.get(RedirectDao.class)); File langRoot = new File(root, language.getLangCode()); if (langRoot.exists()) { FileUtils.deleteQuietly(langRoot); } WikiBrainAnalyzer analyzer = new WikiBrainAnalyzer(language, mainOptions); Directory directory = FSDirectory.open(langRoot); IndexWriterConfig iwc = new IndexWriterConfig(mainOptions.matchVersion, analyzer); writer = new IndexWriter(directory, iwc); } catch (IOException e) { throw new RuntimeException(e); } }
/**
 * Builds a Lucene text field for a raw page from the requested text field elements.
 *
 * <p>The matching {@link LocalPage} is looked up through the local page DAO using
 * the raw page's language and local id, and the work is then delegated to the
 * overload that takes both page representations.
 *
 * @param page     the raw page to build the field for
 * @param elements the elements that should make up the field's text
 * @return the text field produced by the delegated overload
 * @throws DaoException propagated from the local page lookup or the delegated overload
 */
public TextField buildTextField(RawPage page, TextFieldElements elements) throws DaoException {
    LocalPage localPage = localPageDao.getById(page.getLanguage(), page.getLocalId());
    return buildTextField(localPage, page, elements);
}
/**
 * Builds a Lucene text field for a local page from the requested text field elements.
 *
 * <p>The matching {@link RawPage} is looked up through the raw page DAO using the
 * local page's language and local id, and the work is then delegated to the
 * overload that takes both page representations.
 *
 * @param page     the local page to build the field for
 * @param elements the elements that should make up the field's text
 * @return the text field produced by the delegated overload
 * @throws DaoException propagated from the raw page lookup or the delegated overload
 */
public TextField buildTextField(LocalPage page, TextFieldElements elements) throws DaoException {
    RawPage rawPage = rawPageDao.getById(page.getLanguage(), page.getLocalId());
    return buildTextField(page, rawPage, elements);
}
/**
 * Adds a single raw page to the index as one Lucene document.
 *
 * <p>Every document stores the page's local id and language id and carries a
 * title-only text field. Pages that are not redirects additionally get one text
 * field per configured {@link LuceneOptions}, built from that option's elements.
 *
 * @param page the page to index; its language must equal this indexer's language
 * @throws IllegalStateException if the indexer has been closed, or if the page's
 *                               language differs from the indexer's language
 * @throws DaoException          propagated from building the text fields
 * @throws RuntimeException      wrapping any {@link IOException} raised while
 *                               adding the document
 */
public void indexPage(RawPage page) throws DaoException {
    if (closed) {
        throw new IllegalStateException("Indexer has already been closed!");
    }
    if (!language.equals(page.getLanguage())) {
        throw new IllegalStateException("Language mismatch!");
    }
    try {
        Document doc = new Document();

        // Identification fields, followed by the title-only text field.
        doc.add(new IntField(LuceneOptions.LOCAL_ID_FIELD_NAME, page.getLocalId(), Field.Store.YES));
        doc.add(new IntField(LuceneOptions.LANG_ID_FIELD_NAME, page.getLanguage().getId(), Field.Store.YES));
        doc.add(builder.buildTextField(page, new TextFieldElements().addTitle()));

        // Redirects are indexed by title only; other pages get one field per option set.
        boolean isRedirect = page.isRedirect();
        if (!isRedirect) {
            for (LuceneOptions opts : options) {
                doc.add(builder.buildTextField(page, opts.elements));
            }
        }

        writer.addDocument(doc);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}