/**
 * Supplies the analyzer for this component.
 *
 * @return a newly constructed {@code EnglishAnalyzer} built with its no-argument constructor
 */
@Override
public Analyzer createAnalyzer() {
    final Analyzer english = new EnglishAnalyzer();
    return english;
}
/**
 * Builds the analyzer.
 *
 * @return a new {@code EnglishAnalyzer} created with its no-argument constructor
 */
@Override protected Analyzer build() { return new EnglishAnalyzer(); } },
/**
 * Provides the analyzer used by this class.
 *
 * @return a fresh {@code EnglishAnalyzer} instance; a new object is created on every call
 */
@Override
protected Analyzer getAnalyzer() {
    final Analyzer english = new EnglishAnalyzer();
    return english;
}
/**
 * Sets the {@code doAnalyze} flag and installs the analyzer for the given language.
 *
 * <p>Only {@code "en"} is supported; it installs an {@code EnglishAnalyzer} created with
 * {@code Version.LUCENE_44}. The language is validated before any field is written, so a
 * rejected call leaves this transformer unchanged.
 *
 * @param doAnalyze value assigned to the {@code doAnalyze} field
 * @param lang language code; must be {@code "en"}
 * @return this transformer, so calls can be chained
 * @throws IllegalArgumentException if {@code lang} is {@code null} or anything other than {@code "en"}
 */
public TextTransformer withDoAnalyze(boolean doAnalyze, String lang) {
    // Constant-first equals: a null language is reported as unsupported instead of raising an NPE.
    if (!"en".equals(lang)) {
        throw new IllegalArgumentException("unsupported language:" + lang);
    }
    this.doAnalyze = doAnalyze;
    analyzer = new EnglishAnalyzer(Version.LUCENE_44);
    return this;
}
// Parse the topic against the "Body" field using English analysis.
QueryParser parser = new QueryParser("Body", new EnglishAnalyzer());
Query query = parser.parse(topic);

// Retrieve at most 1000 hits, then map each hit's stored document to its "Body" term vector.
TopDocs hits = iSearcher.search(query, 1000);
for (int rank = 0; rank < hits.scoreDocs.length; rank++) {
    final int docId = hits.scoreDocs[rank].doc;
    Terms termVector = iSearcher.getIndexReader().getTermVector(docId, "Body");
    Document doc = iSearcher.doc(docId);
    documentsList.put(doc, termVector);
}
/**
 * Creates an {@code EnglishAnalyzer} and sets its version to {@code version.luceneVersion}.
 *
 * @param version object whose {@code luceneVersion} field is applied to the new analyzer
 * @return the analyzer, after its version has been set
 */
@Override protected Analyzer create(Version version) { Analyzer a = new EnglishAnalyzer(); a.setVersion(version.luceneVersion); return a; } },
public SimpleSearcher(String indexDir) throws IOException { Path indexPath = Paths.get(indexDir); if (!Files.exists(indexPath) || !Files.isDirectory(indexPath) || !Files.isReadable(indexPath)) { throw new IllegalArgumentException(indexDir + " does not exist or is not a directory."); } this.reader = DirectoryReader.open(FSDirectory.open(indexPath)); this.similarity = new LMDirichletSimilarity(1000.0f); this.analyzer = new EnglishAnalyzer(); this.searchtweets = false; this.isRerank = false; setDefaultReranker(); }
/**
 * Switches tweet search on or off and replaces the analyzer to match.
 *
 * @param flag {@code true} to record tweet search as enabled and use {@code new TweetAnalyzer(true)};
 *             {@code false} to record it as disabled and use a new {@code EnglishAnalyzer}
 */
public void setSearchTweets(boolean flag) {
    this.searchtweets = flag;
    if (flag) {
        this.analyzer = new TweetAnalyzer(true);
    } else {
        this.analyzer = new EnglishAnalyzer();
    }
}
/**
 * Builds the token stream for {@code strOrig}.
 *
 * <p>When {@code stemsAllowed} is {@code true}, the text is read through a newly created
 * {@code EnglishAnalyzer} (with an empty field name). The set handed to that analyzer is
 * {@code EnglishAnalyzer.getDefaultStopSet()} when {@code stopWordsAllowed} is {@code true},
 * and {@code CharArraySet.EMPTY_SET} otherwise. When {@code stemsAllowed} is {@code false},
 * the call is delegated to {@code getStandardTokenStream(strOrig)} and
 * {@code stopWordsAllowed} is not consulted.
 *
 * <p>The analyzer created here is not closed by this method, which is why the
 * {@code "resource"} warning is suppressed.
 *
 * <p>NOTE(review): the parameter name suggests stop words are kept when
 * {@code stopWordsAllowed} is {@code true}, yet that is the case in which the default stop
 * set is supplied to the analyzer - confirm the intended meaning of the flag.
 *
 * @param strOrig text to tokenize
 * @param stemsAllowed selects the {@code EnglishAnalyzer} path over the standard token stream
 * @param stopWordsAllowed selects the default stop set over the empty set (analyzer path only)
 * @return the token stream produced by the chosen path
 * @throws IOException declared by the signature
 */
@SuppressWarnings("resource") @Override protected TokenStream getTokenStream(final String strOrig, final boolean stemsAllowed, final boolean stopWordsAllowed) throws IOException { if (stemsAllowed) { CharArraySet stopWords = stopWordsAllowed ? EnglishAnalyzer.getDefaultStopSet() : CharArraySet.EMPTY_SET; return new EnglishAnalyzer(stopWords).tokenStream("", new StringReader(strOrig)); } else { return getStandardTokenStream(strOrig); } } }
= new EnglishAnalyzer(Version.LUCENE_36);
/**
 * Indexes a book, replacing any entry previously indexed under the same ISBN.
 *
 * <p>The index writer is created on first use over {@code directory} with an
 * {@code EnglishAnalyzer}. The ISBN is stored as a {@code LongField}; author and title are
 * stored as {@code StringField}s and the content as a {@code TextField}. The change is
 * committed before the method returns.
 *
 * @param item book to index
 * @throws IOException if opening the index, deleting, adding or committing fails
 */
public void index(Book item) throws IOException {
    if (iw == null) {
        iw = new IndexWriter(
                FSDirectory.open(new File(directory)),
                new IndexWriterConfig(Version.LATEST, new EnglishAnalyzer()));
    }

    final long isbn = item.getISBN();

    // Book.ID is indexed as a LongField, whose terms are prefix-coded numbers. A Term built
    // from the decimal string never matches those terms, so the old entry would survive and
    // duplicates would pile up. An exact-match numeric range query targets the stored value.
    iw.deleteDocuments(org.apache.lucene.search.NumericRangeQuery.newLongRange(
            Book.ID, Long.valueOf(isbn), Long.valueOf(isbn), true, true));

    Document doc = new Document();
    doc.add(new LongField(Book.ID, isbn, Field.Store.YES));
    doc.add(new StringField(Book.AUTHOR, item.getAuthor(), Field.Store.YES));
    doc.add(new StringField(Book.TITLE, item.getTitle(), Field.Store.YES));
    doc.add(new TextField(Book.CONTENT, item.getContent(), Field.Store.YES));
    iw.addDocument(doc);
    iw.commit();
}
public EnglishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new EnglishAnalyzer(Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet()), Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)); analyzer.setVersion(version); }
/**
 * Prints statistics and the postings list for a single term of the body field.
 *
 * <p>The raw input is parsed with an {@code EnglishAnalyzer} that is given an empty stop set,
 * and the resulting term is looked up in {@code LuceneDocumentGenerator.FIELD_BODY}. The raw
 * term, its parsed form, its collection frequency, its document frequency and one line per
 * posting (document id, in-document frequency) are written to standard output. A term that is
 * not present in the index prints its header lines and an empty postings section.
 *
 * @param termStr the term to look up; must parse to a single-term query
 * @throws IOException if reading the index fails
 * @throws ParseException if the query parser rejects {@code termStr}
 * @throws ClassCastException if {@code termStr} does not parse to a {@code TermQuery}
 */
public void printTermCounts(String termStr) throws IOException, ParseException {
    EnglishAnalyzer analyzer = new EnglishAnalyzer(CharArraySet.EMPTY_SET);
    QueryParser queryParser = new QueryParser(LuceneDocumentGenerator.FIELD_BODY, analyzer);

    // The cast relies on the input parsing to exactly one term.
    TermQuery termQuery = (TermQuery) queryParser.parse(termStr);
    Term term = termQuery.getTerm();

    System.out.println("raw term: " + termStr);
    System.out.println("stemmed term: " + termQuery.toString(LuceneDocumentGenerator.FIELD_BODY));
    System.out.println("collection frequency: " + reader.totalTermFreq(term));
    System.out.println("document frequency: " + reader.docFreq(term));

    PostingsEnum postingsEnum =
            MultiFields.getTermDocsEnum(reader, LuceneDocumentGenerator.FIELD_BODY, term.bytes());
    System.out.println("postings:\n");

    // getTermDocsEnum yields null when the field or term is absent from the index;
    // there is nothing to list in that case, so stop instead of dereferencing null.
    if (postingsEnum == null) {
        return;
    }
    while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        System.out.printf("\t%s, %s\n", postingsEnum.docID(), postingsEnum.freq());
    }
}
public EnglishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new EnglishAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, EnglishAnalyzer.getDefaultStopSet()), Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET) ); analyzer.setVersion(version); }
EnglishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); analyzer = new EnglishAnalyzer( Analysis.parseStopWords(env, indexSettings.getIndexVersionCreated(), settings, EnglishAnalyzer.getDefaultStopSet()), Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET) ); analyzer.setVersion(version); }
IndexReader indexReader = IndexReader.open(directory); = new EnglishAnalyzer(Version.LUCENE_36);