/**
 * Creates a {@code MoreLikeThis} for the given reader by delegating to the
 * two-argument constructor with a newly created {@link DefaultSimilarity}.
 *
 * @param ir the index reader handed to the two-argument constructor
 */
public MoreLikeThis(IndexReader ir) {
    this(ir, new DefaultSimilarity());
}
/**
 * Creates a {@code MoreLikeThis} for the given reader by delegating to the
 * two-argument constructor with a newly created {@link DefaultSimilarity}.
 *
 * @param ir the index reader handed to the two-argument constructor
 */
public MoreLikeThis(IndexReader ir) {
    this(ir, new DefaultSimilarity());
}
/**
 * Creates an {@code XMoreLikeThis} for the given reader by delegating to the
 * two-argument constructor with a newly created {@link DefaultSimilarity}.
 *
 * @param ir the index reader handed to the two-argument constructor
 */
public XMoreLikeThis(IndexReader ir) {
    this(ir, new DefaultSimilarity());
}
/**
 * Returns the similarity held by this object, or a newly created
 * {@link DefaultSimilarity} when none is set.
 *
 * @return the configured similarity, or a new default one; never {@code null}
 */
public Similarity getSimilarity() {
    if (similarity != null) {
        return similarity;
    }
    // No similarity configured: hand out a fresh default on each call.
    return new DefaultSimilarity();
}
/**
 * Returns the similarity held by this object, or a newly created
 * {@link DefaultSimilarity} when none is set.
 *
 * @return the configured similarity, or a new default one; never {@code null}
 */
public Similarity getSimilarity() {
    if (similarity != null) {
        return similarity;
    }
    // No similarity configured: hand out a fresh default on each call.
    return new DefaultSimilarity();
}
/**
 * Per-field similarity that returns a {@link CustomAuthorSimilarity} for the
 * {@code "author"} field and a {@link DefaultSimilarity} for every other field.
 */
public class MyCustomSimilarity extends PerFieldSimilarityWrapper {

    /** Name of the field that is scored with the custom similarity. */
    private static final String AUTHOR_FIELD = "author";

    /**
     * Selects the similarity for a field.
     *
     * @param fieldName name of the field being scored; a {@code null} name is
     *                  treated like any non-author field
     * @return a new {@link CustomAuthorSimilarity} for the author field,
     *         otherwise a new {@link DefaultSimilarity}
     */
    @Override
    public Similarity get(String fieldName) {
        // Constant-first comparison so a null field name selects the default
        // similarity instead of throwing a NullPointerException.
        if (AUTHOR_FIELD.equals(fieldName)) {
            return new CustomAuthorSimilarity();
        }
        return new DefaultSimilarity();
    }
}
/**
 * Creates a {@code WikiIndex} from the given settings.
 *
 * @param maxHits     value stored in the {@code maxHits} field
 * @param indexPath   value stored in the {@code indexPath} field
 * @param searchField value stored in the {@code searchField} field
 * @param approximate when {@code true} an {@link ApproximateSimilarity} is used,
 *                    otherwise a {@link DefaultSimilarity}
 */
public WikiIndex(int maxHits, String indexPath, String searchField, boolean approximate) {
    this.maxHits = maxHits;
    this.indexPath = indexPath;
    this.searchField = searchField;

    if (approximate) {
        this.similarity = new ApproximateSimilarity();
    } else {
        this.similarity = new DefaultSimilarity();
    }
}
/**
 * Creates a {@code WikiIndex} from the given settings.
 *
 * @param maxHits     value stored in the {@code maxHits} field
 * @param indexPath   value stored in the {@code indexPath} field
 * @param searchField value stored in the {@code searchField} field
 * @param approximate when {@code true} an {@link ApproximateSimilarity} is used,
 *                    otherwise a {@link DefaultSimilarity}
 */
public WikiIndex(int maxHits, String indexPath, String searchField, boolean approximate) {
    this.maxHits = maxHits;
    this.indexPath = indexPath;
    this.searchField = searchField;

    if (approximate) {
        this.similarity = new ApproximateSimilarity();
    } else {
        this.similarity = new DefaultSimilarity();
    }
}
/** * uses defaultSimilarity to compute idf. DefaultSimilarity computes idf as * 1 + log (numDocs/ docFreq + 1) * * @param reader * @param field * @return * @throws IOException */ public static Map<String, Float> getIdfs(IndexReader reader, String field) throws IOException { // DefaultSimilarity computes idf as 1 + log (numDocs/ docFreq + 1) return getIdfs(reader, field, new DefaultSimilarity()); }
/**
 * Builds a {@code ZoieSystem} by delegating to the overload that also takes an
 * analyzer and a similarity, supplying a {@link StandardAnalyzer} for
 * {@code Version.LUCENE_43} and a {@link DefaultSimilarity}.
 *
 * @param idxDir            passed through unchanged to the delegate overload
 * @param interpreter       passed through unchanged to the delegate overload
 * @param batchSize         passed through unchanged to the delegate overload
 * @param batchDelay        passed through unchanged to the delegate overload
 * @param realtime          passed through unchanged to the delegate overload
 * @param versionComparator passed through unchanged to the delegate overload
 * @return the instance produced by the delegate overload
 * @deprecated use {@link ZoieSystem#buildDefaultInstance(File, ZoieIndexableInterpreter, ZoieConfig)}
 */
@Deprecated
public static <D> ZoieSystem<IndexReader, D> buildDefaultInstance(File idxDir,
        ZoieIndexableInterpreter<D> interpreter, int batchSize, long batchDelay, boolean realtime,
        Comparator<String> versionComparator) {
    final StandardAnalyzer defaultAnalyzer = new StandardAnalyzer(Version.LUCENE_43);
    final DefaultSimilarity defaultSimilarity = new DefaultSimilarity();
    return buildDefaultInstance(idxDir, interpreter, defaultAnalyzer, defaultSimilarity,
            batchSize, batchDelay, realtime, versionComparator);
}
/**
 * Builds a {@code ZoieSystem} by delegating to the overload that also takes an
 * analyzer and a similarity, supplying a {@link StandardAnalyzer} for
 * {@code Version.LUCENE_43} and a {@link DefaultSimilarity}.
 *
 * @param idxDir            passed through unchanged to the delegate overload
 * @param interpreter       passed through unchanged to the delegate overload
 * @param batchSize         passed through unchanged to the delegate overload
 * @param batchDelay        passed through unchanged to the delegate overload
 * @param realtime          passed through unchanged to the delegate overload
 * @param versionComparator passed through unchanged to the delegate overload
 * @return the instance produced by the delegate overload
 * @deprecated use {@link ZoieSystem#buildDefaultInstance(File, ZoieIndexableInterpreter, ZoieConfig)}
 */
@Deprecated
public static <D> ZoieSystem<IndexReader, D> buildDefaultInstance(File idxDir,
        ZoieIndexableInterpreter<D> interpreter, int batchSize, long batchDelay, boolean realtime,
        Comparator<String> versionComparator) {
    final StandardAnalyzer defaultAnalyzer = new StandardAnalyzer(Version.LUCENE_43);
    final DefaultSimilarity defaultSimilarity = new DefaultSimilarity();
    return buildDefaultInstance(idxDir, interpreter, defaultAnalyzer, defaultSimilarity,
            batchSize, batchDelay, realtime, versionComparator);
}
/**
 * Creates a filter over the given term vectors.
 *
 * <p>Besides storing the arguments, this initialises an empty score-term map,
 * an empty {@code AtomicLongMap} of sizes and a {@link DefaultSimilarity}.
 *
 * @param termVectorsByField stored in the {@code fields} field
 * @param topLevelFields     stored in the {@code topLevelFields} field
 * @param selectedFields     stored in the {@code selectedFields} field
 * @param dfs                stored in the {@code dfs} field; may be {@code null}
 */
public TermVectorsFilter(Fields termVectorsByField, Fields topLevelFields, Set<String> selectedFields,
        @Nullable AggregatedDfs dfs) {
    // State supplied by the caller.
    this.fields = termVectorsByField;
    this.topLevelFields = topLevelFields;
    this.selectedFields = selectedFields;
    this.dfs = dfs;

    // State created here, initially empty.
    this.scoreTerms = new HashMap<>();
    this.sizes = AtomicLongMap.create();
    this.similarity = new DefaultSimilarity();
}
// Prints the idf of every term of every field in the index, computed by
// DefaultSimilarity from the term's document frequency and the reader's
// document count.
DefaultSimilarity similarity = new DefaultSimilarity();
int docnum = reader.numDocs();
Fields fields = MultiFields.getFields(reader);
// MultiFields.getFields returns null when the reader has no postings, so an
// empty index must not reach the loop.
if (fields != null) {
    for (String field : fields) {
        Terms terms = fields.terms(field);
        // Fields.terms returns null for a field without terms; skip it.
        if (terms == null) {
            continue;
        }
        TermsEnum termsEnum = terms.iterator(null);
        while (termsEnum.next() != null) {
            double idf = similarity.idf(termsEnum.docFreq(), docnum);
            System.out.println("" + field + ":" + termsEnum.term().utf8ToString() + " idf=" + idf);
        }
    }
}
String field; FieldsEnum fieldsiterator; TermsEnum termsiterator; //To Simplify, you can rely on DefaultSimilarity to calculate tf and idf for you. DefaultSimilarity freqcalculator = new DefaultSimilarity() //numDocs and maxDoc are not the same thing: int numDocs = reader.numDocs(); int maxDoc = reader.maxDoc(); for (int i=0; i<maxDoc; i++) { if (reader.isDeleted(i)) continue; fieldsiterator = reader.getTermVectors(i).iterator(); while (field = fieldsiterator.next()) { termsiterator = fieldsiterator.terms().iterator(); while (terms.next()) { //id = document id, field = field name //String representations of the current term String termtext = termsiterator.term().utf8ToString(); //Get idf, using docfreq from the reader. //I haven't tested this, and I'm not quite 100% sure of the context of this method. //If it doesn't work, idfalternate below should. int idf = termsiterator.docfreq(); int idfalternate = freqcalculator.idf(reader.docFreq(field, termsiterator.term()), numDocs); } } }
@Override public byte[] serializeKey(final Object obj) { final ITermDocKey entry = (ITermDocKey) obj; final String termText = entry.getToken(); final double termWeight = entry.getLocalTermWeight(); /* * See: http://lucene.apache.org/core/old_versioned_docs/versions/3_0_2/api/all/org/apache/lucene/search/Similarity.html * * For more information on the round-trip of normalized term weight. */ final DefaultSimilarity similarity = new DefaultSimilarity(); final long termWeightCompact = similarity.encodeNormValue((float) termWeight); final IV docId = (IV)entry.getDocId(); final IKeyBuilder keyBuilder = getKeyBuilder(); keyBuilder.reset(); // the token text (or its successor as desired). keyBuilder .appendText(termText, true/* unicode */, false/* successor */); keyBuilder.append(termWeightCompact); IVUtility.encode(keyBuilder, docId); final byte[] key = keyBuilder.getKey(); if (log.isDebugEnabled()) { log.debug("{" + termText + "," + docId + "}, key=" + BytesUtil.toString(key)); } return key; }
/**
 * Rewrites this query by configuring an {@link XMoreLikeThis} from this
 * object's settings and building the resulting query from it.
 *
 * @param reader the index reader the {@code XMoreLikeThis} is created over
 * @return the query produced by {@code createQuery}
 * @throws IOException declared by the overridden method
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
    // Fall back to a DefaultSimilarity when no similarity has been set.
    final XMoreLikeThis moreLikeThis =
            new XMoreLikeThis(reader, similarity != null ? similarity : new DefaultSimilarity());

    moreLikeThis.setFieldNames(moreLikeFields);
    moreLikeThis.setAnalyzer(analyzer);
    moreLikeThis.setMinTermFreq(minTermFrequency);
    moreLikeThis.setMinDocFreq(minDocFreq);
    moreLikeThis.setMaxDocFreq(maxDocFreq);
    moreLikeThis.setMaxQueryTerms(maxQueryTerms);
    moreLikeThis.setMinWordLen(minWordLen);
    moreLikeThis.setMaxWordLen(maxWordLen);
    moreLikeThis.setStopWords(stopWords);
    moreLikeThis.setBoost(boostTerms);
    moreLikeThis.setBoostFactor(boostTermsFactor);

    // Only process "unlike" input when at least one of the two sources is present.
    final boolean nothingToUnlike = this.unlikeText == null && this.unlikeFields == null;
    if (!nothingToUnlike) {
        handleUnlike(moreLikeThis, this.unlikeText, this.unlikeFields);
    }

    return createQuery(moreLikeThis);
}
@Override public byte[] serializeKey(final Object obj) { final ITermDocKey entry = (ITermDocKey) obj; final String termText = entry.getToken(); final double termWeight = entry.getLocalTermWeight(); /* * See: http://lucene.apache.org/core/old_versioned_docs/versions/3_0_2/api/all/org/apache/lucene/search/Similarity.html * * For more information on the round-trip of normalized term weight. */ final DefaultSimilarity similarity = new DefaultSimilarity(); final long termWeightCompact = similarity.encodeNormValue((float) termWeight); final IV docId = (IV)entry.getDocId(); final IKeyBuilder keyBuilder = getKeyBuilder(); keyBuilder.reset(); // the token text (or its successor as desired). keyBuilder .appendText(termText, true/* unicode */, false/* successor */); keyBuilder.append(termWeightCompact); IVUtility.encode(keyBuilder, docId); final byte[] key = keyBuilder.getKey(); if (log.isDebugEnabled()) { log.debug("{" + termText + "," + docId + "}, key=" + BytesUtil.toString(key)); } return key; }
// A new DefaultSimilarity instance, held in a final variable so it cannot be reassigned.
final DefaultSimilarity similarity = new DefaultSimilarity();
/**
 * Creates a {@code RandomIndexWriter} by delegating to the four-argument
 * overload with a writer configuration built from {@code TEST_VERSION_CURRENT}
 * and the analyzer, then set up with the given codec, the merge policy from
 * {@code newLogMergePolicy()} and a {@link DefaultSimilarity}.
 *
 * @param dir      the directory passed to the four-argument overload
 * @param analyzer the analyzer used for the configuration and passed to the overload
 * @param codec    the codec set on the configuration and passed to the overload
 * @return the writer returned by the four-argument overload
 * @throws IOException if the four-argument overload throws it
 */
protected static RandomIndexWriter newRandomIndexWriter(final Directory dir,
        final Analyzer analyzer, final Codec codec) throws IOException {
    return newRandomIndexWriter(
            dir,
            analyzer,
            codec,
            newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                    .setCodec(codec)
                    .setMergePolicy(newLogMergePolicy())
                    .setSimilarity(new DefaultSimilarity()));
}
/**
 * Creates a {@code RandomIndexWriter} by delegating to the four-argument
 * overload with a writer configuration built from {@code TEST_VERSION_CURRENT}
 * and the analyzer, then set up with the given codec, the merge policy from
 * {@code newLogMergePolicy()} and a {@link DefaultSimilarity}.
 *
 * @param dir      the directory passed to the four-argument overload
 * @param analyzer the analyzer used for the configuration and passed to the overload
 * @param codec    the codec set on the configuration and passed to the overload
 * @return the writer returned by the four-argument overload
 * @throws IOException if the four-argument overload throws it
 */
protected static RandomIndexWriter newRandomIndexWriter(final Directory dir,
        final Analyzer analyzer, final Codec codec) throws IOException {
    return newRandomIndexWriter(
            dir,
            analyzer,
            codec,
            newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
                    .setCodec(codec)
                    .setMergePolicy(newLogMergePolicy())
                    .setSimilarity(new DefaultSimilarity()));
}