/**
 * Creates an instance backed by the given index reader.
 * <p>
 * Delegates to the two-argument constructor, supplying a newly created
 * {@code DefaultSimilarity} as the similarity.
 *
 * @param ir the index reader handed on to the two-argument constructor
 */
public MoreLikeThis(IndexReader ir) {
    this(ir, new DefaultSimilarity());
}
// Weights each of the noOfTerms terms by tf * idf using a DefaultSimilarity:
//   - tf is derived from the matching entry in freq[];
//   - idf is derived from the number of documents containing the term in the
//     "doccontent" field (re.docFreq) together with noOfDocs.
// The product is stored in wordMap under terms[i].
// NOTE: the closing brace of the for loop lies outside this excerpt.
DefaultSimilarity simi = new DefaultSimilarity(); for (i = 0; i < noOfTerms; i++) { int noofDocsContainTerm = re.docFreq(new Term("doccontent", terms[i])); float tf = simi.tf(freq[i]); float idf = simi.idf(noofDocsContainTerm, noOfDocs); wordMap.put(terms[i], (tf * idf));
@Override public byte[] serializeKey(final Object obj) { final ITermDocKey entry = (ITermDocKey) obj; final String termText = entry.getToken(); final double termWeight = entry.getLocalTermWeight(); /* * See: http://lucene.apache.org/core/old_versioned_docs/versions/3_0_2/api/all/org/apache/lucene/search/Similarity.html * * For more information on the round-trip of normalized term weight. */ final DefaultSimilarity similarity = new DefaultSimilarity(); final long termWeightCompact = similarity.encodeNormValue((float) termWeight); final IV docId = (IV)entry.getDocId(); final IKeyBuilder keyBuilder = getKeyBuilder(); keyBuilder.reset(); // the token text (or its successor as desired). keyBuilder .appendText(termText, true/* unicode */, false/* successor */); keyBuilder.append(termWeightCompact); IVUtility.encode(keyBuilder, docId); final byte[] key = keyBuilder.getKey(); if (log.isDebugEnabled()) { log.debug("{" + termText + "," + docId + "}, key=" + BytesUtil.toString(key)); } return key; }
// Recovers the term weight from its compact encoding by passing
// termWeightCompact to DefaultSimilarity.decodeNormValue; the decoded value
// is kept in a double.
final DefaultSimilarity similarity = new DefaultSimilarity(); final double termWeight = similarity.decodeNormValue(termWeightCompact);
// Prints one "<field>:<term> idf=<value>" line for every term of every field
// exposed by the reader, with idf computed by DefaultSimilarity from the
// term's document frequency and reader.numDocs().
DefaultSimilarity similarity = new DefaultSimilarity();
int docnum = reader.numDocs();
Fields fields = MultiFields.getFields(reader);

for (String fieldName : fields) {
    TermsEnum cursor = fields.terms(fieldName).iterator(null);

    // next() returning null marks the end of this field's terms.
    while (cursor.next() != null) {
        int containingDocs = cursor.docFreq();
        double idf = similarity.idf(containingDocs, docnum);
        String termText = cursor.term().utf8ToString();

        System.out.println("" + fieldName + ":" + termText + " idf=" + idf);
    }
}
@Override public byte[] serializeKey(final Object obj) { final ITermDocKey entry = (ITermDocKey) obj; final String termText = entry.getToken(); final double termWeight = entry.getLocalTermWeight(); /* * See: http://lucene.apache.org/core/old_versioned_docs/versions/3_0_2/api/all/org/apache/lucene/search/Similarity.html * * For more information on the round-trip of normalized term weight. */ final DefaultSimilarity similarity = new DefaultSimilarity(); final long termWeightCompact = similarity.encodeNormValue((float) termWeight); final IV docId = (IV)entry.getDocId(); final IKeyBuilder keyBuilder = getKeyBuilder(); keyBuilder.reset(); // the token text (or its successor as desired). keyBuilder .appendText(termText, true/* unicode */, false/* successor */); keyBuilder.append(termWeightCompact); IVUtility.encode(keyBuilder, docId); final byte[] key = keyBuilder.getKey(); if (log.isDebugEnabled()) { log.debug("{" + termText + "," + docId + "}, key=" + BytesUtil.toString(key)); } return key; }
// Recovers the term weight from its compact encoding by passing
// termWeightCompact to DefaultSimilarity.decodeNormValue; the decoded value
// is kept in a double.
final DefaultSimilarity similarity = new DefaultSimilarity(); final double termWeight = similarity.decodeNormValue(termWeightCompact);
String field; FieldsEnum fieldsiterator; TermsEnum termsiterator; //To Simplify, you can rely on DefaultSimilarity to calculate tf and idf for you. DefaultSimilarity freqcalculator = new DefaultSimilarity() //numDocs and maxDoc are not the same thing: int numDocs = reader.numDocs(); int maxDoc = reader.maxDoc(); for (int i=0; i<maxDoc; i++) { if (reader.isDeleted(i)) continue; fieldsiterator = reader.getTermVectors(i).iterator(); while (field = fieldsiterator.next()) { termsiterator = fieldsiterator.terms().iterator(); while (terms.next()) { //id = document id, field = field name //String representations of the current term String termtext = termsiterator.term().utf8ToString(); //Get idf, using docfreq from the reader. //I haven't tested this, and I'm not quite 100% sure of the context of this method. //If it doesn't work, idfalternate below should. int idf = termsiterator.docfreq(); int idfalternate = freqcalculator.idf(reader.docFreq(field, termsiterator.term()), numDocs); } } }
/**
 * Creates an instance backed by the given index reader.
 * <p>
 * Delegates to the two-argument constructor, supplying a newly created
 * {@code DefaultSimilarity} as the similarity.
 *
 * @param ir the index reader handed on to the two-argument constructor
 */
public MoreLikeThis(IndexReader ir) {
    this(ir, new DefaultSimilarity());
}
// Compacts the term weight with DefaultSimilarity.encodeNormValue. The weight
// is narrowed to float by the explicit cast before encoding, and the encoded
// value is held in a long.
final DefaultSimilarity similarity = new DefaultSimilarity(); final long termWeightCompact = similarity.encodeNormValue((float) termWeight);
// Recovers the term weight from its compact encoding by passing
// termWeightCompact to DefaultSimilarity.decodeNormValue; the decoded value
// is kept in a double.
final DefaultSimilarity similarity = new DefaultSimilarity(); final double termWeight = similarity.decodeNormValue(termWeightCompact);
/**
 * Creates an instance backed by the given index reader.
 * <p>
 * Delegates to the two-argument constructor, supplying a newly created
 * {@code DefaultSimilarity} as the similarity.
 *
 * @param ir the index reader handed on to the two-argument constructor
 */
public XMoreLikeThis(IndexReader ir) {
    this(ir, new DefaultSimilarity());
}
// Compacts the term weight with DefaultSimilarity.encodeNormValue. The weight
// is narrowed to float by the explicit cast before encoding, and the encoded
// value is held in a long.
final DefaultSimilarity similarity = new DefaultSimilarity(); final long termWeightCompact = similarity.encodeNormValue((float) termWeight);
// Recovers the term weight from its compact encoding by passing
// termWeightCompact to DefaultSimilarity.decodeNormValue; the decoded value
// is kept in a double.
final DefaultSimilarity similarity = new DefaultSimilarity(); final double termWeight = similarity.decodeNormValue(termWeightCompact);
/**
 * Returns the similarity in effect.
 *
 * @return the {@code similarity} held by this object when it is non-null,
 *         otherwise a newly created {@code DefaultSimilarity}
 */
public Similarity getSimilarity() {
    if (similarity != null) {
        return similarity;
    }
    // Nothing configured: fall back to a fresh default instance.
    return new DefaultSimilarity();
}
/**
 * Returns the similarity in effect.
 *
 * @return the {@code similarity} held by this object when it is non-null,
 *         otherwise a newly created {@code DefaultSimilarity}
 */
public Similarity getSimilarity() {
    if (similarity != null) {
        return similarity;
    }
    // Nothing configured: fall back to a fresh default instance.
    return new DefaultSimilarity();
}