@Override public double calculate(int tf, int df, int length, int numDocs) { // ignore length return sim.tf(tf) * sim.idf(df, numDocs); } }
/**
 * TF-IDF weight for one term: tf factor times idf factor, both delegated
 * to the wrapped similarity {@code sim}.
 * NOTE(review): in the original one-line form, the "// ignore length"
 * comment swallowed the return statement; re-lined here so it compiles.
 *
 * @param tf      raw term frequency within the document
 * @param df      number of documents containing the term
 * @param length  field length — intentionally ignored
 * @param numDocs total number of documents in the index
 * @return tf factor times idf factor
 */
@Override public double calculate(int tf, int df, int length, int numDocs) {
    // ignore length
    return sim.tf(tf) * sim.idf(df, numDocs);
}
} // closes the enclosing class begun outside this view
// Print the IDF of every term in the index, field by field (Lucene 4.x API).
DefaultSimilarity similarity = new DefaultSimilarity();
// numDocs() counts live documents only (deleted docs excluded).
int docnum = reader.numDocs();
// Merged, index-wide view over all per-segment fields.
Fields fields = MultiFields.getFields(reader);
for (String field : fields) {
    // NOTE(review): terms() can return null for a field with no terms — TODO confirm callers never hit that here.
    Terms terms = fields.terms(field);
    TermsEnum termsEnum = terms.iterator(null); // null: no prior enum to reuse
    // next() returns null once the enum is exhausted.
    while (termsEnum.next() != null) {
        // idf from this term's document frequency vs. total live docs.
        double idf = similarity.idf(termsEnum.docFreq(), docnum);
        System.out.println("" + field + ":" + termsEnum.term().utf8ToString() + " idf=" + idf);
    }
}
// Weight the raw term frequency by corpus rarity and record the
// tf*idf score for this term in the word map.
float inverseDocFreq = simi.idf(noofDocsContainTerm, noOfDocs);
wordMap.put(terms[i], (tf * inverseDocFreq));
String field; FieldsEnum fieldsiterator; TermsEnum termsiterator; //To Simplify, you can rely on DefaultSimilarity to calculate tf and idf for you. DefaultSimilarity freqcalculator = new DefaultSimilarity() //numDocs and maxDoc are not the same thing: int numDocs = reader.numDocs(); int maxDoc = reader.maxDoc(); for (int i=0; i<maxDoc; i++) { if (reader.isDeleted(i)) continue; fieldsiterator = reader.getTermVectors(i).iterator(); while (field = fieldsiterator.next()) { termsiterator = fieldsiterator.terms().iterator(); while (terms.next()) { //id = document id, field = field name //String representations of the current term String termtext = termsiterator.term().utf8ToString(); //Get idf, using docfreq from the reader. //I haven't tested this, and I'm not quite 100% sure of the context of this method. //If it doesn't work, idfalternate below should. int idf = termsiterator.docfreq(); int idfalternate = freqcalculator.idf(reader.docFreq(field, termsiterator.term()), numDocs); } } }
// Score the term by corpus rarity: idf is derived from how many
// documents in the "text" field contain this key, then folded into
// the tf-idf vector entry for the key.
double inverseDocFreq = similarity.idf(indexReader.docFreq(new Term("text", key)), numDocs);
tfIdfVector.put(key, tf * inverseDocFreq);
// How many documents in the "text" field contain this term?
int termDocFreq = indexReader.docFreq(new Term("text", key));
// Convert that document frequency into an idf and store tf*idf.
double idf = similarity.idf(termDocFreq, numDocs);
tfIdfVector.put(key, tf * idf);