/**
 * Scores a term in a document as the product of the query-side key
 * frequency, a length-normalised (Robertson) term frequency and an
 * idf component.
 * @param tf The term frequency of the term in the document
 * @param docLength the document's length
 * @return the score assigned to a document with the given
 *         tf and docLength, and other preset parameters
 */
public final double score(double tf, double docLength) {
    // Document length relative to the collection average, blended by b.
    final double lengthNorm = 1 - b + b * docLength / averageDocumentLength;
    final double saturatedTf = k_1 * tf / (tf + k_1 * lengthNorm);
    // idf component: log of (numberOfDocuments / documentFrequency + 1).
    final double inverseDocFreq =
        WeightingModelLibrary.log(numberOfDocuments / documentFrequency + 1);
    return keyFrequency * saturatedTf * inverseDocFreq;
}
/**
 * Evaluates the Stirling formula for the power series:
 * {@code (m + 0.5) * log(n / m) + (n - m) * log(n)}.
 * @param n The parameter of the Stirling formula.
 * @param m The parameter of the Stirling formula.
 * @return the approximation of the power series
 */
public static double stirlingPower(double n, double m) {
    final double gap = n - m;
    final double ratioTerm = (m + 0.5d) * log(n / m);
    final double gapTerm = gap * log(n);
    return ratioTerm + gapTerm;
}
/**
 * Evaluates the Stirling formula for the power series:
 * {@code (m + 0.5) * log(n / m) + (n - m) * log(n)}.
 * @param n The parameter of the Stirling formula.
 * @param m The parameter of the Stirling formula.
 * @return the approximation of the power series
 */
public static double stirlingPower(double n, double m) {
    final double weightedRatioLog = (m + 0.5d) * log(n / m);
    final double weightedLogN = (n - m) * log(n);
    return weightedRatioLog + weightedLogN;
}
}
/**
 * Evaluates the Stirling formula for the power series:
 * {@code (m + 0.5) * log(n / m) + (n - m) * log(n)}.
 * @param n The parameter of the Stirling formula.
 * @param m The parameter of the Stirling formula.
 * @return the approximation of the power series
 */
public double stirlingPower(double n, double m) {
    final double difference = n - m;
    final double first = (m + 0.5d) * log(n / m);
    final double second = difference * log(n);
    return first + second;
}
}
/**
 * Returns a concave tf as described in Singhal et al., 1999. AT&amp;T at TREC-7.
 * In Proceedings of the Seventh Text REtrieval Conference (TREC-7), pages 239-252.
 * The value is {@code 1 + log(1 + log(tf))}.
 * @param tf the term frequency to modify
 * @return a concave tf
 */
public static double tf_concave_log(double tf) {
    final double innerLog = log(tf);
    final double outerLog = log(1 + innerLog);
    return 1 + outerLog;
}
/**
 * This method implements the query expansion model. The weight is the
 * negated log of the within-document frequency divided by
 * {@code totalDocumentLength}.
 * @param withinDocumentFrequency double The term frequency in the X top-retrieved documents.
 * @param termFrequency double The term frequency in the collection.
 *        Not used by this computation.
 * @return double The query expansion weight.
 */
public final double score(double withinDocumentFrequency, double termFrequency) {
    final double proportion = withinDocumentFrequency / this.totalDocumentLength;
    return -log(proportion);
}
/**
 * Evaluates the Stirling formula for the power series:
 * {@code (m + 0.5) * log(n / m) + (n - m) * log(n)}.
 * @param n The parameter of the Stirling formula.
 * @param m The parameter of the Stirling formula.
 * @return the approximation of the power series
 */
public static double stirlingPower(double n, double m) {
    final double delta = n - m;
    final double logOfRatioPart = (m + 0.5d) * log(n / m);
    final double logOfNPart = delta * log(n);
    return logOfRatioPart + logOfNPart;
}
/**
 * Returns the base 2 log of d1 over d2, as computed by
 * {@code WeightingModelLibrary.log(d1, d2)}.
 * @param d1 the numerator
 * @param d2 the denominator
 * @return the base 2 log of d1/d2
 * @deprecated Do not use, moved to WeightingModelLibrary.
 */
@Deprecated
public static double log(double d1, double d2) {
    final double delegated = WeightingModelLibrary.log(d1, d2);
    return delegated;
}
/**
 * Returns the base 2 log of the given double precision number, as
 * computed by {@code WeightingModelLibrary.log(d)}.
 * @param d The number of which the log we will compute
 * @return the base 2 log of the given number
 * @deprecated Method has been moved to WeightingModelLibrary.
 */
@Deprecated
public static double log(double d) {
    final double delegated = WeightingModelLibrary.log(d);
    return delegated;
}
@Override public double score(double tf, double docLength) { double e_ij = (termFrequency * docLength) / numberOfTokens; // Condition 1 if ( tf <= e_ij ) return 0D; double chiSquare = ( Math.pow( (tf - e_ij), 2 )/e_ij ) + 1; return keyFrequency * WeightingModelLibrary.log(chiSquare); }
/**
 * Returns the base 2 log of d1 over d2, as computed by
 * {@code WeightingModelLibrary.log(d1, d2)}.
 * @param d1 the numerator
 * @param d2 the denominator
 * @return the base 2 log of d1/d2
 * @deprecated Do not use, moved to WeightingModelLibrary.
 */
@Deprecated
public static double log(double d1, double d2) {
    final double forwarded = WeightingModelLibrary.log(d1, d2);
    return forwarded;
}
/** * Return the normalised idf of the given number. * @param d The number of which the idf is computed. * @return the normalised idf of d */ public double idfN(double d) { return (WeightingModelLibrary.log(numberOfDocuments, d) / log(numberOfDocuments)); } /**
/**
 * Return the normalised idf of the given number: the log of
 * numberOfDocuments over d, divided by the log of numberOfDocuments.
 * @param d The number of which the idf is computed.
 * @return the normalised idf of d
 */
public double idfN(int d) {
    final double rawIdf = WeightingModelLibrary.log(numberOfDocuments, (double) d);
    final double normaliser = log(numberOfDocuments);
    return rawIdf / normaliser;
}
/**
 * The normalised INQUERY idf formula: the log of
 * (numberOfDocuments + 1) over (d + 0.5), divided by the log of
 * (numberOfDocuments + 1).
 * @param d the number for which we will compute the normalised idf
 * @return the normalised INQUERY idf of d
 */
public double idfNENQUIRY(double d) {
    final double smoothedIdf =
        WeightingModelLibrary.log(numberOfDocuments + 1.0D, d + 0.5D);
    final double normaliser = log(numberOfDocuments + 1.0D);
    return smoothedIdf / normaliser;
}
/** * Return the normalised idf of the given number. * @param d The number of which the idf is computed. * @return the normalised idf of d */ public double idfN(double d) { return (WeightingModelLibrary.log(numberOfDocuments, d) / log(numberOfDocuments)); } /**
/**
 * Return the normalised idf of the given number: the log of
 * numberOfDocuments over d, divided by the log of numberOfDocuments.
 * @param d The number of which the idf is computed.
 * @return the normalised idf of d
 */
public double idfN(int d) {
    final double numerator = WeightingModelLibrary.log(numberOfDocuments, (double) d);
    final double denominator = log(numberOfDocuments);
    return numerator / denominator;
}