/** * This method provides the contract for implementing weighting models. * @param tf The term frequency in the document * @param docLength the document's length * @return the score assigned to a document with the given tf and docLength, and other preset parameters */ public final double score(double tf, double docLength) { double TF = tf * WeightingModelLibrary.log(1.0d + (c * averageDocumentLength) / docLength); double NORM = (termFrequency + 1d) / (documentFrequency * (TF + 1d)); //double f = this.termFrequency / numberOfDocuments; return TF * keyFrequency * i.idfDFR(documentFrequency) * NORM; } }
/**
 * Computes the query expansion weight of a term.
 * <p>
 * The weight is the negated logarithm of
 * {@code withinDocumentFrequency / totalDocumentLength}.
 * <p>
 * NOTE(review): earlier documentation described the result as the complete
 * Kullback-Leibler divergence, but {@code termFrequency} is not read by this
 * implementation - confirm the intended model.
 *
 * @param withinDocumentFrequency double The term frequency in the X
 *        top-retrieved documents.
 * @param termFrequency double The term frequency in the collection
 *        (unused here).
 * @return double The query expansion weight.
 */
public final double score(double withinDocumentFrequency, double termFrequency) {
    final double proportion = withinDocumentFrequency / this.totalDocumentLength;
    return -log(proportion);
}
/**
 * Forwards the given key frequency to every weighting model held in
 * {@code parents}.
 *
 * @param keyFreq the key frequency passed on to each parent model
 */
@Override
public void setKeyFrequency(double keyFreq) {
    for (final WeightingModel parent : parents) {
        parent.setKeyFrequency(keyFreq);
    }
}
/**
 * Forwards the given parameter value to every weighting model held in
 * {@code parents}.
 *
 * @param _c the parameter value passed on to each parent model
 */
@Override
public void setParameter(double _c) {
    for (final WeightingModel parent : parents) {
        parent.setParameter(_c);
    }
}
/**
 * Records the number of documents in the collection on this model and
 * passes the same value on to the idf attribute {@code i}.
 *
 * @param numOfDocs the number of documents in the collection.
 */
public void setNumberOfDocuments(double numOfDocs) {
    // Update this model first, then keep the idf helper in step with it.
    numberOfDocuments = numOfDocs;
    i.setNumberOfDocuments(numOfDocs);
}
/**
 * Default constructor: assigns a newly created {@code Idf} instance to the
 * idf attribute {@code i}.
 */
public WeightingModel() {
    this.i = new Idf();
}
/**
 * Forwards the given request to every weighting model held in
 * {@code parents}.
 *
 * @param _rq the request passed on to each parent model
 */
@Override
public void setRequest(Request _rq) {
    for (final WeightingModel parent : parents) {
        parent.setRequest(_rq);
    }
}
/** * This method provides the contract for implementing weighting models. * @param tf The term frequency in the document * @param docLength the document's length * @return the score assigned to a document with the given tf and docLength, and other preset parameters */ public final double score(double tf, double docLength) { double TF = tf * WeightingModelLibrary.log(1.0d + (c * averageDocumentLength) / docLength); double NORM = (termFrequency + 1d) / (documentFrequency * (TF + 1d)); //double f = this.termFrequency / numberOfDocuments; return TF * keyFrequency * i.idfDFR(documentFrequency) * NORM; } }
/**
 * Computes the gain of encountering an extra token of the query term.
 * <p>
 * This implementation returns {@code log((1 + tf) / tf)}; the
 * {@code documentFrequency} and {@code termFrequency} arguments are not
 * read.
 *
 * @param tf The term frequency in the document
 * @param documentFrequency The document frequency of the term (unused here)
 * @param termFrequency the term frequency in the collection (unused here)
 * @return the gain returned by the implemented formula.
 */
public double gain(double tf, double documentFrequency, double termFrequency) {
    final double ratio = (1 + tf) / tf;
    return log(ratio);
}
}
/**
 * Computes the score according to the model InL2.
 * <p>
 * The term frequency is scaled by
 * {@code WeightingModelLibrary.log(1 + (c * averageDocumentLength) / docLength)}.
 * The scaled value is then multiplied, in this order, by
 * {@code i.idfDFR(documentFrequency)}, by {@code keyFrequency} and by
 * {@code 1 / (scaledTf + 1)}.
 *
 * @param tf The term frequency in the document
 * @param docLength the document's length
 * @return the score assigned to a document with the
 *         given tf and docLength, and other preset parameters
 */
public final double score(double tf, double docLength) {
    final double lengthRatio = (c * averageDocumentLength) / docLength;
    final double scaledTf = tf * WeightingModelLibrary.log(1.0d + lengthRatio);
    final double norm = 1d / (scaledTf + 1d);
    // Same left-to-right product as before: ((scaledTf * idf) * keyFrequency) * norm.
    final double idfWeighted = scaledTf * i.idfDFR(documentFrequency);
    final double queryWeighted = idfWeighted * keyFrequency;
    return queryWeighted * norm;
}
}
/**
 * Approximates the power series with the Stirling formula.
 * <p>
 * The value returned is
 * {@code (m + 0.5) * log(n / m) + (n - m) * log(n)}.
 *
 * @param n The parameter of the Stirling formula.
 * @param m The parameter of the Stirling formula.
 * @return the approximation of the power series
 */
public double stirlingPower(double n, double m) {
    final double ratioTerm = (m + 0.5d) * log(n / m);
    final double differenceTerm = (n - m) * log(n);
    return ratioTerm + differenceTerm;
}
}
/**
 * Returns a concave tf as described in Singhal et al., 1999. AT&amp;T at TREC-7.
 * In Proceedings of the Seventh Text REtrieval Conference (TREC-7), pages 239-252.
 * <p>
 * The value returned is {@code 1 + log(1 + log(tf))}.
 *
 * @param tf the term frequency to modify
 * @return a concave tf
 */
public static double tf_concave_log(double tf) {
    final double innerLog = log(tf);
    final double outerLog = log(1 + innerLog);
    return 1 + outerLog;
}
/**
 * Computes the normaliser of parameter-free query expansion.
 * <p>
 * With {@code f = maxTermFrequency * totalDocumentLength / collectionLength},
 * the value returned is
 * {@code maxTermFrequency * log((1 + f) / f) + log(1 + f)}.
 *
 * @param maxTermFrequency The maximum of the term frequency of the query terms.
 * @param collectionLength The number of tokens in the collections.
 * @param totalDocumentLength The sum of the length of the top-ranked documents.
 * @return The normaliser.
 */
public final double parameterFreeNormaliser(double maxTermFrequency, double collectionLength, double totalDocumentLength) {
    final double f = (maxTermFrequency * totalDocumentLength) / collectionLength;
    final double onePlusF = 1d + f;
    final double scaledLogRatio = maxTermFrequency * log(onePlusF / f);
    return scaledLogRatio + log(onePlusF);
}
/** This method implements the query expansion model.