// NOTE(review): this line looks like a mangled, flattened fragment of a term-weighting
// loop (the enclosing for-loop header is missing, the QEModel.score(...) call is cut off
// after its TF argument, and several braces are unbalanced). Presumably it iterated over
// allTerms[] scoring each term, tracked the max-weight term for normalisation, and applied
// ROCCHIO_BETA — TODO: reconstruct against the original Terrier source; do not compile as-is.
if (minDF > 0 && allTerms[i].getDocumentFrequency() < minDF && !originalTermids.contains(allTerms[i].getTermID())) allTerms[i].setWeightExpansion(0); continue; Map.Entry<String, LexiconEntry> lee = lexicon.getLexiconEntry(allTerms[i].getTermID()); if (lee == null) logger.error("Termid " + allTerms[i].getTermID() +" was not found in the lexicon"); continue; allTerms[i].setWeightExpansion(QEModel.score( allTerms[i].getWithinDocumentFrequency(), TF if (allTerms[i].getWeightExpansion() > allTerms[posMaxWeight].getWeightExpansion()) posMaxWeight = i; normaliser = allTerms[posMaxWeight].getWeightExpansion(); if (QEModel.PARAMETER_FREE){ QEModel.setMaxTermFrequency(allTerms[posMaxWeight].getWithinDocumentFrequency()); normaliser = QEModel.parameterFreeNormaliser(); if(logger.isDebugEnabled()){ allTerms[i].setWeightExpansion(allTerms[i].getWeightExpansion()/normaliser); allTerms[i].setWeightExpansion(allTerms[i].getWeightExpansion()*QEModel.ROCCHIO_BETA);
// NOTE(review): byte-for-byte duplicate of the fragment on the previous line — same
// truncation (score(...) call cut off after TF, unbalanced braces, missing loop header).
// Likely a copy/paste or extraction artifact; needs manual reconstruction, not compilation.
if (minDF > 0 && allTerms[i].getDocumentFrequency() < minDF && !originalTermids.contains(allTerms[i].getTermID())) allTerms[i].setWeightExpansion(0); continue; Map.Entry<String, LexiconEntry> lee = lexicon.getLexiconEntry(allTerms[i].getTermID()); if (lee == null) logger.error("Termid " + allTerms[i].getTermID() +" was not found in the lexicon"); continue; allTerms[i].setWeightExpansion(QEModel.score( allTerms[i].getWithinDocumentFrequency(), TF if (allTerms[i].getWeightExpansion() > allTerms[posMaxWeight].getWeightExpansion()) posMaxWeight = i; normaliser = allTerms[posMaxWeight].getWeightExpansion(); if (QEModel.PARAMETER_FREE){ QEModel.setMaxTermFrequency(allTerms[posMaxWeight].getWithinDocumentFrequency()); normaliser = QEModel.parameterFreeNormaliser(); if(logger.isDebugEnabled()){ allTerms[i].setWeightExpansion(allTerms[i].getWeightExpansion()/normaliser); allTerms[i].setWeightExpansion(allTerms[i].getWeightExpansion()*QEModel.ROCCHIO_BETA);
// NOTE(review): flattened fragment that appears to build the SingleTermQuery results array
// from the top-ranked termEntries. It is incomplete: the for-loop has no braces yet is
// followed by multiple statements, and the final setWeight(...) references an undefined
// variable `termId` — presumably a stray line stitched in from another method. TODO: verify
// against the original source before use.
logger.debug("First weight = "+termEntries[0].getWeightExpansion() + " last weight="+termEntries[termEntries.length-1].getWeightExpansion()); for (int i = 0; i < numberOfExpandedTerms; i++) Map.Entry<String,LexiconEntry> lee = lexicon.getLexiconEntry(termEntries[i].getTermID()); results[i] = new SingleTermQuery(lee.getKey()); results[i].setWeight(termEntries[i].getWeightExpansion()); results[i].setWeight(terms.get(termId).getWeightExpansion());
/** * Returns the weight of a term with the given * term identifier, computed by the specified * query expansion model. * @param termId int the term identifier to set the weight for. * @param model QueryExpansionModel the used query expansion model. * @return double the weight of the specified term. */ public double getExpansionWeight(int termId, QueryExpansionModel model){ double score = 0; ExpansionTerm o = terms.get(termId); if (o != null) { double TF = 0; //double Nt = 0; Map.Entry<String, LexiconEntry> lee = lexicon.getLexiconEntry(termId); TF = lee.getValue().getFrequency(); //Nt = lee.getValue().getDocumentFrequency(); score = model.score(o.getWithinDocumentFrequency(), TF, this.totalDocumentLength, this.numberOfTokens, this.averageDocumentLength ); } return score; }
// NOTE(review): duplicate of the fragment above — same problems (brace-less for-loop
// followed by several statements, and a trailing setWeight(...) referencing the undefined
// variable `termId`). Flagging rather than fixing: the surrounding method is not visible.
logger.debug("First weight = "+termEntries[0].getWeightExpansion() + " last weight="+termEntries[termEntries.length-1].getWeightExpansion()); for (int i = 0; i < numberOfExpandedTerms; i++) Map.Entry<String,LexiconEntry> lee = lexicon.getLexiconEntry(termEntries[i].getTermID()); results[i] = new SingleTermQuery(lee.getKey()); results[i].setWeight(termEntries[i].getWeightExpansion()); results[i].setWeight(terms.get(termId).getWeightExpansion());
/** * Returns the weight of a term with the given * term identifier, computed by the specified * query expansion model. * @param termId int the term identifier to set the weight for. * @param model QueryExpansionModel the used query expansion model. * @return double the weight of the specified term. */ public double getExpansionWeight(int termId, QueryExpansionModel model){ double score = 0; ExpansionTerm o = terms.get(termId); if (o != null) { double TF = 0; //double Nt = 0; Map.Entry<String, LexiconEntry> lee = lexicon.getLexiconEntry(termId); TF = lee.getValue().getFrequency(); //Nt = lee.getValue().getDocumentFrequency(); score = model.score(o.getWithinDocumentFrequency(), TF, this.totalDocumentLength, this.numberOfTokens, this.averageDocumentLength ); } return score; }
/**
 * Registers an occurrence of a term found in the X top-retrieved documents
 * as a candidate expansion term: creates a new entry on first sight,
 * otherwise records the additional occurrence on the existing entry.
 * @param termID int the integer identifier of a term
 * @param withinDocumentFrequency double the within document
 *        frequency of a term
 */
protected void insertTerm(int termID, double withinDocumentFrequency) {
	final ExpansionTerm existing = terms.get(termID);
	if (existing != null) {
		existing.insertRecord(withinDocumentFrequency);
	} else {
		terms.put(termID, new ExpansionTerm(termID, withinDocumentFrequency));
	}
}
}
/**
 * Adds a term seen in one of the X top-retrieved documents to the pool of
 * candidate expansion terms.
 * @param termID int the integer identifier of a term
 * @param withinDocumentFrequency double the within document
 *        frequency of a term
 */
protected void insertTerm(int termID, double withinDocumentFrequency) {
	final ExpansionTerm candidate = terms.get(termID);
	if (candidate == null) {
		// First occurrence of this term in the top-ranked set.
		terms.put(termID, new ExpansionTerm(termID, withinDocumentFrequency));
		return;
	}
	candidate.insertRecord(withinDocumentFrequency);
}
}
/**
 * Returns the probability of a given termid occurring in the expansion
 * documents: the quotient of its document frequency in the expansion
 * documents divided by the total length of all the expansion documents.
 * @param termId int the term identifier to obtain the probability
 * @return double the probability of the term, or -1 if the term is unknown
 */
public double getExpansionProbability(int termId) {
	final ExpansionTerm t = terms.get(termId);
	if (t == null)
		return -1;
	return t.getDocumentFrequency() / totalDocumentLength;
}
/**
/**
 * Returns the number of the top-ranked documents a given term occurs in.
 * @param termId int the id of the term to get the frequency for.
 * @return double the document frequency of the specified term in the
 *         top-ranked documents, or 0 if the term is unknown.
 */
public double getDocumentFrequency(int termId) {
	final ExpansionTerm t = terms.get(termId);
	return t == null ? 0 : t.getDocumentFrequency();
}
/**
 * Returns the frequency of a given term in the top-ranked documents.
 * @param termId int the id of the term to get the frequency for.
 * @return double the frequency of the specified term in the top-ranked
 *         documents, or 0 if the term is unknown.
 */
public double getFrequency(int termId) {
	final ExpansionTerm t = terms.get(termId);
	return t == null ? 0 : t.getWithinDocumentFrequency();
}
/**
 * Returns the previously computed expansion weight of the term with the
 * given identifier.
 * @param termId int the term identifier to set the weight for.
 * @return double the weight of the specified term, or -1 if it is unknown.
 */
public double getExpansionWeight(int termId) {
	final ExpansionTerm t = terms.get(termId);
	return t == null ? -1 : t.getWeightExpansion();
}
/**
 * Orders ExpansionTerms by descending expansion weight (higher weight first).
 * @param t1 the first term to compare
 * @param t2 the second term to compare
 * @return a negative value if t1 has the greater weight, positive if t2 does
 */
public int compare(ExpansionTerm t1, ExpansionTerm t2) {
	// Double.compare gives a total order that honours the Comparator
	// contract even when a weight is NaN; the original subtract-and-test
	// idiom silently reported NaN operands as "equal", which can corrupt
	// sort results.
	return Double.compare(t2.getWeightExpansion(), t1.getWeightExpansion());
}
};
/**
 * Looks up the expansion weight currently stored for a term.
 * @param termId int the term identifier to set the weight for.
 * @return double the weight of the specified term; -1 when the term does
 *         not appear in the candidate set.
 */
public double getExpansionWeight(int termId) {
	final ExpansionTerm found = terms.get(termId);
	if (found == null)
		return -1;
	return found.getWeightExpansion();
}
/**
 * Returns the probability of a given termid occurring in the expansion
 * documents, i.e. its document frequency in the expansion documents divided
 * by the total length of all the expansion documents.
 * @param termId int the term identifier to obtain the probability
 * @return double the probability of the term, or -1 if the term is unknown
 */
public double getExpansionProbability(int termId) {
	final ExpansionTerm found = terms.get(termId);
	return found == null ? -1 : found.getDocumentFrequency() / totalDocumentLength;
}
/**
/**
 * Reports in how many of the top-ranked documents the given term appears.
 * @param termId int the id of the term to get the frequency for.
 * @return double the document frequency of the specified term in the
 *         top-ranked documents (0 when the term was never seen).
 */
public double getDocumentFrequency(int termId) {
	final ExpansionTerm found = terms.get(termId);
	if (found == null)
		return 0;
	return found.getDocumentFrequency();
}
/**
 * Reports the accumulated within-document frequency of the given term over
 * the top-ranked documents.
 * @param termId int the id of the term to get the frequency for.
 * @return double the frequency of the specified term in the top-ranked
 *         documents (0 when the term was never seen).
 */
public double getFrequency(int termId) {
	final ExpansionTerm found = terms.get(termId);
	if (found == null)
		return 0;
	return found.getWithinDocumentFrequency();
}
/**
 * Compares two expansion terms so that the one with the larger expansion
 * weight sorts first (descending order).
 */
public int compare(ExpansionTerm a, ExpansionTerm b) {
	final double diff = a.getWeightExpansion() - b.getWeightExpansion();
	if (diff > 0.0d)
		return -1;
	if (diff < 0.0d)
		return 1;
	return 0;
}
};