/**
 * Returns the length of the current document, looked up from the parent
 * posting's per-field length array at this decorator's field index.
 */
@Override
public int getDocumentLength() {
	final int[] lengths = fieldParent.getFieldLengths();
	return lengths[fieldId];
}
/**
 * Returns the per-field lengths of the current document, delegated to the
 * underlying posting (which must be a {@link FieldPosting}).
 */
@Override
public int[] getFieldLengths() {
	final FieldPosting fieldPosting = (FieldPosting) current;
	return fieldPosting.getFieldLengths();
}
/** Exposes the wrapped posting's per-field document lengths. */
@Override
public int[] getFieldLengths() {
	// current is known to carry field information here
	return ((FieldPosting) current).getFieldLengths();
}
/**
 * Length of the current document for this field, i.e. the entry at
 * {@code fieldId} in the parent's field-length array.
 */
@Override
public int getDocumentLength() {
	return fieldParent.getFieldLengths()[fieldId];
}
/** Forwards the field-length request to the decorated posting. */
@Override
public int[] getFieldLengths() {
	final FieldPosting fp = (FieldPosting) current;
	return fp.getFieldLengths();
}
/**
 * Per-field lengths of the document at the current posting position,
 * obtained from the underlying field posting.
 */
@Override
public int[] getFieldLengths() {
	return ((FieldPosting) current).getFieldLengths();
}
/**
 * Handles the first posting seen: performs the superclass bookkeeping,
 * then snapshots the posting's per-field frequencies and per-field
 * lengths into this object's {@code fieldFreqs}/{@code fieldLens} arrays.
 */
@Override
protected void firstPosting(Posting _p) {
	super.firstPosting(_p);
	final FieldPosting fieldPosting = (FieldPosting) _p;
	// copy rather than alias: the posting's arrays may be reused as iteration advances
	System.arraycopy(fieldPosting.getFieldFrequencies(), 0, fieldFreqs, 0, fieldCount);
	System.arraycopy(fieldPosting.getFieldLengths(), 0, fieldLens, 0, fieldCount);
}
/**
 * First-posting hook: after the superclass has recorded the posting,
 * capture copies of its field frequencies and field lengths locally.
 */
@Override
protected void firstPosting(Posting _p) {
	super.firstPosting(_p);
	final FieldPosting fp = (FieldPosting) _p;
	final int[] freqs = fp.getFieldFrequencies();
	final int[] lens = fp.getFieldLengths();
	System.arraycopy(freqs, 0, fieldFreqs, 0, fieldCount);
	System.arraycopy(lens, 0, fieldLens, 0, fieldCount);
}
// Query-term frequency plus this posting's per-field frequencies and lengths.
double tf_q = super.termFrequency; final int[] tff = fp.getFieldFrequencies(); final int[] fieldLengths = fp.getFieldLengths();
// Query-term frequency plus this posting's per-field frequencies and lengths.
double tf_q = super.termFrequency; final int[] tff = fp.getFieldFrequencies(); final int[] fieldLengths = fp.getFieldLengths();
// Accumulator for the normalisation denominator; fetch per-field stats for this posting.
double denom = 0.0d; final int[] tff = fp.getFieldFrequencies(); final int[] fieldLengths = fp.getFieldLengths();
// Accumulator for the normalisation denominator; fetch per-field stats for this posting.
double denom = 0.0d; final int[] tff = fp.getFieldFrequencies(); final int[] fieldLengths = fp.getFieldLengths();
// Debug dump: document length, then (when fields are enabled) the comma-joined per-field lengths.
System.out.print(" l=" + postings[i][j].getDocumentLength()); if (fields) System.out.print(" lf=" + ArrayUtils.join(((FieldPosting) postings[i][j]).getFieldLengths(), ",")); System.out.print(' ');
@Override public double score(Posting _p) { FieldPosting p = (FieldPosting)_p; final int[] tff = p.getFieldFrequencies(); final int[] lf = p.getFieldLengths(); //System.err.println("tff=" + ArrayUtils.join(tff, ",")); //System.err.println("lf=" + ArrayUtils.join(lf, ",")); assert lf != null : "No fields lengths from posting "; assert tff.length == lf.length : "Mismatch between lengths of field length and frequencies"; int tf = 0, l = 0; for(int fieldId : activeFieldIds) { tf += tff[fieldId]; l += lf[fieldId]; } if (tf == 0) return 0; assert l > 0 : "Frequency but no length for docid " + p.getId(); //System.err.println("tf=" + tf + " l="+l); final double rtr = basicModel.score(tf, l); if (Double.isNaN(rtr)) System.err.println("BPosting " + p.getId() + " had NaN : tf=" + tf + " l="+l+ " tf=" + org.terrier.utility.ArrayUtils.join(tff, ",") + " lf=" + org.terrier.utility.ArrayUtils.join(lf, ",")); return rtr; }
@Override public double score(Posting _p) { FieldPosting p = (FieldPosting)_p; final int[] tff = p.getFieldFrequencies(); final int[] lf = p.getFieldLengths(); //System.err.println("tff=" + ArrayUtils.join(tff, ",")); //System.err.println("lf=" + ArrayUtils.join(lf, ",")); assert lf != null : "No fields lengths from posting "; assert tff.length == lf.length : "Mismatch between lengths of field length and frequencies"; int tf = 0, l = 0; for(int fieldId : activeFieldIds) { tf += tff[fieldId]; l += lf[fieldId]; } if (tf == 0) return 0; assert l > 0 : "Frequency but no length for docid " + p.getId(); //System.err.println("tf=" + tf + " l="+l); final double rtr = basicModel.score(tf, l); if (Double.isNaN(rtr)) System.err.println("BPosting " + p.getId() + " had NaN : tf=" + tf + " l="+l+ " tf=" + org.terrier.utility.ArrayUtils.join(tff, ",") + " lf=" + org.terrier.utility.ArrayUtils.join(lf, ",")); return rtr; }
@Override public double score(Posting _p) { FieldPosting p = (FieldPosting)_p; final int[] tff = p.getFieldFrequencies(); final int[] fieldLengths = p.getFieldLengths(); final double[] normFieldFreqs = new double[fieldCount]; for(int i=0;i<fieldCount;i++) { if (tff[i] != 0.0d) normFieldFreqs[i] = fieldWeights[i] * fieldNormalisations[i].normalise(tff[i], fieldLengths[i], fieldGlobalFrequencies[i]); } final double tf = StaTools.sum(normFieldFreqs); //System.err.println("tf=" + tf); if (tf == 0.0d) return 0.0d; return basicModel.score(tf, super.documentFrequency, super.termFrequency, super.keyFrequency, p.getDocumentLength()); }
@Override public double score(Posting _p) { FieldPosting p = (FieldPosting)_p; final int[] tff = p.getFieldFrequencies(); final int[] fieldLengths = p.getFieldLengths(); final double[] normFieldFreqs = new double[fieldCount]; for(int i=0;i<fieldCount;i++) { if (tff[i] != 0.0d) normFieldFreqs[i] = fieldWeights[i] * fieldNormalisations[i].normalise(tff[i], fieldLengths[i], fieldGlobalFrequencies[i]); } final double tf = StaTools.sum(normFieldFreqs); //System.err.println("tf=" + tf); if (tf == 0.0d) return 0.0d; return basicModel.score(tf, super.documentFrequency, super.termFrequency, super.keyFrequency, p.getDocumentLength()); }
@Override protected void assignScore(final int i, final CandidateResult cc) throws IOException { //update the score as normal cc.updateScore(plm.score(i)); cc.updateOccurrence((i < 16) ? (short)(1 << i) : 0); //get a deep copy of the posting final Posting p = plm.getPosting(i); //writable postings don't copy or retain document length. Make this not so. final WritablePosting wp = p.asWritablePosting(); assert wp.getId() == cc.getDocId() : "Posting does not have same docid as candidate result"; wp.setDocumentLength(p.getDocumentLength()); if (fields[i]) { final int[] fieldLengths = ((FieldPosting)p).getFieldLengths(); final int[] newFieldLengths = new int[fieldCount]; System.arraycopy(fieldLengths, 0, newFieldLengths, 0, fieldCount); //System.err.println(fieldLengths); assert fieldLengths.length == super.collectionStatistics.getNumberOfFields() : " posting "+p +" for docid " + p.getId() + " has wrong number of fields for length"; ((FieldPosting)wp).setFieldLengths(newFieldLengths); } //store somewhere ((FatCandidateResult)cc).setPosting(i, wp); } }
@Override protected void assignScore(final int i, final CandidateResult cc) throws IOException { //update the score as normal cc.updateScore(plm.score(i)); cc.updateOccurrence((i < 16) ? (short)(1 << i) : 0); //get a deep copy of the posting final Posting p = plm.getPosting(i); //writable postings don't copy or retain document length. Make this not so. final WritablePosting wp = p.asWritablePosting(); assert wp.getId() == cc.getDocId() : "Posting does not have same docid as candidate result"; wp.setDocumentLength(p.getDocumentLength()); if (fields[i]) { final int[] fieldLengths = ((FieldPosting)p).getFieldLengths(); final int[] newFieldLengths = new int[fieldCount]; System.arraycopy(fieldLengths, 0, newFieldLengths, 0, fieldCount); //System.err.println(fieldLengths); assert fieldLengths.length == super.collectionStatistics.getNumberOfFields() : " posting "+p +" for docid " + p.getId() + " has wrong number of fields for length"; ((FieldPosting)wp).setFieldLengths(newFieldLengths); } //store somewhere ((FatCandidateResult)cc).setPosting(i, wp); } }
@Test public void testSingleDocumentIndexMatchingFields() throws Exception { ResultSet rs = _testSingleDocumentIndexMatchingFields(); //get postings from ResultSet for first ranked document assertTrue(rs instanceof FatCandidateResultSet); Posting[] postings = ((FatCandidateResultSet)rs).getPostings()[0]; assertEquals(1, postings.length); assertEquals(0, postings[0].getId()); assertEquals(1, postings[0].getFrequency()); assertEquals(9, postings[0].getDocumentLength()); FieldPosting p = (FieldPosting)postings[0]; assertEquals(1, p.getFieldFrequencies()[0]); assertEquals(0, p.getFieldFrequencies()[1]); assertEquals(2, p.getFieldLengths()[0]); assertEquals(7, p.getFieldLengths()[1]); }