public FatFull(Index index) { super(index); fieldCount = super.collectionStatistics.getNumberOfFields(); }
/**
 * Constructor: delegates index setup to the superclass, then records how
 * many fields the index's collection statistics define.
 * @param index index whose statistics supply the field count
 */
public FatFull(Index index)
{
    super(index);
    fieldCount = super.collectionStatistics.getNumberOfFields();
}
/** Returns a concrete representation of an index's statistics */
@Override
public String toString()
{
    final StringBuilder report = new StringBuilder();
    report.append("Number of documents: ").append(getNumberOfDocuments()).append('\n');
    report.append("Number of terms: ").append(getNumberOfUniqueTerms()).append('\n');
    report.append("Number of fields: ").append(getNumberOfFields()).append('\n');
    report.append("Number of tokens: ").append(getNumberOfTokens()).append('\n');
    return report.toString();
}
/** Returns a concrete representation of an index's statistics */
@Override
public String toString()
{
    // Keep the exact line-per-statistic format expected by callers/logs.
    return new StringBuilder()
        .append("Number of documents: ").append(getNumberOfDocuments()).append('\n')
        .append("Number of terms: ").append(getNumberOfUniqueTerms()).append('\n')
        .append("Number of fields: ").append(getNumberOfFields()).append('\n')
        .append("Number of tokens: ").append(getNumberOfTokens()).append('\n')
        .toString();
}
// Number of fields in the collection, and number of terms in the current query.
final int fieldCount = collStats.getNumberOfFields();
final int queryTermCount = queryTerms.length;
// fields[i] is per query term (hence sized by queryTermCount, not fieldCount);
// presumably it flags whether term i's postings carry field data — confirm against its initialisation.
final boolean fields[] = new boolean[queryTermCount];
// Number of fields in the collection, and number of terms in the current query.
final int fieldCount = collStats.getNumberOfFields();
final int queryTermCount = queryTerms.length;
// fields[i] is per query term (hence sized by queryTermCount, not fieldCount);
// presumably it flags whether term i's postings carry field data — confirm against its initialisation.
final boolean fields[] = new boolean[queryTermCount];
// Number of fields in the collection, and number of terms in the current query.
final int fieldCount = collStats.getNumberOfFields();
final int queryTermCount = queryTerms.length;
// fields[i] is per query term (hence sized by queryTermCount, not fieldCount);
// presumably it flags whether term i's postings carry field data — confirm against its initialisation.
final boolean fields[] = new boolean[queryTermCount];
/**
 * Configures this model from the collection statistics: one Normalisation
 * per field (parameter from property <tt>c.fi</tt>), and the per-field prior
 * p[fi] = 1 / (fieldCount * numDocs * W_fi), where W_fi is read from property
 * <tt>p.fi</tt> (default 1.0) and also retained in fieldWeights.
 * NOTE(review): sibling implementations in this file read fieldWeights from
 * <tt>w.fi</tt> and divide by <tt>p.fi</tt> separately — confirm that taking
 * both from <tt>p.fi</tt> is intended here.
 * @param _cs statistics of the collection being scored
 * @throws IllegalArgumentException if a Normalisation cannot be constructed
 *         or a property value cannot be parsed
 */
@Override
public void setCollectionStatistics(CollectionStatistics _cs)
{
    super.setCollectionStatistics(_cs);
    fieldCount = _cs.getNumberOfFields();
    p = new double[fieldCount];
    fieldWeights = new double[fieldCount];
    this.fieldNormalisations = new Normalisation[fieldCount];
    try {
        for (int fi = 0; fi < fieldCount; fi++)
        {
            // Class.newInstance() is deprecated (it propagates checked
            // exceptions unchecked); use the no-arg constructor explicitly.
            final Normalisation nf = normClass.getDeclaredConstructor().newInstance();
            this.fieldNormalisations[fi] = nf;
            final double param = Double.parseDouble(ApplicationSetup.getProperty("c." + fi, "" + 1.0));
            nf.setParameter(param);
            nf.setNumberOfDocuments(_cs.getNumberOfDocuments());
            final long tokensf = _cs.getFieldTokens()[fi];
            nf.setNumberOfTokens(tokensf);
            nf.setAverageDocumentLength(_cs.getAverageFieldLengths()[fi]);
            // Uniform prior over (field, document) pairs...
            p[fi] = 1.0d / ((double) fieldCount * (double) _cs.getNumberOfDocuments());
            // ...scaled by the per-field weight (was an embedded assignment).
            fieldWeights[fi] = Double.parseDouble(ApplicationSetup.getProperty("p." + fi, "1.0d"));
            p[fi] = p[fi] / fieldWeights[fi];
        }
    } catch (Exception e) {
        throw new IllegalArgumentException(e);
    }
}
@Override public void setCollectionStatistics(CollectionStatistics _cs) { super.setCollectionStatistics(_cs); fieldCount = _cs.getNumberOfFields(); p = new double[fieldCount]; fieldWeights = new double[fieldCount]; this.fieldNormalisations = new Normalisation[fieldCount]; try{ for(int fi=0;fi<fieldCount;fi++) { fieldWeights[fi] = Double.parseDouble(ApplicationSetup.getProperty("w."+ fi, ""+1.0)); final Normalisation nf = normClass.newInstance(); this.fieldNormalisations[fi] = nf; final double param = Double.parseDouble(ApplicationSetup.getProperty("c."+ fi, ""+1.0)); nf.setParameter(param); nf.setNumberOfDocuments(_cs.getNumberOfDocuments()); final long tokensf = _cs.getFieldTokens()[fi]; nf.setNumberOfTokens(tokensf); nf.setAverageDocumentLength(_cs.getAverageFieldLengths()[fi]); p[fi] = 1.0d / ((double)fieldCount * (double)_cs.getNumberOfDocuments()); //System.err.println("p["+fi+"]="+ p[fi]); p[fi] = p[fi] / Double.parseDouble( ApplicationSetup.getProperty("p." + fi, "1.0d")); //System.err.println("p["+fi+"]="+ p[fi]); } } catch (Exception e) { throw new IllegalArgumentException(e); } }
/**
 * Configures this model from the collection statistics: one Normalisation
 * per field (parameter from property <tt>c.fi</tt>), and the per-field prior
 * p[fi] = 1 / (fieldCount * numDocs * W_fi), where W_fi is read from property
 * <tt>p.fi</tt> (default 1.0) and also retained in fieldWeights.
 * NOTE(review): sibling implementations in this file read fieldWeights from
 * <tt>w.fi</tt> and divide by <tt>p.fi</tt> separately — confirm that taking
 * both from <tt>p.fi</tt> is intended here.
 * @param _cs statistics of the collection being scored
 * @throws IllegalArgumentException if a Normalisation cannot be constructed
 *         or a property value cannot be parsed
 */
@Override
public void setCollectionStatistics(CollectionStatistics _cs)
{
    super.setCollectionStatistics(_cs);
    fieldCount = _cs.getNumberOfFields();
    p = new double[fieldCount];
    fieldWeights = new double[fieldCount];
    this.fieldNormalisations = new Normalisation[fieldCount];
    try {
        for (int fi = 0; fi < fieldCount; fi++)
        {
            // Class.newInstance() is deprecated (it propagates checked
            // exceptions unchecked); use the no-arg constructor explicitly.
            final Normalisation nf = normClass.getDeclaredConstructor().newInstance();
            this.fieldNormalisations[fi] = nf;
            final double param = Double.parseDouble(ApplicationSetup.getProperty("c." + fi, "" + 1.0));
            nf.setParameter(param);
            nf.setNumberOfDocuments(_cs.getNumberOfDocuments());
            final long tokensf = _cs.getFieldTokens()[fi];
            nf.setNumberOfTokens(tokensf);
            nf.setAverageDocumentLength(_cs.getAverageFieldLengths()[fi]);
            // Uniform prior over (field, document) pairs...
            p[fi] = 1.0d / ((double) fieldCount * (double) _cs.getNumberOfDocuments());
            // ...scaled by the per-field weight (was an embedded assignment).
            fieldWeights[fi] = Double.parseDouble(ApplicationSetup.getProperty("p." + fi, "1.0d"));
            p[fi] = p[fi] / fieldWeights[fi];
        }
    } catch (Exception e) {
        throw new IllegalArgumentException(e);
    }
}
@Override public void setCollectionStatistics(CollectionStatistics _cs) { super.setCollectionStatistics(_cs); fieldCount = _cs.getNumberOfFields(); p = new double[fieldCount]; fieldWeights = new double[fieldCount]; this.fieldNormalisations = new Normalisation[fieldCount]; try{ for(int fi=0;fi<fieldCount;fi++) { fieldWeights[fi] = Double.parseDouble(ApplicationSetup.getProperty("w."+ fi, ""+1.0)); final Normalisation nf = normClass.newInstance(); this.fieldNormalisations[fi] = nf; final double param = Double.parseDouble(ApplicationSetup.getProperty("c."+ fi, ""+1.0)); nf.setParameter(param); nf.setNumberOfDocuments(_cs.getNumberOfDocuments()); final long tokensf = _cs.getFieldTokens()[fi]; nf.setNumberOfTokens(tokensf); nf.setAverageDocumentLength(_cs.getAverageFieldLengths()[fi]); p[fi] = 1.0d / ((double)fieldCount * (double)_cs.getNumberOfDocuments()); //System.err.println("p["+fi+"]="+ p[fi]); p[fi] = p[fi] / Double.parseDouble( ApplicationSetup.getProperty("p." + fi, "1.0d")); //System.err.println("p["+fi+"]="+ p[fi]); } } catch (Exception e) { throw new IllegalArgumentException(e); } }
/**
 * Configures the basic model and one Normalisation per field from the
 * collection statistics. Field weights come from properties <tt>w.fi</tt>
 * and normalisation parameters from <tt>c.fi</tt> (both default 1.0).
 * @param _cs statistics of the collection being scored
 * @throws IllegalStateException if the index has no fields, or a
 *         Normalisation cannot be constructed or configured
 */
@Override
public void setCollectionStatistics(CollectionStatistics _cs)
{
    super.setCollectionStatistics(_cs);
    fieldCount = _cs.getNumberOfFields();
    if (fieldCount < 1)
        throw new IllegalStateException("Fields must be 1 or more");
    basicModel.setNumberOfDocuments(_cs.getNumberOfDocuments());
    basicModel.setNumberOfTokens(_cs.getNumberOfTokens());
    fieldNormalisations = new Normalisation[fieldCount];
    fieldGlobalFrequencies = new double[fieldCount];
    fieldWeights = new double[fieldCount];
    try {
        for (int fi = 0; fi < fieldCount; fi++)
        {
            fieldWeights[fi] = Double.parseDouble(ApplicationSetup.getProperty("w." + fi, "" + 1.0));
            // Class.newInstance() is deprecated (it propagates checked
            // exceptions unchecked); use the no-arg constructor explicitly.
            Normalisation nf = this.fieldNormalisations[fi] = normClass.getDeclaredConstructor().newInstance();
            final double param = Double.parseDouble(ApplicationSetup.getProperty("c." + fi, "" + 1.0));
            nf.setParameter(param);
            nf.setNumberOfDocuments(_cs.getNumberOfDocuments());
            final long tokensf = _cs.getFieldTokens()[fi];
            nf.setNumberOfTokens(tokensf);
            nf.setAverageDocumentLength(_cs.getAverageFieldLengths()[fi]);
        }
    } catch (Exception e) {
        throw new IllegalStateException(e);
    }
}
/**
 * Restricts the collection statistics seen by the basic model to the active
 * fields only: total tokens and average document length are recomputed over
 * activeFieldIds before being handed to the basic model.
 * @param _cs statistics of the full collection
 * @throws IllegalStateException if the index has no fields
 */
@Override
public void setCollectionStatistics(CollectionStatistics _cs)
{
    super.setCollectionStatistics(_cs);
    final int numFields = _cs.getNumberOfFields();
    if (numFields < 1)
        throw new IllegalStateException("Fields must be 1 or more");
    // Sum the token counts of the active fields only.
    final long[] perFieldTokens = _cs.getFieldTokens();
    long activeTokens = 0;
    for (int fieldId : activeFieldIds)
        activeTokens += perFieldTokens[fieldId];
    super.numberOfTokens = activeTokens;
    super.averageDocumentLength = (double) activeTokens / (double) _cs.getNumberOfDocuments();
    // Hand the basic model statistics restricted to the active fields
    // (no per-field breakdown, hence the empty field-tokens array).
    basicModel.setCollectionStatistics(new CollectionStatistics(
        _cs.getNumberOfDocuments(), _cs.getNumberOfUniqueTerms(),
        activeTokens, _cs.getNumberOfPointers(), new long[0]));
}
/**
 * Configures the basic model and one Normalisation per field from the
 * collection statistics. Field weights come from properties <tt>w.fi</tt>
 * and normalisation parameters from <tt>c.fi</tt> (both default 1.0).
 * @param _cs statistics of the collection being scored
 * @throws IllegalStateException if the index has no fields, or a
 *         Normalisation cannot be constructed or configured
 */
@Override
public void setCollectionStatistics(CollectionStatistics _cs)
{
    super.setCollectionStatistics(_cs);
    fieldCount = _cs.getNumberOfFields();
    if (fieldCount < 1)
        throw new IllegalStateException("Fields must be 1 or more");
    basicModel.setNumberOfDocuments(_cs.getNumberOfDocuments());
    basicModel.setNumberOfTokens(_cs.getNumberOfTokens());
    fieldNormalisations = new Normalisation[fieldCount];
    fieldGlobalFrequencies = new double[fieldCount];
    fieldWeights = new double[fieldCount];
    try {
        for (int fi = 0; fi < fieldCount; fi++)
        {
            fieldWeights[fi] = Double.parseDouble(ApplicationSetup.getProperty("w." + fi, "" + 1.0));
            // Class.newInstance() is deprecated (it propagates checked
            // exceptions unchecked); use the no-arg constructor explicitly.
            Normalisation nf = this.fieldNormalisations[fi] = normClass.getDeclaredConstructor().newInstance();
            final double param = Double.parseDouble(ApplicationSetup.getProperty("c." + fi, "" + 1.0));
            nf.setParameter(param);
            nf.setNumberOfDocuments(_cs.getNumberOfDocuments());
            final long tokensf = _cs.getFieldTokens()[fi];
            nf.setNumberOfTokens(tokensf);
            nf.setAverageDocumentLength(_cs.getAverageFieldLengths()[fi]);
        }
    } catch (Exception e) {
        throw new IllegalStateException(e);
    }
}
/**
 * Rebuilds the statistics handed to the basic model so they cover only the
 * active fields: token total and average document length are derived from
 * activeFieldIds rather than the whole collection.
 * @param _cs statistics of the full collection
 * @throws IllegalStateException if the index has no fields
 */
@Override
public void setCollectionStatistics(CollectionStatistics _cs)
{
    super.setCollectionStatistics(_cs);
    if (_cs.getNumberOfFields() < 1)
        throw new IllegalStateException("Fields must be 1 or more");
    // Total tokens across the active fields only.
    long activeTokens = 0;
    final long[] perFieldTokens = _cs.getFieldTokens();
    for (int fieldId : activeFieldIds)
    {
        activeTokens += perFieldTokens[fieldId];
    }
    super.numberOfTokens = activeTokens;
    super.averageDocumentLength = (double) activeTokens / (double) _cs.getNumberOfDocuments();
    // The basic model sees the reduced token count; per-field token detail
    // is deliberately dropped (empty long[]).
    basicModel.setCollectionStatistics(new CollectionStatistics(
        _cs.getNumberOfDocuments(), _cs.getNumberOfUniqueTerms(),
        activeTokens, _cs.getNumberOfPointers(), new long[0]));
}
/**
 * Scores posting i for the candidate document cc, then keeps a deep,
 * writable copy of the posting (including document/field lengths) on the
 * candidate result — presumably for later re-scoring/feature extraction;
 * confirm against FatCandidateResult's consumers.
 * @param i index of the query term / posting list being scored
 * @param cc the candidate result to update (must be a FatCandidateResult)
 * @throws IOException propagated from the posting-list manager
 */
@Override protected void assignScore(final int i, final CandidateResult cc) throws IOException
{
	//update the score as normal
	cc.updateScore(plm.score(i));
	// Occurrence bitmask only has 16 bits; terms beyond i==15 contribute 0.
	// NOTE(review): (short)(1 << 15) is negative — callers must treat this as a bitmask, not a value.
	cc.updateOccurrence((i < 16) ? (short)(1 << i) : 0);
	//get a deep copy of the posting
	final Posting p = plm.getPosting(i);
	//writable postings don't copy or retain document length. Make this not so.
	final WritablePosting wp = p.asWritablePosting();
	assert wp.getId() == cc.getDocId() : "Posting does not have same docid as candidate result";
	wp.setDocumentLength(p.getDocumentLength());
	if (fields[i])
	{
		// Copy the per-field lengths into a fresh array so the writable
		// posting does not alias the (reused) source posting's array.
		final int[] fieldLengths = ((FieldPosting)p).getFieldLengths();
		final int[] newFieldLengths = new int[fieldCount];
		System.arraycopy(fieldLengths, 0, newFieldLengths, 0, fieldCount);
		//System.err.println(fieldLengths);
		assert fieldLengths.length == super.collectionStatistics.getNumberOfFields() : " posting "+p +" for docid " + p.getId() + " has wrong number of fields for length";
		((FieldPosting)wp).setFieldLengths(newFieldLengths);
	}
	//store somewhere
	((FatCandidateResult)cc).setPosting(i, wp);
}
}
/**
 * Scores posting i for the candidate document cc, then keeps a deep,
 * writable copy of the posting (including document/field lengths) on the
 * candidate result — presumably for later re-scoring/feature extraction;
 * confirm against FatCandidateResult's consumers.
 * @param i index of the query term / posting list being scored
 * @param cc the candidate result to update (must be a FatCandidateResult)
 * @throws IOException propagated from the posting-list manager
 */
@Override protected void assignScore(final int i, final CandidateResult cc) throws IOException
{
	//update the score as normal
	cc.updateScore(plm.score(i));
	// Occurrence bitmask only has 16 bits; terms beyond i==15 contribute 0.
	// NOTE(review): (short)(1 << 15) is negative — callers must treat this as a bitmask, not a value.
	cc.updateOccurrence((i < 16) ? (short)(1 << i) : 0);
	//get a deep copy of the posting
	final Posting p = plm.getPosting(i);
	//writable postings don't copy or retain document length. Make this not so.
	final WritablePosting wp = p.asWritablePosting();
	assert wp.getId() == cc.getDocId() : "Posting does not have same docid as candidate result";
	wp.setDocumentLength(p.getDocumentLength());
	if (fields[i])
	{
		// Copy the per-field lengths into a fresh array so the writable
		// posting does not alias the (reused) source posting's array.
		final int[] fieldLengths = ((FieldPosting)p).getFieldLengths();
		final int[] newFieldLengths = new int[fieldCount];
		System.arraycopy(fieldLengths, 0, newFieldLengths, 0, fieldCount);
		//System.err.println(fieldLengths);
		assert fieldLengths.length == super.collectionStatistics.getNumberOfFields() : " posting "+p +" for docid " + p.getId() + " has wrong number of fields for length";
		((FieldPosting)wp).setFieldLengths(newFieldLengths);
	}
	//store somewhere
	((FatCandidateResult)cc).setPosting(i, wp);
}
}
/**
 * Indexes a single document with one TITLE field, then runs three
 * single-line topics (a plain term, a #1 phrase operator, and a
 * field-qualified term) through TRECQuerying with the matchop query
 * language enabled. Passes if processing completes without exception.
 */
@Test public void testTopics() throws Exception
{
	// Index "over" as the TITLE field; term pipeline disabled so tokens are kept verbatim.
	ApplicationSetup.setProperty("FieldTags.process", "TITLE");
	ApplicationSetup.setProperty("termpipelines", "");
	Index index = IndexTestUtils.makeIndexFieldsBlocks(new String[]{"doc1"}, new String[]{"the fox jumped <TITLE>over</TITLE>"});
	assertEquals(1, index.getCollectionStatistics().getNumberOfFields());
	// Topics are one-per-line, untokenised, expressed in the matchop query language.
	ApplicationSetup.setProperty("trec.topics.parser", SingleLineTRECQuery.class.getName());
	ApplicationSetup.setProperty("SingleLineTRECQuery.tokenise", "false");
	ApplicationSetup.setProperty("trec.topics.matchopql", "true");
	String f = super.writeTemporaryFile("x.topics", new String[]{ "1 fox", "2 #1(fox jumped)", "3 over.TITLE" });
	ApplicationSetup.setProperty("trec.topics", f);
	TRECQuerying tq = new TRECQuerying(index.getIndexRef());
	// NOTE(review): "intialise" is the API method's own spelling — do not "correct" it.
	tq.intialise();
	tq.processQueries();
}
// Per-field token counts persisted as index properties...
assertEquals(123, index.getIntIndexProperty("num.field.0.Tokens", -1));
assertEquals(611, index.getIntIndexProperty("num.field.1.Tokens", -1));
// ...must agree with what the collection statistics expose.
assertEquals(2, index.getCollectionStatistics().getNumberOfFields());
assertEquals(123, index.getCollectionStatistics().getFieldTokens()[0]);
assertEquals(611, index.getCollectionStatistics().getFieldTokens()[1]);
// Source index: one document, two fields with 2 and 7 tokens respectively.
assertNotNull(index);
assertEquals(1, index.getCollectionStatistics().getNumberOfDocuments());
assertEquals(2, index.getCollectionStatistics().getNumberOfFields());
assertEquals(2, index.getCollectionStatistics().getFieldTokens()[0]);
assertEquals(7, index.getCollectionStatistics().getFieldTokens()[1]);
// The fat index must mirror the source index's field statistics exactly.
assertEquals(index.getCollectionStatistics().getNumberOfFields(), fatIndex.getCollectionStatistics().getNumberOfFields());
assertEquals(index.getCollectionStatistics().getFieldTokens()[0], fatIndex.getCollectionStatistics().getFieldTokens()[0]);
assertEquals(index.getCollectionStatistics().getFieldTokens()[1], fatIndex.getCollectionStatistics().getFieldTokens()[1]);