/** The default implementation computes the average as <code>sumTotalTermFreq / docCount</code> */ protected float avgFieldLength(CollectionStatistics collectionStats) { final long sumTotalTermFreq; if (collectionStats.sumTotalTermFreq() == -1) { // frequencies are omitted (tf=1), its # of postings if (collectionStats.sumDocFreq() == -1) { // theoretical case only: remove! return 1f; } sumTotalTermFreq = collectionStats.sumDocFreq(); } else { sumTotalTermFreq = collectionStats.sumTotalTermFreq(); } final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount(); return (float) (sumTotalTermFreq / (double) docCount); }
// NOTE(review): truncated fragment — the enclosing method and the closing brace are not visible here.
// NOTE(review): this branch looks wrong: when sumDocFreq() == -1 ("statistic unavailable"), it copies
// that same -1 sentinel into numberOfFieldTokens and then uses it in the average. The intent was
// presumably to test sumTotalTermFreq() == -1 and fall back to sumDocFreq() (as SimilarityBase's
// avgFieldLength does) — confirm against the surrounding method before changing.
if (collectionStats.sumDocFreq() == -1) { numberOfFieldTokens = collectionStats.sumDocFreq(); avgFieldLength = (float) (collectionStats.sumDocFreq() / (double)numberOfDocuments);
/**
 * Serializes one field's collection statistics in a fixed order:
 * sumTotalTermFreq, sumDocFreq, docCount. Each value may legitimately be -1
 * ("statistic unavailable"), hence the potentially-negative write variants.
 *
 * @param fieldStats the per-field statistics to write
 * @throws IOException if the underlying output fails
 */
private void writeFieldStatistics(CollectionStatistics fieldStats) throws IOException {
    final long sumTotalTermFreq = fieldStats.sumTotalTermFreq();
    assert sumTotalTermFreq >= -1;
    writePotentiallyNegativeVLong(sumTotalTermFreq);

    final long sumDocFreq = fieldStats.sumDocFreq();
    assert sumDocFreq >= -1;
    writePotentiallyNegativeVLong(sumDocFreq);

    // NOTE(review): narrowing cast assumes docCount fits in an int — confirm upstream bound.
    final int docCount = (int) fieldStats.docCount();
    assert docCount >= -1;
    writePotentiallyNegativeVInt(docCount);
}
/** Returns this field's {@code sumDocFreq} statistic (total number of postings). */
public long sumdf() throws IOException {
    final long sumDocFreq = fieldStats.sumDocFreq();
    return sumDocFreq;
}
/**
 * Builds the per-query weight from collection- and term-level statistics.
 * Falls back to {@code maxDoc} when the per-field {@code docCount} is unavailable (-1).
 */
@Override
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
    final Query query = new Query(boost);

    long docCount = collectionStats.docCount();
    if (docCount == -1) {
        docCount = collectionStats.maxDoc();
    }
    final Field field = new Field(docCount, collectionStats.sumDocFreq(), collectionStats.sumTotalTermFreq());

    final Term[] terms = new Term[termStats.length];
    int index = 0;
    for (TermStatistics stat : termStats) {
        terms[index++] = new Term(stat.docFreq(), stat.totalTermFreq());
    }
    return new Weight(collectionStats.field(), query, field, terms);
}
/** Returns this field's {@code sumDocFreq} statistic (total number of postings). */
public long sumdf() throws IOException { return fieldStats.sumDocFreq(); }
/**
 * Writes per-field collection statistics: an entry count, then for each field the
 * name, maxDoc, and docCount/sumTotalTermFreq/sumDocFreq shifted by one via
 * {@code addOne} so the -1 "unavailable" sentinel fits an unsigned VLong.
 *
 * @param out             destination stream
 * @param fieldStatistics field name to statistics map
 * @throws IOException if writing to the stream fails
 */
public static void writeFieldStats(StreamOutput out, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics) throws IOException {
    out.writeVInt(fieldStatistics.size());
    for (ObjectObjectCursor<String, CollectionStatistics> entry : fieldStatistics) {
        out.writeString(entry.key);
        final CollectionStatistics stats = entry.value;
        assert stats.maxDoc() >= 0;
        out.writeVLong(stats.maxDoc());
        // These three may be -1 ("unavailable"); shift into non-negative VLong range.
        out.writeVLong(addOne(stats.docCount()));
        out.writeVLong(addOne(stats.sumTotalTermFreq()));
        out.writeVLong(addOne(stats.sumDocFreq()));
    }
}
// NOTE(review): truncated fragment — the start of the merge expression and the declaration of
// `merged` are outside this view. Each statistic pair is combined with optionalSum, which
// presumably preserves the -1 "unavailable" sentinel when either side is missing — confirm
// against optionalSum's definition.
optionalSum(existing.docCount(), value.docCount()), optionalSum(existing.sumTotalTermFreq(), value.sumTotalTermFreq()), optionalSum(existing.sumDocFreq(), value.sumDocFreq()) ); fieldStatistics.put(key, merged);
/** The default implementation computes the average as <code>sumTotalTermFreq / docCount</code> */
protected float avgFieldLength(CollectionStatistics collectionStats) {
    final long sumTotalTermFreq;
    if (collectionStats.sumTotalTermFreq() == -1) {
        // frequencies are omitted (tf=1), its # of postings
        if (collectionStats.sumDocFreq() == -1) {
            // theoretical case only: remove!
            return 1f;
        }
        sumTotalTermFreq = collectionStats.sumDocFreq();
    } else {
        sumTotalTermFreq = collectionStats.sumTotalTermFreq();
    }
    // docCount may be -1 when per-field counts are unavailable; fall back to maxDoc.
    final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
    // Divide in double precision before narrowing to float.
    return (float) (sumTotalTermFreq / (double) docCount);
}
/**
 * Prints token count, document count, sum of document frequencies and average
 * document length for the ALL, TITLE and CONTENT fields.
 *
 * <p>Fixes over the previous version: the average document length was computed
 * with integer division (silently truncating the fraction) and divided by zero
 * when a field contained no documents; the three identical stanzas are now a
 * single helper.
 *
 * @throws IOException if reading the index statistics fails
 */
public void reportCollectionStatistics() throws IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    reportFieldStatistics(searcher, Lucene4IRConstants.FIELD_ALL, "ALL");
    reportFieldStatistics(searcher, Lucene4IRConstants.FIELD_TITLE, "TITLE");
    reportFieldStatistics(searcher, Lucene4IRConstants.FIELD_CONTENT, "CONTENT");
}

/** Fetches and prints the collection statistics for a single field. */
private void reportFieldStatistics(IndexSearcher searcher, String field, String label) throws IOException {
    CollectionStatistics collectionStats = searcher.collectionStatistics(field);
    long tokenCount = collectionStats.sumTotalTermFreq();
    long docCount = collectionStats.docCount();
    long sumDocCount = collectionStats.sumDocFreq();
    // Floating-point division, guarded against an empty field (docCount == 0).
    double avgDocLength = docCount == 0 ? 0.0 : (double) tokenCount / docCount;
    System.out.println(label + ": Token count: " + tokenCount
            + " Doc Count: " + docCount
            + " sum doc: " + sumDocCount
            + " avg doc len: " + avgDocLength);
}
/**
 * Serializes one field's collection statistics in a fixed order:
 * sumTotalTermFreq, sumDocFreq, docCount. Each value may legitimately be -1
 * ("statistic unavailable"), hence the potentially-negative write variants.
 *
 * @param fieldStats the per-field statistics to write
 * @throws IOException if the underlying output fails
 */
private void writeFieldStatistics(CollectionStatistics fieldStats) throws IOException {
    long sttf = fieldStats.sumTotalTermFreq();
    assert (sttf >= -1);
    writePotentiallyNegativeVLong(sttf);
    long sdf = fieldStats.sumDocFreq();
    assert (sdf >= -1);
    writePotentiallyNegativeVLong(sdf);
    // NOTE(review): narrowing cast assumes docCount fits in an int — confirm upstream bound.
    int dc = (int) fieldStats.docCount();
    assert (dc >= -1);
    writePotentiallyNegativeVInt(dc);
}
/**
 * Writes this field's statistics. Write order is fixed: sumTotalTermFreq,
 * sumDocFreq, docCount. A value of -1 encodes "statistic unavailable".
 *
 * @param fieldStats statistics for a single field
 * @throws IOException if the underlying output fails
 */
private void writeFieldStatistics(CollectionStatistics fieldStats) throws IOException {
    final long totalTermFreq = fieldStats.sumTotalTermFreq();
    assert totalTermFreq >= -1;
    writePotentiallyNegativeVLong(totalTermFreq);
    final long docFreqSum = fieldStats.sumDocFreq();
    assert docFreqSum >= -1;
    writePotentiallyNegativeVLong(docFreqSum);
    final int documentCount = (int) fieldStats.docCount();
    assert documentCount >= -1;
    writePotentiallyNegativeVInt(documentCount);
}
// Serializes one field's collection statistics (sumTotalTermFreq, sumDocFreq, docCount,
// in that order). A value of -1 means the statistic is unavailable, so the
// potentially-negative variants are used for encoding.
private void writeFieldStatistics(CollectionStatistics fieldStats) throws IOException {
    long sttf = fieldStats.sumTotalTermFreq();
    assert (sttf >= -1);
    writePotentiallyNegativeVLong(sttf);
    long sdf = fieldStats.sumDocFreq();
    assert (sdf >= -1);
    writePotentiallyNegativeVLong(sdf);
    // NOTE(review): narrowing cast assumes docCount fits in an int — confirm upstream bound.
    int dc = (int) fieldStats.docCount();
    assert (dc >= -1);
    writePotentiallyNegativeVInt(dc);
}
// Serializes one field's collection statistics in the fixed order:
// sumTotalTermFreq, sumDocFreq, docCount. -1 means "statistic unavailable".
private void writeFieldStatistics(CollectionStatistics fieldStats) throws IOException {
    long sumTtf = fieldStats.sumTotalTermFreq();
    assert sumTtf >= -1 : sumTtf;
    writePotentiallyNegativeVLong(sumTtf);
    long sumDf = fieldStats.sumDocFreq();
    assert sumDf >= -1 : sumDf;
    writePotentiallyNegativeVLong(sumDf);
    int docCount = (int) fieldStats.docCount();
    assert docCount >= -1 : docCount;
    writePotentiallyNegativeVInt(docCount);
}
/**
 * Builds the per-query weight from collection- and term-level statistics.
 * Falls back to {@code maxDoc} when the per-field {@code docCount} is unavailable (-1).
 */
@Override
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
    Query query = new Query(boost);
    long docCount = collectionStats.docCount();
    if (docCount == -1) {
        docCount = collectionStats.maxDoc();
    }
    Field field = new Field(docCount, collectionStats.sumDocFreq(), collectionStats.sumTotalTermFreq());
    // One Term entry per query term, carrying its docFreq and totalTermFreq.
    Term[] terms = new Term[termStats.length];
    for (int i = 0; i < termStats.length; ++i) {
        terms[i] = new Term(termStats[i].docFreq(), termStats[i].totalTermFreq());
    }
    return new Weight(collectionStats.field(), query, field, terms);
}
/**
 * Assembles the weight object for a query from the supplied statistics.
 * When the per-field {@code docCount} is -1 (unavailable), {@code maxDoc} is used instead.
 */
@Override
public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
    final long effectiveDocCount =
            collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();

    final Query query = new Query(boost);
    final Field field =
            new Field(effectiveDocCount, collectionStats.sumDocFreq(), collectionStats.sumTotalTermFreq());

    final Term[] terms = new Term[termStats.length];
    for (int t = 0; t < terms.length; t++) {
        final TermStatistics stat = termStats[t];
        terms[t] = new Term(stat.docFreq(), stat.totalTermFreq());
    }
    return new Weight(collectionStats.field(), query, field, terms);
}
/**
 * Writes per-field collection statistics: an entry count, then for each field the
 * name, maxDoc, and docCount/sumTotalTermFreq/sumDocFreq shifted by one via
 * {@code addOne} so the -1 "unavailable" sentinel fits an unsigned VLong.
 *
 * @param out             destination stream
 * @param fieldStatistics field name to statistics map
 * @throws IOException if writing to the stream fails
 */
public static void writeFieldStats(StreamOutput out, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics) throws IOException {
    out.writeVInt(fieldStatistics.size());
    for (ObjectObjectCursor<String, CollectionStatistics> c : fieldStatistics) {
        out.writeString(c.key);
        CollectionStatistics statistics = c.value;
        assert statistics.maxDoc() >= 0;
        out.writeVLong(statistics.maxDoc());
        // These three may be -1 ("unavailable"); addOne shifts them into VLong range.
        out.writeVLong(addOne(statistics.docCount()));
        out.writeVLong(addOne(statistics.sumTotalTermFreq()));
        out.writeVLong(addOne(statistics.sumDocFreq()));
    }
}
/**
 * Serializes the field statistics map as a count followed by one record per field:
 * field name, maxDoc, then docCount, sumTotalTermFreq and sumDocFreq each offset
 * by one (via {@code addOne}) so that -1 ("unavailable") encodes as a VLong.
 *
 * @throws IOException if writing to the stream fails
 */
public static void writeFieldStats(StreamOutput out, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics) throws IOException {
    out.writeVInt(fieldStatistics.size());
    for (ObjectObjectCursor<String, CollectionStatistics> cursor : fieldStatistics) {
        final String fieldName = cursor.key;
        final CollectionStatistics stats = cursor.value;
        out.writeString(fieldName);
        assert stats.maxDoc() >= 0;
        out.writeVLong(stats.maxDoc());
        out.writeVLong(addOne(stats.docCount()));
        out.writeVLong(addOne(stats.sumTotalTermFreq()));
        out.writeVLong(addOne(stats.sumDocFreq()));
    }
}
// Writes the per-field collection statistics: a VInt entry count, then for each field
// its name, maxDoc, and the three optional statistics (docCount, sumTotalTermFreq,
// sumDocFreq) shifted by one via addOne so the -1 "unavailable" sentinel can be
// encoded as an unsigned VLong.
public static void writeFieldStats(StreamOutput out, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics) throws IOException {
    out.writeVInt(fieldStatistics.size());
    for (ObjectObjectCursor<String, CollectionStatistics> c : fieldStatistics) {
        out.writeString(c.key);
        CollectionStatistics statistics = c.value;
        // maxDoc is always known, so it is written without the addOne shift.
        assert statistics.maxDoc() >= 0;
        out.writeVLong(statistics.maxDoc());
        out.writeVLong(addOne(statistics.docCount()));
        out.writeVLong(addOne(statistics.sumTotalTermFreq()));
        out.writeVLong(addOne(statistics.sumDocFreq()));
    }
}
/**
 * Streams out the field-statistics map. Layout: entry count, then per field the
 * name, maxDoc, and the addOne-shifted docCount, sumTotalTermFreq and sumDocFreq
 * (the shift lets the -1 "unavailable" sentinel round-trip through a VLong).
 *
 * @throws IOException if the stream write fails
 */
public static void writeFieldStats(StreamOutput out, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics) throws IOException {
    final int entryCount = fieldStatistics.size();
    out.writeVInt(entryCount);
    for (ObjectObjectCursor<String, CollectionStatistics> fieldEntry : fieldStatistics) {
        out.writeString(fieldEntry.key);
        final CollectionStatistics fieldStats = fieldEntry.value;
        final long maxDoc = fieldStats.maxDoc();
        assert maxDoc >= 0;
        out.writeVLong(maxDoc);
        out.writeVLong(addOne(fieldStats.docCount()));
        out.writeVLong(addOne(fieldStats.sumTotalTermFreq()));
        out.writeVLong(addOne(fieldStats.sumDocFreq()));
    }
}