/**
 * Returns {@link CollectionStatistics} for a field.
 *
 * This can be overridden for example, to return a field's statistics
 * across a distributed collection.
 * @lucene.experimental
 */
public CollectionStatistics collectionStatistics(String field) throws IOException {
  assert field != null;
  final Terms fieldTerms = MultiFields.getTerms(reader, field);
  final int docCount;
  final long sumTotalTermFreq;
  final long sumDocFreq;
  if (fieldTerms != null) {
    docCount = fieldTerms.getDocCount();
    sumTotalTermFreq = fieldTerms.getSumTotalTermFreq();
    sumDocFreq = fieldTerms.getSumDocFreq();
  } else {
    // No terms indexed for this field: report empty statistics.
    docCount = 0;
    sumTotalTermFreq = 0;
    sumDocFreq = 0;
  }
  return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq);
}
}
/**
 * Builds the per-term weight, pulling real index statistics only when
 * scoring is required; otherwise cheap placeholder stats are substituted.
 */
public TermWeight(IndexSearcher searcher, boolean needsScores, float boost, TermContext termStates)
    throws IOException {
  super(TermQuery.this);
  if (needsScores && termStates == null) {
    throw new IllegalStateException("termStates are required when scores are needed");
  }
  this.needsScores = needsScores;
  this.termStates = termStates;
  this.similarity = searcher.getSimilarity(needsScores);

  final CollectionStatistics collectionStats;
  final TermStatistics termStats;
  if (needsScores == false) {
    // we do not need the actual stats, use fake stats with docFreq=maxDoc and ttf=-1
    final int maxDoc = searcher.getIndexReader().maxDoc();
    collectionStats = new CollectionStatistics(term.field(), maxDoc, -1, -1, -1);
    termStats = new TermStatistics(term.bytes(), maxDoc, -1);
  } else {
    collectionStats = searcher.collectionStatistics(term.field());
    termStats = searcher.termStatistics(term, termStates);
  }
  this.stats = similarity.computeWeight(boost, collectionStats, termStats);
}
/**
 * Deserializes per-field {@link CollectionStatistics} from the stream,
 * allocating a map when the caller did not supply one.
 */
public static ObjectObjectHashMap<String, CollectionStatistics> readFieldStats(
    StreamInput in, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics)
    throws IOException {
  final int count = in.readVInt();
  ObjectObjectHashMap<String, CollectionStatistics> result =
      fieldStatistics == null ? HppcMaps.newNoNullKeysMap(count) : fieldStatistics;
  for (int idx = 0; idx < count; idx++) {
    final String fieldName = in.readString();
    assert fieldName != null;
    final long maxDoc = in.readVLong();
    // subOne decodes the remaining values — presumably reversing a +1 applied
    // at write time so that -1 ("unknown") survives vLong encoding.
    final long docCount = subOne(in.readVLong());
    final long sumTotalTermFreq = subOne(in.readVLong());
    final long sumDocFreq = subOne(in.readVLong());
    result.put(fieldName,
        new CollectionStatistics(fieldName, maxDoc, docCount, sumTotalTermFreq, sumDocFreq));
  }
  return result;
}
private static void validateScoresArePositive(Version indexCreatedVersion, Similarity similarity) throws IOException { CollectionStatistics collectionStats = new CollectionStatistics("some_field", 1200, 1100, 3000, 2000); TermStatistics termStats = new TermStatistics(new BytesRef("some_value"), 100, 130); SimWeight simWeight = similarity.computeWeight(2f, collectionStats, termStats); FieldInvertState state = new FieldInvertState(indexCreatedVersion.luceneVersion.major, "some_field", 20, 20, 0, 50); // length = 20, no overlap final long norm = similarity.computeNorm(state); LeafReader reader = new SingleNormLeafReader(norm); SimScorer scorer = similarity.simScorer(simWeight, reader.getContext()); for (int freq = 1; freq <= 10; ++freq) { float score = scorer.score(0, freq); if (score < 0) { DEPRECATION_LOGGER.deprecated("Similarities should not return negative scores:\n" + scorer.explain(0, Explanation.match(freq, "term freq"))); break; } } }
private static void validateScoresDoNotDecreaseWithFreq(Version indexCreatedVersion, Similarity similarity) throws IOException { CollectionStatistics collectionStats = new CollectionStatistics("some_field", 1200, 1100, 3000, 2000); TermStatistics termStats = new TermStatistics(new BytesRef("some_value"), 100, 130); SimWeight simWeight = similarity.computeWeight(2f, collectionStats, termStats); FieldInvertState state = new FieldInvertState(indexCreatedVersion.luceneVersion.major, "some_field", 20, 20, 0, 50); // length = 20, no overlap final long norm = similarity.computeNorm(state); LeafReader reader = new SingleNormLeafReader(norm); SimScorer scorer = similarity.simScorer(simWeight, reader.getContext()); float previousScore = Float.NEGATIVE_INFINITY; for (int freq = 1; freq <= 10; ++freq) { float score = scorer.score(0, freq); if (score < previousScore) { DEPRECATION_LOGGER.deprecated("Similarity scores should not decrease when term frequency increases:\n" + scorer.explain(0, Explanation.match(freq - 1, "term freq")) + "\n" + scorer.explain(0, Explanation.match(freq, "term freq"))); break; } previousScore = score; } }
/** Logs a deprecation warning if the score grows while the norm (field length) grows. */
private static void validateScoresDoNotIncreaseWithNorm(Version indexCreatedVersion, Similarity similarity) throws IOException {
    CollectionStatistics collectionStats = new CollectionStatistics("some_field", 1200, 1100, 3000, 2000);
    TermStatistics termStats = new TermStatistics(new BytesRef("some_value"), 100, 130);
    SimWeight simWeight = similarity.computeWeight(2f, collectionStats, termStats);
    SimScorer previousScorer = null;
    long previousNorm = 0;
    // +inf sentinel: the first iteration's comparison is always false, so
    // previousScorer is never dereferenced while still null.
    float previousScore = Float.POSITIVE_INFINITY;
    for (int length = 1; length <= 10; ++length) {
        // Field length grows each iteration, no position overlap.
        FieldInvertState state = new FieldInvertState(indexCreatedVersion.luceneVersion.major, "some_field", length, length, 0, 50);
        final long norm = similarity.computeNorm(state);
        // Norms are compared as unsigned longs; a non-monotonic encoding
        // would make the comparison meaningless.
        if (Long.compareUnsigned(previousNorm, norm) > 0) {
            // esoteric similarity, skip this check
            break;
        }
        LeafReader reader = new SingleNormLeafReader(norm);
        SimScorer scorer = similarity.simScorer(simWeight, reader.getContext());
        float score = scorer.score(0, 1);
        if (score > previousScore) {
            DEPRECATION_LOGGER.deprecated("Similarity scores should not increase when norm increases:\n" + previousScorer.explain(0, Explanation.match(1, "term freq")) + "\n" + scorer.explain(0, Explanation.match(1, "term freq")));
            break;
        }
        previousScorer = scorer;
        previousScore = score;
        previousNorm = norm;
    }
}
CollectionStatistics existing = fieldStatistics.get(key); if (existing != null) { CollectionStatistics merged = new CollectionStatistics( key, existing.maxDoc() + value.maxDoc(), optionalSum(existing.docCount(), value.docCount()),
@Override public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException { IndexSearcher searcher = (IndexSearcher)context.get("searcher"); final TFIDFSimilarity similarity = IDFValueSource.asTFIDF(searcher.getSimilarity(true), field); if (similarity == null) { throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)"); } // Only works if the contribution of the tf is 1 when the freq is 1 and contribution of the idf // is 1 when docCount == docFreq == 1 final SimWeight simWeight = similarity.computeWeight(1f, new CollectionStatistics(field, 1, 1, 1, 1), new TermStatistics(new BytesRef("bogus"), 1, 1)); final SimScorer simScorer = similarity.simScorer(simWeight, readerContext); return new FloatDocValues(this) { int lastDocID = -1; @Override public float floatVal(int docID) throws IOException { if (docID < lastDocID) { throw new AssertionError("docs out of order: lastDocID=" + lastDocID + " docID=" + docID); } lastDocID = docID; return simScorer.score(docID, 1f); } }; }
/**
 * Returns {@link CollectionStatistics} for a field.
 *
 * This can be overridden for example, to return a field's statistics
 * across a distributed collection.
 * @lucene.experimental
 */
public CollectionStatistics collectionStatistics(String field) throws IOException {
  assert field != null;
  final Terms t = MultiFields.getTerms(reader, field);
  // Absent field => all-zero statistics.
  final int docCount = t == null ? 0 : t.getDocCount();
  final long sumTotalTermFreq = t == null ? 0 : t.getSumTotalTermFreq();
  final long sumDocFreq = t == null ? 0 : t.getSumDocFreq();
  return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq);
}
}
/**
 * Returns {@link CollectionStatistics} for a field.
 *
 * This can be overridden for example, to return a field's statistics
 * across a distributed collection.
 * @lucene.experimental
 */
public CollectionStatistics collectionStatistics(String field) throws IOException {
  assert field != null;
  final Terms fieldTerms = MultiFields.getTerms(reader, field);
  final int docCount;
  final long sumTotalTermFreq;
  final long sumDocFreq;
  if (fieldTerms == null) {
    // Field has no indexed terms: report empty statistics.
    docCount = 0;
    sumTotalTermFreq = 0;
    sumDocFreq = 0;
  } else {
    docCount = fieldTerms.getDocCount();
    sumTotalTermFreq = fieldTerms.getSumTotalTermFreq();
    sumDocFreq = fieldTerms.getSumDocFreq();
  }
  return new CollectionStatistics(
      field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq);
}
}
/**
 * Constructs the weight for one term; real statistics are fetched only
 * when scores are needed, otherwise placeholders are used.
 */
public TermWeight(IndexSearcher searcher, boolean needsScores, float boost, TermContext termStates)
    throws IOException {
  super(TermQuery.this);
  if (needsScores && termStates == null) {
    throw new IllegalStateException("termStates are required when scores are needed");
  }
  this.needsScores = needsScores;
  this.termStates = termStates;
  this.similarity = searcher.getSimilarity(needsScores);

  final CollectionStatistics collectionStats;
  final TermStatistics termStats;
  if (needsScores) {
    collectionStats = searcher.collectionStatistics(term.field());
    termStats = searcher.termStatistics(term, termStates);
  } else {
    // we do not need the actual stats, use fake stats with docFreq=maxDoc and ttf=-1
    final int numDocs = searcher.getIndexReader().maxDoc();
    collectionStats = new CollectionStatistics(term.field(), numDocs, -1, -1, -1);
    termStats = new TermStatistics(term.bytes(), numDocs, -1);
  }
  this.stats = similarity.computeWeight(boost, collectionStats, termStats);
}
public TermWeight(final IndexSearcher searcher, final boolean needsScores, final float boost, final TermContext termStates) throws IOException { super(DependentTermQuery.this); if (needsScores && termStates == null) { throw new IllegalStateException("termStates are required when scores are needed"); } final Term term = getTerm(); this.needsScores = needsScores; this.termStates = termStates; this.similarity = searcher.getSimilarity(needsScores); final int maxDoc = searcher.getIndexReader().maxDoc(); final CollectionStatistics collectionStats = new CollectionStatistics(term.field(), maxDoc, -1, -1, -1); final TermStatistics termStats; if (needsScores) { termStats = searcher.termStatistics(term, termStates); } else { // we do not need the actual stats, use fake stats with docFreq=maxDoc and ttf=-1 termStats = new TermStatistics(term.bytes(), maxDoc, -1); } fieldBoostFactor = fieldBoost.getBoost(getTerm().field(), searcher.getIndexReader()); this.stats = similarity.computeWeight(boost * fieldBoostFactor, collectionStats, termStats); }
/**
 * Reads serialized per-field {@link CollectionStatistics}; lazily creates
 * the destination map if the caller passed {@code null}.
 */
public static ObjectObjectHashMap<String, CollectionStatistics> readFieldStats(
    StreamInput in, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics)
    throws IOException {
  final int numFieldStatistics = in.readVInt();
  ObjectObjectHashMap<String, CollectionStatistics> target = fieldStatistics;
  if (target == null) {
    target = HppcMaps.newNoNullKeysMap(numFieldStatistics);
  }
  int remaining = numFieldStatistics;
  while (remaining-- > 0) {
    final String fieldName = in.readString();
    assert fieldName != null;
    final long maxDoc = in.readVLong();
    // subOne decodes each value — presumably undoing a +1 applied when
    // writing so that -1 ("unknown") round-trips through a vLong.
    final long docCount = subOne(in.readVLong());
    final long sumTotalTermFreq = subOne(in.readVLong());
    final long sumDocFreq = subOne(in.readVLong());
    target.put(fieldName,
        new CollectionStatistics(fieldName, maxDoc, docCount, sumTotalTermFreq, sumDocFreq));
  }
  return target;
}
/**
 * Deserializes field statistics from the stream into the given map,
 * allocating one when none is provided.
 */
public static ObjectObjectHashMap<String, CollectionStatistics> readFieldStats(
    StreamInput in, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics)
    throws IOException {
  final int entries = in.readVInt();
  final ObjectObjectHashMap<String, CollectionStatistics> map =
      fieldStatistics != null ? fieldStatistics : HppcMaps.newNoNullKeysMap(entries);
  for (int n = 0; n < entries; n++) {
    final String name = in.readString();
    assert name != null;
    final long maxDoc = in.readVLong();
    // Remaining values pass through subOne — NOTE(review): presumably the
    // writer added one so -1 fits in a vLong; verify against the writer.
    final long docCount = subOne(in.readVLong());
    final long sumTotalTermFreq = subOne(in.readVLong());
    final long sumDocFreq = subOne(in.readVLong());
    map.put(name, new CollectionStatistics(name, maxDoc, docCount, sumTotalTermFreq, sumDocFreq));
  }
  return map;
}
/**
 * Reads {@code numFieldStatistics} field-statistics entries from the
 * stream, creating the result map when the caller supplies {@code null}.
 */
public static ObjectObjectHashMap<String, CollectionStatistics> readFieldStats(
    StreamInput in, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics)
    throws IOException {
  final int total = in.readVInt();
  ObjectObjectHashMap<String, CollectionStatistics> out = fieldStatistics;
  if (out == null) {
    out = HppcMaps.newNoNullKeysMap(total);
  }
  for (int i = 0; i < total; ++i) {
    final String field = in.readString();
    assert field != null;
    final long maxDoc = in.readVLong();
    // subOne decodes the serialized values (presumably written with +1 so
    // that -1 survives vLong encoding).
    final long docCount = subOne(in.readVLong());
    final long sumTotalTermFreq = subOne(in.readVLong());
    final long sumDocFreq = subOne(in.readVLong());
    final CollectionStatistics stats =
        new CollectionStatistics(field, maxDoc, docCount, sumTotalTermFreq, sumDocFreq);
    out.put(field, stats);
  }
  return out;
}
/**
 * Decodes field-level collection statistics from the wire format into a
 * map, building the map on demand if needed.
 */
public static ObjectObjectHashMap<String, CollectionStatistics> readFieldStats(
    StreamInput in, ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics)
    throws IOException {
  final int statCount = in.readVInt();
  ObjectObjectHashMap<String, CollectionStatistics> dest =
      fieldStatistics == null ? HppcMaps.newNoNullKeysMap(statCount) : fieldStatistics;
  for (int i = 0; i < statCount; i++) {
    final String fieldName = in.readString();
    assert fieldName != null;
    // Read order matters: maxDoc, docCount, sumTotalTermFreq, sumDocFreq.
    final long maxDoc = in.readVLong();
    // subOne presumably reverses an addOne at write time (vLongs cannot encode -1).
    final long docCount = subOne(in.readVLong());
    final long sumTotalTermFreq = subOne(in.readVLong());
    final long sumDocFreq = subOne(in.readVLong());
    dest.put(fieldName,
        new CollectionStatistics(fieldName, maxDoc, docCount, sumTotalTermFreq, sumDocFreq));
  }
  return dest;
}
private static void validateScoresArePositive(Version indexCreatedVersion, Similarity similarity) throws IOException { CollectionStatistics collectionStats = new CollectionStatistics("some_field", 1200, 1100, 3000, 2000); TermStatistics termStats = new TermStatistics(new BytesRef("some_value"), 100, 130); SimWeight simWeight = similarity.computeWeight(2f, collectionStats, termStats); FieldInvertState state = new FieldInvertState(indexCreatedVersion.luceneVersion.major, "some_field", 20, 20, 0, 50); // length = 20, no overlap final long norm = similarity.computeNorm(state); LeafReader reader = new SingleNormLeafReader(norm); SimScorer scorer = similarity.simScorer(simWeight, reader.getContext()); for (int freq = 1; freq <= 10; ++freq) { float score = scorer.score(0, freq); if (score < 0) { DEPRECATION_LOGGER.deprecated("Similarities should not return negative scores:\n" + scorer.explain(0, Explanation.match(freq, "term freq"))); break; } } }
public TermWeight(IndexSearcher searcher, boolean needsScores, TermContext termStates) throws IOException { super(TermQuery.this); this.needsScores = needsScores; assert termStates != null : "TermContext must not be null"; // checked with a real exception in TermQuery constructor assert termStates.hasOnlyRealTerms(); this.termStates = termStates; this.similarity = searcher.getSimilarity(needsScores); final CollectionStatistics collectionStats; final TermStatistics termStats; if (needsScores) { collectionStats = searcher.collectionStatistics(term.field()); termStats = searcher.termStatistics(term, termStates); } else { // do not bother computing actual stats, scores are not needed final int maxDoc = searcher.getIndexReader().maxDoc(); final int docFreq = termStates.docFreq(); final long totalTermFreq = termStates.totalTermFreq(); collectionStats = new CollectionStatistics(term.field(), maxDoc, -1, -1, -1); termStats = new TermStatistics(term.bytes(), docFreq, totalTermFreq); } this.stats = similarity.computeWeight(collectionStats, termStats); }
public TermWeight(IndexSearcher searcher, boolean needsScores, TermContext termStates) throws IOException { super(TermQuery.this); this.needsScores = needsScores; assert termStates != null : "TermContext must not be null"; // checked with a real exception in TermQuery constructor assert termStates.hasOnlyRealTerms(); this.termStates = termStates; this.similarity = searcher.getSimilarity(needsScores); final CollectionStatistics collectionStats; final TermStatistics termStats; if (needsScores) { collectionStats = searcher.collectionStatistics(term.field()); termStats = searcher.termStatistics(term, termStates); } else { // do not bother computing actual stats, scores are not needed final int maxDoc = searcher.getIndexReader().maxDoc(); final int docFreq = termStates.docFreq(); final long totalTermFreq = termStates.totalTermFreq(); collectionStats = new CollectionStatistics(term.field(), maxDoc, -1, -1, -1); termStats = new TermStatistics(term.bytes(), docFreq, totalTermFreq); } this.stats = similarity.computeWeight(collectionStats, termStats); }
private static void validateScoresDoNotDecreaseWithFreq(Version indexCreatedVersion, Similarity similarity) throws IOException { CollectionStatistics collectionStats = new CollectionStatistics("some_field", 1200, 1100, 3000, 2000); TermStatistics termStats = new TermStatistics(new BytesRef("some_value"), 100, 130); SimWeight simWeight = similarity.computeWeight(2f, collectionStats, termStats); FieldInvertState state = new FieldInvertState(indexCreatedVersion.luceneVersion.major, "some_field", 20, 20, 0, 50); // length = 20, no overlap final long norm = similarity.computeNorm(state); LeafReader reader = new SingleNormLeafReader(norm); SimScorer scorer = similarity.simScorer(simWeight, reader.getContext()); float previousScore = Float.NEGATIVE_INFINITY; for (int freq = 1; freq <= 10; ++freq) { float score = scorer.score(0, freq); if (score < previousScore) { DEPRECATION_LOGGER.deprecated("Similarity scores should not decrease when term frequency increases:\n" + scorer.explain(0, Explanation.match(freq - 1, "term freq")) + "\n" + scorer.explain(0, Explanation.match(freq, "term freq"))); break; } previousScore = score; } }