/**
 * Constructs an instance of ExpansionTerms.
 * @param collStats Statistics of the corpus used
 * @param _lexicon The lexicon used for retrieval.
 * @param _directIndex DirectIndex to use for finding terms for documents
 * @param _documentIndex DocumentIndex to use for finding statistics about documents
 */
public DFRBagExpansionTerms(CollectionStatistics collStats, Lexicon<String> _lexicon,
		PostingIndex<?> _directIndex, DocumentIndex _documentIndex)
{
	this.numberOfDocuments = collStats.getNumberOfDocuments();
	this.numberOfTokens = collStats.getNumberOfTokens();
	this.averageDocumentLength = collStats.getAverageDocumentLength();
	this.terms = new TIntObjectHashMap<ExpansionTerm>();
	this.totalDocumentLength = 0;
	this.lexicon = _lexicon;
	this.documentIndex = _documentIndex;
	this.directIndex = _directIndex;
}
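// Usage sketch (not part of the original source): one way this constructor might be
// invoked from an open Terrier Index, assuming the index exposes the usual accessors
// for its structures; the variable names below are illustrative only.
Index index = Index.createIndex();
ExpansionTerms expTerms = new DFRBagExpansionTerms(
		index.getCollectionStatistics(),
		index.getLexicon(),
		index.getDirectIndex(),
		index.getDocumentIndex());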
@Override
public void setCollectionStatistics(CollectionStatistics _cs)
{
	super.setCollectionStatistics(_cs);
	this.basicModel.setNumberOfDocuments(_cs.getNumberOfDocuments());
	this.basicModel.setNumberOfTokens(_cs.getNumberOfTokens());
	this.afterEffect.setAverageDocumentLength(_cs.getAverageDocumentLength());
	this.normalisation.setNumberOfDocuments(_cs.getNumberOfDocuments());
	this.normalisation.setNumberOfTokens(_cs.getNumberOfTokens());
	this.normalisation.setAverageDocumentLength(_cs.getAverageDocumentLength());
	this.i.setNumberOfDocuments(_cs.getNumberOfDocuments());
}
/** Prepares the weighting model: caches the collection statistics and the
 *  term's document and collection frequencies before scoring. */
public void prepare()
{
	averageDocumentLength = cs.getAverageDocumentLength();
	numberOfDocuments = (double) cs.getNumberOfDocuments();
	i.setNumberOfDocuments(numberOfDocuments);
	numberOfTokens = (double) cs.getNumberOfTokens();
	numberOfUniqueTerms = (double) cs.getNumberOfUniqueTerms();
	numberOfPointers = (double) cs.getNumberOfPointers();
	documentFrequency = (double) getOverflowed(es.getDocumentFrequency());
	termFrequency = (double) getOverflowed(es.getFrequency());
}
@Test
public void testWritable() throws Exception
{
	CollectionStatistics cs1 = new CollectionStatistics(5, 6, 7, 8, new long[]{2});
	ByteArrayOutputStream baos = new ByteArrayOutputStream();
	DataOutputStream dos = new DataOutputStream(baos);
	cs1.write(dos);
	dos.flush();
	final byte[] bytes = baos.toByteArray();
	assertTrue(bytes.length > 0);
	CollectionStatistics cs2 = new CollectionStatistics();
	cs2.readFields(new DataInputStream(new ByteArrayInputStream(bytes)));
	assertEquals(cs1.getNumberOfDocuments(), cs2.getNumberOfDocuments());
	assertEquals(cs1.getNumberOfUniqueTerms(), cs2.getNumberOfUniqueTerms());
	assertEquals(cs1.getNumberOfPointers(), cs2.getNumberOfPointers());
	assertEquals(cs1.getNumberOfTokens(), cs2.getNumberOfTokens());
	assertEquals(cs1.getAverageDocumentLength(), cs2.getAverageDocumentLength(), 0.0d);
	//TODO: test fields
}
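// Sketch (an assumption, not from the original source): the same write/readFields
// round-trip exercised in the test above could persist CollectionStatistics to disk.
// The file name is illustrative and the usual java.io imports are assumed.
try (DataOutputStream out = new DataOutputStream(new FileOutputStream("collection.stats"))) {
	cs1.write(out);
}
CollectionStatistics restored = new CollectionStatistics();
try (DataInputStream in = new DataInputStream(new FileInputStream("collection.stats"))) {
	restored.readFields(in);
}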
protected void checkCollectionStatistics(Index index)
{
	final CollectionStatistics cs = index.getCollectionStatistics();
	System.err.println("num docs=" + cs.getNumberOfDocuments());
	assertEquals("Number of documents doesn't match", DOCUMENT_LENGTHS.length, cs.getNumberOfDocuments());
	assertEquals("Number of tokens doesn't match", StaTools.sum(DOCUMENT_LENGTHS), cs.getNumberOfTokens());
	assertEquals("Average document length doesn't match", StaTools.mean(DOCUMENT_LENGTHS), cs.getAverageDocumentLength(), 0.0d);
	assertEquals("Number of pointers doesn't match", NUMBER_POINTERS, cs.getNumberOfPointers());
	assertEquals("Number of unique terms doesn't match", NUMBER_UNIQUE_TERMS, cs.getNumberOfUniqueTerms());
}
assertEquals(4, cs.getNumberOfUniqueTerms());
assertEquals(6L, cs.getNumberOfPointers());
assertEquals(4.0d, cs.getAverageDocumentLength(), 0.0d);
assertEquals(stats1.getAverageDocumentLength(), stats2.getAverageDocumentLength(), 0.0d);