/**
 * Rescales the supplied values linearly so that the minimum maps to 0 and the
 * maximum maps to 1. The array is overwritten in place and the very same
 * array object is handed back.
 * <p>
 * An empty array, or one for which {@code StaTools.min} and
 * {@code StaTools.max} both report zero, is returned untouched. When minimum
 * and maximum coincide at a non-zero value, the scale is the reciprocal of
 * that value; because the minimum is subtracted first, finite inputs then all
 * become zero.
 *
 * @param data the values to normalise; overwritten with the result
 * @return {@code data} itself, after normalisation
 */
public static double[] standardNormalisation(final double[] data) {
    final int count = data.length;
    if (count == 0) {
        return data;
    }

    final double lowest = StaTools.min(data);
    final double highest = StaTools.max(data);
    if (lowest == 0 && highest == 0) {
        return data;
    }

    // Precompute the reciprocal once so the loop multiplies instead of dividing.
    final double scale;
    if (highest != lowest) {
        scale = 1.0d / (highest - lowest);
    } else {
        scale = 1.d / highest;
    }

    for (int idx = 0; idx < count; idx++) {
        data[idx] = (data[idx] - lowest) * scale;
    }
    return data;
}
/**
 * Normalises the given scores by delegating to
 * {@code StaTools.standardNormalisation}. The value returned by that call is
 * discarded, so the effect is visible only through {@code staticScores2}
 * itself (NOTE(review): relies on the helper rescaling in place - confirm).
 *
 * @param staticScores2 the scores to normalise
 */
protected static void makeMaxMin(final double[] staticScores2) { StaTools.standardNormalisation(staticScores2); }
/**
 * Computes the variance of the given values: the sum of the squared
 * deviations from {@code mean(data)}, divided by the number of values
 * (not by one less).
 *
 * @param data the values to analyse
 * @return the variance of {@code data}
 */
public static double variance(double[] data) {
    final int count = data.length;
    final double centre = mean(data);

    double squaredDeviations = 0d;
    for (final double value : data) {
        final double offset = value - centre;
        squaredDeviations += offset * offset;
    }
    return squaredDeviations / count;
}
@Override public double score(Posting _p) { FieldPosting p = (FieldPosting)_p; final int[] tff = p.getFieldFrequencies(); final int[] fieldLengths = p.getFieldLengths(); final double[] normFieldFreqs = new double[fieldCount]; for(int i=0;i<fieldCount;i++) { if (tff[i] != 0.0d) normFieldFreqs[i] = fieldWeights[i] * fieldNormalisations[i].normalise(tff[i], fieldLengths[i], fieldGlobalFrequencies[i]); } final double tf = StaTools.sum(normFieldFreqs); //System.err.println("tf=" + tf); if (tf == 0.0d) return 0.0d; return basicModel.score(tf, super.documentFrequency, super.termFrequency, super.keyFrequency, p.getDocumentLength()); }
/**
 * Asserts that the collection statistics reported by the given index agree
 * with the expected values referenced here: {@code DOCUMENT_LENGTHS},
 * {@code NUMBER_POINTERS} and {@code NUMBER_UNIQUE_TERMS}.
 *
 * @param index the index whose collection statistics are checked
 */
protected void checkCollectionStatistics(Index index) {
    final CollectionStatistics cs = index.getCollectionStatistics();
    // Debug aid: print the reported document count to stderr before asserting.
    System.err.println("num docs=" + cs.getNumberOfDocuments());
    // The expected document count is the number of entries in DOCUMENT_LENGTHS.
    assertEquals("Number of documents doesn't match", DOCUMENT_LENGTHS.length, cs.getNumberOfDocuments());
    // The expected token count is StaTools.sum over DOCUMENT_LENGTHS.
    assertEquals("Number of tokens doesn't match", StaTools.sum(DOCUMENT_LENGTHS), cs.getNumberOfTokens());
    // The trailing 0.0d is presumably the comparison tolerance (JUnit-style
    // delta), i.e. an exact match is required - confirm.
    assertEquals("Average document length doesn't match", StaTools.mean(DOCUMENT_LENGTHS), cs.getAverageDocumentLength(), 0.0d);
    assertEquals("Number of pointers doesnt match", NUMBER_POINTERS, cs.getNumberOfPointers());
    assertEquals("Number of unique terms doesn't match", NUMBER_UNIQUE_TERMS, cs.getNumberOfUniqueTerms());
}
/**
 * {@inheritDoc}
 * <p>
 * Returns {@code StaTools.max(numTerms)} when
 * {@code approximateNumberofEntries} is set, and the size of
 * {@code uniqueTerms} otherwise.
 */
public int numberOfEntries() {
    if (!approximateNumberofEntries) {
        return uniqueTerms.size();
    }
    return StaTools.max(numTerms);
}
/**
 * Computes the standard error of the mean of a sample: the standard
 * deviation of the data divided by the square root of the number of
 * observations.
 *
 * @param data the sampled data
 * @return the standard error of the mean
 */
public static double stdErrorOfTheMean(double[] data) {
    final double deviation = standardDeviation(data);
    final double rootOfSampleSize = Math.sqrt(data.length);
    return deviation / rootOfSampleSize;
}
/**
 * Computes the standard deviation of the given values, i.e. the square root
 * of the value returned by {@code variance(data)}.
 *
 * @param data the values to analyse
 * @return the standard deviation
 */
public static double standardDeviation(double[] data) {
    final double spread = variance(data);
    return Math.sqrt(spread);
}

/**
/**
 * Checks {@code StaTools.meanNonZero} on two small inputs: {0, 1} must give
 * exactly 1.0 and {0, 0.5, 1} must give exactly 0.75.
 */
@Test
public void testMeanNonZero() {
    final double[] singleNonZero = {0.0d, 1.0d};
    assertEquals(1.0d, StaTools.meanNonZero(singleNonZero), 0.0d);

    final double[] twoNonZero = {0.0d, 0.5d, 1.0d};
    assertEquals(0.75d, StaTools.meanNonZero(twoNonZero), 0.0d);
}
@Override public double score(Posting _p) { FieldPosting p = (FieldPosting)_p; final int[] tff = p.getFieldFrequencies(); final int[] fieldLengths = p.getFieldLengths(); final double[] normFieldFreqs = new double[fieldCount]; for(int i=0;i<fieldCount;i++) { if (tff[i] != 0.0d) normFieldFreqs[i] = fieldWeights[i] * fieldNormalisations[i].normalise(tff[i], fieldLengths[i], fieldGlobalFrequencies[i]); } final double tf = StaTools.sum(normFieldFreqs); //System.err.println("tf=" + tf); if (tf == 0.0d) return 0.0d; return basicModel.score(tf, super.documentFrequency, super.termFrequency, super.keyFrequency, p.getDocumentLength()); }
/**
 * Computes the standard error of the mean of a sample: the standard
 * deviation of the data divided by the square root of the number of
 * observations.
 *
 * @param data the sampled data
 * @return the standard error of the mean
 */
public static double stdErrorOfTheMean(double[] data) {
    final double deviation = standardDeviation(data);
    final double rootOfSampleSize = Math.sqrt(data.length);
    return deviation / rootOfSampleSize;
}
/**
 * Computes the standard deviation of the given values, i.e. the square root
 * of the value returned by {@code variance(data)}.
 *
 * @param data the values to analyse
 * @return the standard deviation
 */
public static double standardDeviation(double[] data) {
    final double spread = variance(data);
    return Math.sqrt(spread);
}

/**
/**
 * Rescales the supplied values linearly so that the minimum maps to 0 and the
 * maximum maps to 1. The array is overwritten in place and the very same
 * array object is handed back.
 * <p>
 * An empty array, or one for which {@code min} and {@code max} both report
 * zero, is returned untouched. When minimum and maximum coincide at a
 * non-zero value, the scale is the reciprocal of that value; because the
 * minimum is subtracted first, finite inputs then all become zero.
 *
 * @param data the values to normalise; overwritten with the result
 * @return {@code data} itself, after normalisation
 */
public static float[] standardNormalisation(final float[] data) {
    final int count = data.length;
    if (count == 0) {
        return data;
    }

    final float lowest = min(data);
    final float highest = max(data);
    if (lowest == 0 && highest == 0) {
        return data;
    }

    // Precompute the reciprocal once so the loop multiplies instead of dividing.
    final float scale;
    if (highest != lowest) {
        scale = 1.0f / (highest - lowest);
    } else {
        scale = 1.f / highest;
    }

    for (int idx = 0; idx < count; idx++) {
        data[idx] = (data[idx] - lowest) * scale;
    }
    return data;
}
/**
 * Normalises the given scores by delegating to
 * {@code StaTools.standardNormalisation}. The value returned by that call is
 * discarded, so the effect is visible only through {@code staticScores2}
 * itself (NOTE(review): relies on the helper rescaling in place - confirm).
 *
 * @param staticScores2 the scores to normalise
 */
protected static void makeMaxMin(final double[] staticScores2) { StaTools.standardNormalisation(staticScores2); }
assert cnt == StaTools.sum(bfs, i); tfsCodec.compress(bfs, i, output); blocksCodec.compress(blocks, cnt, output);
/**
 * Computes the variance of the given values: the sum of the squared
 * deviations from {@code mean(data)}, divided by the number of values
 * (not by one less).
 *
 * @param data the values to analyse
 * @return the variance of {@code data}
 */
public static double variance(double[] data) {
    final int count = data.length;
    final double centre = mean(data);

    double squaredDeviations = 0d;
    for (final double value : data) {
        final double offset = value - centre;
        squaredDeviations += offset * offset;
    }
    return squaredDeviations / count;
}
/**
 * Rescales the supplied values linearly so that the minimum maps to 0 and the
 * maximum maps to 1. The array is overwritten in place and the very same
 * array object is handed back.
 * <p>
 * An empty array, or one for which {@code StaTools.min} and
 * {@code StaTools.max} both report zero, is returned untouched. When minimum
 * and maximum coincide at a non-zero value, the scale is the reciprocal of
 * that value; because the minimum is subtracted first, finite inputs then all
 * become zero.
 *
 * @param data the values to normalise; overwritten with the result
 * @return {@code data} itself, after normalisation
 */
public static double[] standardNormalisation(final double[] data) {
    final int count = data.length;
    if (count == 0) {
        return data;
    }

    final double lowest = StaTools.min(data);
    final double highest = StaTools.max(data);
    if (lowest == 0 && highest == 0) {
        return data;
    }

    // Precompute the reciprocal once so the loop multiplies instead of dividing.
    final double scale;
    if (highest != lowest) {
        scale = 1.0d / (highest - lowest);
    } else {
        scale = 1.d / highest;
    }

    for (int idx = 0; idx < count; idx++) {
        data[idx] = (data[idx] - lowest) * scale;
    }
    return data;
}
/**
 * Normalises the scores of the request's result set with
 * {@code StaTools.standardNormalisation} and then delegates the actual
 * printing to the superclass.
 * <p>
 * When the result set is a {@code FeaturedResultSet}, the score array of
 * every named feature is normalised in the same way before printing.
 *
 * @param pw the writer handed on to the superclass
 * @param q the search request; cast to {@code Request} to obtain its result set
 * @param method passed through to the superclass unchanged
 * @param iteration passed through to the superclass unchanged
 * @param numberOfResults passed through to the superclass unchanged
 * @throws IOException declared by this method; propagated from the superclass call
 */
@Override
public void printResults(PrintWriter pw, SearchRequest q, String method, String iteration, int numberOfResults)
        throws IOException {
    final ResultSet results = ((Request) q).getResultSet();
    StaTools.standardNormalisation(results.getScores());

    if (r_isFeatured(results)) {
        final FeaturedResultSet featured = (FeaturedResultSet) results;
        for (final String featureName : featured.getFeatureNames()) {
            StaTools.standardNormalisation(featured.getFeatureScores(featureName));
        }
    }

    super.printResults(pw, q, method, iteration, numberOfResults);
}

/** Reports whether the given result set carries per-feature scores. */
private static boolean r_isFeatured(final ResultSet results) {
    return results instanceof FeaturedResultSet;
}
assert cnt == StaTools.sum(bfs, i); tfsCodec.compress(bfs, i, output); blocksCodec.compress(blocks, cnt, output);