/**
 * Checks that the partial-aggregation estimate agrees with the group-by estimate.
 *
 * <p>Runs 100 rounds. Each round draws a unique-value count in
 * {@code [1, getUniqueValuesCount()]}, builds a sample through
 * {@code createRandomSample(uniques, (int) (uniques * 1.5))}, and asserts that
 * {@code estimateCountPartial} and {@code estimateGroupByCount} return equal
 * results for that sample.
 *
 * @param maxStandardError the standard error forwarded to both estimators
 */
@Test(dataProvider = "provideStandardErrors")
public void testMultiplePositionsPartial(double maxStandardError)
{
    ThreadLocalRandom random = ThreadLocalRandom.current();
    int round = 0;
    while (round < 100) {
        // nextInt(bound) yields [0, bound), so the +1 keeps the count strictly positive.
        int uniques = random.nextInt(getUniqueValuesCount()) + 1;
        int sampleSize = (int) (uniques * 1.5);
        List<Object> values = createRandomSample(uniques, sampleSize);

        assertEquals(
                estimateCountPartial(values, maxStandardError),
                estimateGroupByCount(values, maxStandardError));
        ++round;
    }
}
@Test(dataProvider = "provideStandardErrors") public void testMixedNullsAndNonNulls(double maxStandardError) { int uniques = getUniqueValuesCount(); List<Object> baseline = createRandomSample(uniques, (int) (uniques * 1.5)); // Randomly insert nulls // We need to retain the preexisting order to ensure that the HLL can generate the same estimates. Iterator<Object> iterator = baseline.iterator(); List<Object> mixed = new ArrayList<>(); while (iterator.hasNext()) { mixed.add(ThreadLocalRandom.current().nextBoolean() ? null : iterator.next()); } assertCount(mixed, maxStandardError, estimateGroupByCount(baseline, maxStandardError)); }
/**
 * Checks the relative error of the group-by estimate over many random samples.
 *
 * <p>Runs 500 rounds. Each round draws a unique-value count in
 * {@code [1, getUniqueValuesCount()]}, builds a sample through
 * {@code createRandomSample(uniques, (int) (uniques * 1.5))}, and records
 * {@code (estimate - uniques) / uniques} for the value returned by
 * {@code estimateGroupByCount}. Afterwards the mean of the recorded errors must be
 * below {@code 1.0e-2} and their standard deviation below
 * {@code 1.0e-2 + maxStandardError}.
 *
 * @param maxStandardError the standard error forwarded to the estimator and added
 *        to the standard-deviation bound
 */
@Test(dataProvider = "provideStandardErrors")
public void testMultiplePositions(double maxStandardError)
{
    DescriptiveStatistics stats = new DescriptiveStatistics();
    ThreadLocalRandom random = ThreadLocalRandom.current();

    int remaining = 500;
    while (remaining > 0) {
        // nextInt(bound) yields [0, bound), so the +1 keeps the divisor below non-zero.
        int uniques = random.nextInt(getUniqueValuesCount()) + 1;
        int sampleSize = (int) (uniques * 1.5);
        List<Object> values = createRandomSample(uniques, sampleSize);

        long estimate = estimateGroupByCount(values, maxStandardError);
        // Signed relative error: negative when the estimate undercounts.
        double relativeError = ((double) (estimate - uniques)) / uniques;
        stats.addValue(relativeError);
        --remaining;
    }

    // Only the upper side of the mean is bounded here; a negative mean passes.
    assertLessThan(stats.getMean(), 1.0e-2);
    assertLessThan(stats.getStandardDeviation(), 1.0e-2 + maxStandardError);
}