@Test(dataProvider = "provideStandardErrors") public void testMixedNullsAndNonNulls(double maxStandardError) { int uniques = getUniqueValuesCount(); List<Object> baseline = createRandomSample(uniques, (int) (uniques * 1.5)); // Randomly insert nulls // We need to retain the preexisting order to ensure that the HLL can generate the same estimates. Iterator<Object> iterator = baseline.iterator(); List<Object> mixed = new ArrayList<>(); while (iterator.hasNext()) { mixed.add(ThreadLocalRandom.current().nextBoolean() ? null : iterator.next()); } assertCount(mixed, maxStandardError, estimateGroupByCount(baseline, maxStandardError)); }
/**
 * Wraps the given values in a {@code Page} suitable for the aggregation under test.
 *
 * <p>An empty list yields a zero-position page with no blocks. Otherwise the page has
 * two blocks: the values themselves (typed by {@code getValueType()}) and a DOUBLE
 * block that repeats {@code maxStandardError} once per value.
 *
 * @param values the input values, possibly containing nulls
 * @param maxStandardError the standard error repeated for every position
 * @return the page built from {@code values}
 */
private Page createPage(List<?> values, double maxStandardError)
{
    if (values.isEmpty()) {
        return new Page(0);
    }

    int positionCount = values.size();
    return new Page(
            positionCount,
            createBlock(getValueType(), values),
            createBlock(DOUBLE, ImmutableList.copyOf(Collections.nCopies(positionCount, maxStandardError))));
}
/**
 * Runs the aggregation function over the values via
 * {@code AggregationTestUtils.partialAggregation} and returns its result as a long.
 *
 * @param values the input values
 * @param maxStandardError the standard error supplied alongside every value
 * @return the estimate produced by the partial aggregation path
 */
private long estimateCountPartial(List<?> values, double maxStandardError)
{
    Object estimate = AggregationTestUtils.partialAggregation(
            getAggregationFunction(),
            createPage(values, maxStandardError));
    // The helper returns an untyped Object; the result is expected to be a boxed Long.
    return ((Long) estimate).longValue();
}
/**
 * Verifies, over 100 random samples, that the partial aggregation path yields exactly
 * the same estimate as the grouped aggregation path.
 *
 * @param maxStandardError the standard error passed through to the aggregation
 */
@Test(dataProvider = "provideStandardErrors")
public void testMultiplePositionsPartial(double maxStandardError)
{
    for (int round = 0; round < 100; ++round) {
        // nextInt(bound) is in [0, bound), so the + 1 guarantees at least one unique value.
        int distinctCount = ThreadLocalRandom.current().nextInt(getUniqueValuesCount()) + 1;
        List<Object> sample = createRandomSample(distinctCount, (int) (distinctCount * 1.5));

        long partialEstimate = estimateCountPartial(sample, maxStandardError);
        long groupedEstimate = estimateGroupByCount(sample, maxStandardError);
        assertEquals(partialEstimate, groupedEstimate);
    }
}
/**
 * Verifies that a single value obtained from {@code randomValue()} is counted as
 * exactly one distinct value.
 *
 * @param maxStandardError the standard error passed through to the aggregation
 */
@Test(dataProvider = "provideStandardErrors")
public void testSinglePosition(double maxStandardError)
{
    List<Object> singleValue = ImmutableList.of(randomValue());
    assertCount(singleValue, maxStandardError, 1);
}
/**
 * Asserts that every aggregation path reports {@code expectedCount} for the values.
 *
 * <p>The grouped path is checked only for non-empty input; the plain and partial
 * paths are always checked. Checks run in that order and stop at the first failure.
 *
 * @param values the input values, possibly empty or containing nulls
 * @param maxStandardError the standard error passed through to the aggregation
 * @param expectedCount the exact estimate each path must return
 */
protected void assertCount(List<?> values, double maxStandardError, long expectedCount)
{
    if (!values.isEmpty()) {
        long groupedEstimate = estimateGroupByCount(values, maxStandardError);
        assertEquals(groupedEstimate, expectedCount);
    }

    long fullEstimate = estimateCount(values, maxStandardError);
    assertEquals(fullEstimate, expectedCount);

    long partialEstimate = estimateCountPartial(values, maxStandardError);
    assertEquals(partialEstimate, expectedCount);
}
/**
 * Verifies the statistical quality of the estimate over 500 random samples.
 *
 * <p>For each sample the relative error {@code (estimate - uniques) / uniques} is
 * recorded. The test then requires that the magnitude of the mean relative error
 * is below 1% and that the standard deviation of the relative error does not
 * exceed {@code maxStandardError} by more than 1%.
 *
 * @param maxStandardError the standard error passed through to the aggregation
 */
@Test(dataProvider = "provideStandardErrors")
public void testMultiplePositions(double maxStandardError)
{
    DescriptiveStatistics stats = new DescriptiveStatistics();

    for (int i = 0; i < 500; ++i) {
        // nextInt(bound) is in [0, bound), so the + 1 guarantees at least one unique value.
        int uniques = ThreadLocalRandom.current().nextInt(getUniqueValuesCount()) + 1;

        List<Object> values = createRandomSample(uniques, (int) (uniques * 1.5));

        long actual = estimateGroupByCount(values, maxStandardError);
        double error = (actual - uniques) * 1.0 / uniques;

        stats.addValue(error);
    }

    // Compare the magnitude of the mean: a signed comparison would let any amount of
    // systematic under-estimation (a large negative mean) pass unnoticed.
    assertLessThan(Math.abs(stats.getMean()), 1.0e-2);
    assertLessThan(stats.getStandardDeviation(), 1.0e-2 + maxStandardError);
}
/**
 * Verifies, over 100 random samples of up to 20000 unique values, that the partial
 * aggregation path yields exactly the same estimate as the grouped aggregation path.
 *
 * @param maxStandardError the standard error passed through to the aggregation
 */
@Test(dataProvider = "provideStandardErrors")
public void testMultiplePositionsPartial(double maxStandardError)
        throws Exception
{
    for (int round = 0; round < 100; ++round) {
        // nextInt(20000) is in [0, 20000), so the + 1 guarantees at least one unique value.
        int distinctCount = ThreadLocalRandom.current().nextInt(20000) + 1;
        List<Object> sample = createRandomSample(distinctCount, (int) (distinctCount * 1.5));

        long partialEstimate = estimateCountPartial(sample, maxStandardError);
        long groupedEstimate = estimateGroupByCount(sample, maxStandardError);
        assertEquals(partialEstimate, groupedEstimate);
    }
}
@Test(dataProvider = "provideStandardErrors") public void testMixedNullsAndNonNulls(double maxStandardError) throws Exception { List<Object> baseline = createRandomSample(10000, 15000); // Randomly insert nulls // We need to retain the preexisting order to ensure that the HLL can generate the same estimates. Iterator<Object> iterator = baseline.iterator(); List<Object> mixed = new ArrayList<>(); while (iterator.hasNext()) { mixed.add(ThreadLocalRandom.current().nextBoolean() ? null : iterator.next()); } assertCount(mixed, maxStandardError, estimateGroupByCount(baseline, maxStandardError)); }
/**
 * Verifies that an empty input is counted as zero distinct values.
 *
 * @param maxStandardError the standard error passed through to the aggregation
 */
@Test(dataProvider = "provideStandardErrors")
public void testNoPositions(double maxStandardError)
{
    List<Object> noValues = ImmutableList.of();
    assertCount(noValues, maxStandardError, 0);
}
/**
 * Verifies the statistical quality of the estimate over 500 random samples of up to
 * 20000 unique values.
 *
 * <p>For each sample the relative error {@code (estimate - uniques) / uniques} is
 * recorded. The test then requires that the magnitude of the mean relative error
 * is below 1% and that the standard deviation of the relative error does not
 * exceed {@code maxStandardError} by more than 1%.
 *
 * @param maxStandardError the standard error passed through to the aggregation
 */
@Test(dataProvider = "provideStandardErrors")
public void testMultiplePositions(double maxStandardError)
        throws Exception
{
    DescriptiveStatistics stats = new DescriptiveStatistics();

    for (int i = 0; i < 500; ++i) {
        // nextInt(20000) is in [0, 20000), so the + 1 guarantees at least one unique value.
        int uniques = ThreadLocalRandom.current().nextInt(20000) + 1;

        List<Object> values = createRandomSample(uniques, (int) (uniques * 1.5));

        long actual = estimateGroupByCount(values, maxStandardError);
        double error = (actual - uniques) * 1.0 / uniques;

        stats.addValue(error);
    }

    // Compare the magnitude of the mean: a signed comparison would let any amount of
    // systematic under-estimation (a large negative mean) pass unnoticed.
    assertLessThan(Math.abs(stats.getMean()), 1.0e-2);

    // maxStandardError is an upper bound, so only an observed deviation above it is a
    // failure; an estimator that is more accurate than requested must not be rejected.
    assertLessThan(stats.getStandardDeviation(), 1.0e-2 + maxStandardError);
}
/**
 * Builds a list of {@code total} values seeded from {@code makeRandomSet(uniques)}.
 *
 * <p>The list starts with every element of the generated set and is then padded, one
 * element at a time, with a copy of a randomly chosen element already in the list,
 * so padding never introduces a value that was not in the seed set.
 *
 * @param uniques the size requested from {@code makeRandomSet}
 * @param total the length of the returned list; must be at least {@code uniques}
 * @return the padded sample
 */
private List<Object> createRandomSample(int uniques, int total)
{
    Preconditions.checkArgument(uniques <= total, "uniques (%s) must be <= total (%s)", uniques, total);

    List<Object> sample = new ArrayList<>(total);
    sample.addAll(makeRandomSet(uniques));

    Random random = ThreadLocalRandom.current();
    // Each pass appends exactly one element, so the counter mirrors the list size.
    for (int size = sample.size(); size < total; size++) {
        sample.add(sample.get(random.nextInt(size)));
    }

    return sample;
}
/**
 * Asserts that every aggregation path reports {@code expectedCount} for the values.
 *
 * <p>The grouped path is checked only for non-empty input; the plain and partial
 * paths are always checked. Checks run in that order and stop at the first failure.
 *
 * @param values the input values, possibly empty or containing nulls
 * @param maxStandardError the standard error passed through to the aggregation
 * @param expectedCount the exact estimate each path must return
 */
private void assertCount(List<Object> values, double maxStandardError, long expectedCount)
{
    if (!values.isEmpty()) {
        long groupedEstimate = estimateGroupByCount(values, maxStandardError);
        assertEquals(groupedEstimate, expectedCount);
    }

    long fullEstimate = estimateCount(values, maxStandardError);
    assertEquals(fullEstimate, expectedCount);

    long partialEstimate = estimateCountPartial(values, maxStandardError);
    assertEquals(partialEstimate, expectedCount);
}
/**
 * Verifies that a single value obtained from {@code randomValue()} is counted as
 * exactly one distinct value.
 *
 * @param maxStandardError the standard error passed through to the aggregation
 */
@Test(dataProvider = "provideStandardErrors")
public void testSinglePosition(double maxStandardError)
        throws Exception
{
    List<Object> singleValue = ImmutableList.of(randomValue());
    assertCount(singleValue, maxStandardError, 1);
}
/**
 * Verifies that an input of 100 nulls is counted as zero distinct values.
 *
 * @param maxStandardError the standard error passed through to the aggregation
 */
@Test(dataProvider = "provideStandardErrors")
public void testAllPositionsNull(double maxStandardError)
{
    List<Object> allNulls = Collections.nCopies(100, null);
    assertCount(allNulls, maxStandardError, 0);
}
/**
 * Builds a list of {@code total} values seeded from {@code makeRandomSet(uniques)}.
 *
 * <p>The list starts with every element of the generated set and is then padded, one
 * element at a time, with a copy of a randomly chosen element already in the list,
 * so padding never introduces a value that was not in the seed set.
 *
 * @param uniques the size requested from {@code makeRandomSet}
 * @param total the length of the returned list; must be at least {@code uniques}
 * @return the padded sample
 */
private List<Object> createRandomSample(int uniques, int total)
{
    Preconditions.checkArgument(uniques <= total, "uniques (%s) must be <= total (%s)", uniques, total);

    List<Object> sample = new ArrayList<>(total);
    sample.addAll(makeRandomSet(uniques));

    Random random = ThreadLocalRandom.current();
    // Each pass appends exactly one element, so the counter mirrors the list size.
    for (int size = sample.size(); size < total; size++) {
        int pick = random.nextInt(size);
        sample.add(sample.get(pick));
    }

    return sample;
}
/**
 * Runs the aggregation function over the values via
 * {@code AggregationTestUtils.aggregation} and returns its result as a long.
 *
 * @param values the input values
 * @param maxStandardError the standard error supplied alongside every value
 * @return the estimate produced by the plain aggregation path
 */
private long estimateCount(List<?> values, double maxStandardError)
{
    Object estimate = AggregationTestUtils.aggregation(
            getAggregationFunction(),
            createPage(values, maxStandardError));
    // The helper returns an untyped Object; the result is expected to be a boxed Long.
    return ((Long) estimate).longValue();
}
/**
 * Wraps the given values in a {@code Page} suitable for the aggregation under test.
 *
 * <p>An empty list yields a zero-position page with no blocks. Otherwise the page has
 * two blocks: the values themselves (typed by {@code getValueType()}) and a DOUBLE
 * block that repeats {@code maxStandardError} once per value.
 *
 * @param values the input values, possibly containing nulls
 * @param maxStandardError the standard error repeated for every position
 * @return the page built from {@code values}
 */
private Page createPage(List<Object> values, double maxStandardError)
{
    if (values.isEmpty()) {
        return new Page(0);
    }

    int positionCount = values.size();
    return new Page(
            positionCount,
            createBlock(getValueType(), values),
            createBlock(DOUBLE, ImmutableList.copyOf(Collections.nCopies(positionCount, maxStandardError))));
}
/**
 * Verifies that an empty input is counted as zero distinct values.
 *
 * @param maxStandardError the standard error passed through to the aggregation
 */
@Test(dataProvider = "provideStandardErrors")
public void testNoPositions(double maxStandardError)
        throws Exception
{
    List<Object> noValues = ImmutableList.of();
    assertCount(noValues, maxStandardError, 0);
}
/**
 * Runs the aggregation function over the values via
 * {@code AggregationTestUtils.groupedAggregation} and returns its result as a long.
 *
 * @param values the input values
 * @param maxStandardError the standard error supplied alongside every value
 * @return the estimate produced by the grouped aggregation path
 */
private long estimateGroupByCount(List<?> values, double maxStandardError)
{
    Object estimate = AggregationTestUtils.groupedAggregation(
            getAggregationFunction(),
            createPage(values, maxStandardError));
    // The helper returns an untyped Object; the result is expected to be a boxed Long.
    return ((Long) estimate).longValue();
}