/**
 * Adds the hash of the selector's current double value to the given collector.
 *
 * <p>The value is skipped only when {@code NullHandling.replaceWithDefault()} is false and the
 * selector reports null. Otherwise the double is converted with
 * {@link Double#doubleToLongBits(double)} and hashed with {@code CardinalityAggregator.hashFn}.
 *
 * @param selector  selector supplying the current double value
 * @param collector collector that receives the hashed bytes
 */
@Override
public void hashValues(BaseDoubleColumnValueSelector selector, HyperLogLogCollector collector)
{
  // Same short-circuit order as "replaceWithDefault() || !isNull()": isNull() is only
  // consulted when replaceWithDefault() is false.
  if (!NullHandling.replaceWithDefault() && selector.isNull()) {
    return;
  }
  final long bits = Double.doubleToLongBits(selector.getDouble());
  collector.add(CardinalityAggregator.hashFn.hashLong(bits).asBytes());
}
} // closes the enclosing type, whose header is outside this excerpt
/**
 * Computes the shard for a long value.
 *
 * <p>When {@code totalShards} is 1 or less the result is always 0. Otherwise the value is hashed
 * with {@code hashFunc} and the resulting long is passed to {@code _getShard}.
 *
 * @param longValue   value to assign to a shard
 * @param totalShards number of shards
 * @return 0 when {@code totalShards <= 1}, otherwise the result of {@code _getShard}
 */
public static short getShard(long longValue, int totalShards) {
  if (totalShards > 1) {
    final long hashed = hashFunc.hashLong(longValue).asLong();
    return _getShard(hashed, totalShards);
  }
  return 0;
}
/**
 * Asserts that {@code hashLong} and a fresh hasher fed through {@code putLong} agree.
 *
 * @param hashFunction function under test
 * @param random       source of the long value to hash
 */
private static void assertHashLongEquivalence(HashFunction hashFunction, Random random) {
  final long value = random.nextLong();
  assertEquals(
      hashFunction.hashLong(value),
      hashFunction.newHasher().putLong(value).hash());
}
/**
 * Checks that concatenating md5 and murmur3_32 yields the md5 bytes followed by the murmur3_32
 * bytes for the input 42L, both when the functions are passed directly and when they are passed
 * as a list.
 */
public void testConcatenatingHashFunction_makeHash() {
  byte[] md5Bytes = Hashing.md5().hashLong(42L).asBytes();
  byte[] murmurBytes = Hashing.murmur3_32().hashLong(42L).asBytes();

  // Build the expected hash by writing both digests back to back into one array.
  byte[] joined = new byte[md5Bytes.length + murmurBytes.length];
  ByteBuffer.wrap(joined).put(md5Bytes).put(murmurBytes);
  HashCode expected = HashCode.fromBytes(joined);

  assertEquals(
      expected,
      Hashing.concatenating(Hashing.md5(), Hashing.murmur3_32()).hashLong(42L));
  assertEquals(
      expected,
      Hashing.concatenating(asList(Hashing.md5(), Hashing.murmur3_32())).hashLong(42L));
}
/**
 * Adds the hash of the selector's current long value to the given collector.
 *
 * <p>The value is skipped only when {@code NullHandling.replaceWithDefault()} is false and the
 * selector reports null. Otherwise the long is hashed with {@code CardinalityAggregator.hashFn}.
 *
 * @param selector  selector supplying the current long value
 * @param collector collector that receives the hashed bytes
 */
@Override
public void hashValues(BaseLongColumnValueSelector selector, HyperLogLogCollector collector)
{
  // Same short-circuit order as "replaceWithDefault() || !isNull()": isNull() is only
  // consulted when replaceWithDefault() is false.
  if (!NullHandling.replaceWithDefault() && selector.isNull()) {
    return;
  }
  final long value = selector.getLong();
  collector.add(CardinalityAggregator.hashFn.hashLong(value).asBytes());
}
} // closes the enclosing type, whose header is outside this excerpt
/**
 * Adds {@code NUM_HASHES} hashed pseudo-random longs to the collector.
 *
 * <p>The generator is seeded with a fixed value, so the same sequence is added on every call.
 *
 * @param collector collector to populate
 */
private void fillCollector(HyperLogLogCollector collector)
{
  final Random generator = new Random(758190);
  long added = 0;
  while (added < NUM_HASHES) {
    collector.add(hashFunction.hashLong(generator.nextLong()).asBytes());
    added++;
  }
}
/**
 * Counts the access and, for sampled keys, forwards it to every mini policy.
 *
 * <p>A key is sampled when the floor-modulus of its hash by {@code R} is less than 1.
 *
 * @param key the accessed key
 */
private void onAccess(long key) {
  sample++;

  // Keys outside the sample are only counted, never recorded.
  if (Math.floorMod(hasher.hashLong(key).asInt(), R) >= 1) {
    return;
  }
  for (WindowTinyLfuPolicy mini : minis) {
    mini.record(key);
  }
}
/**
 * Checks {@code murmur3_32().hashLong} against fixed expected values for a handful of longs,
 * including zero, a negative value and both extremes of the long range.
 */
public void testKnownLongInputs() {
  // inputs[i] is expected to hash to expected[i]; the order matches the original assertions.
  final long[] inputs = {0L, -42L, 42L, Long.MIN_VALUE, Long.MAX_VALUE};
  final int[] expected = {1669671676, -846261623, 1871679806, 1366273829, -2106506049};

  for (int idx = 0; idx < inputs.length; idx++) {
    assertHash(expected[idx], murmur3_32().hashLong(inputs[idx]));
  }
}
// Hash addressAsLong with the 32-bit Murmur3 function and take the resulting hash code as an int.
int coercedHash = Hashing.murmur3_32().hashLong(addressAsLong).asInt();
/**
 * Checks that the factory's comparator and the cardinality estimates both rank a collector that
 * always holds one more value above a collector that holds one fewer.
 *
 * <p>The first collector starts with the hash of 0 and then receives the odd numbers below 100;
 * the second receives the following even number on each step.
 */
@Test
public void testCompare1()
{
  final HyperLogLogCollector larger = HyperLogLogCollector.makeLatestCollector();
  final HyperLogLogCollector smaller = HyperLogLogCollector.makeLatestCollector();
  larger.add(fn.hashLong(0).asBytes());

  final HyperUniquesAggregatorFactory factory = new HyperUniquesAggregatorFactory("foo", "bar");
  final Comparator comparator = factory.getComparator();

  for (int odd = 1; odd < 100; odd += 2) {
    larger.add(fn.hashLong(odd).asBytes());
    smaller.add(fn.hashLong(odd + 1).asBytes());

    Assert.assertEquals(1, comparator.compare(larger, smaller));
    Assert.assertEquals(
        1,
        Double.compare(larger.estimateCardinality(), smaller.estimateCardinality())
    );
  }
}
/**
 * Checks that the finalizing post-aggregator returns the expected estimate for a collector
 * filled with 100 hashed longs drawn from a {@link Random} seeded with 0.
 */
@Test
public void testCompute()
{
  Random random = new Random(0L);
  HyperUniqueFinalizingPostAggregator postAggregator = new HyperUniqueFinalizingPostAggregator(
      "uniques",
      "uniques"
  );
  HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();

  for (int i = 0; i < 100; ++i) {
    byte[] hashedVal = fn.hashLong(random.nextLong()).asBytes();
    collector.add(hashedVal);
  }

  double cardinality = (Double) postAggregator.compute(ImmutableMap.of("uniques", collector));

  // Exact comparison (zero delta), but unlike assertTrue(a == b) a failure reports both values.
  Assert.assertEquals(99.37233005831612, cardinality, 0.0d);
}
/**
 * Checks that estimating from the collector directly and estimating from its serialized byte
 * buffer give exactly the same result after adding 100 hashed random longs.
 */
@Test
public void testSparseEstimation()
{
  final Random random = new Random(0);
  final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();

  int remaining = 100;
  while (remaining-- > 0) {
    collector.add(fn.hashLong(random.nextLong()).asBytes());
  }

  final double direct = collector.estimateCardinality();
  final double fromBuffer = HyperLogLogCollector.estimateByteBuffer(collector.toByteBuffer());
  Assert.assertEquals(direct, fromBuffer, 0.0d);
}
/**
 * Ignored exploratory test: for each target size, fills a fresh collector with hashed random
 * longs and feeds the running error through {@code computeError}.
 *
 * <p>{@code computeError} is called every 100,000,000 additions within a size and once more
 * when the size is complete.
 */
@Ignore
@Test
public void showErrorRate()
{
  final HashFunction fn = Hashing.murmur3_128();
  final Random random = ThreadLocalRandom.current();
  final int[] valsToCheck = {
      10, 20, 50, 100, 1000, 2000, 5000, 10000, 20000, 50000, 100000, 1000000, 2000000,
      10000000, Integer.MAX_VALUE
  };

  double error = 0.0d;
  int count = 0;

  for (int numThings : valsToCheck) {
    final long startTime = System.currentTimeMillis();
    final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();

    for (int i = 0; i < numThings; ++i) {
      // Intermediate report before adding the value at every non-zero multiple of 100,000,000.
      final boolean atCheckpoint = i != 0 && i % 100000000 == 0;
      if (atCheckpoint) {
        count++;
        error = computeError(error, count, i, startTime, collector);
      }
      collector.add(fn.hashLong(random.nextLong()).asBytes());
    }

    count++;
    error = computeError(error, count, numThings, startTime, collector);
  }
}
/**
 * Checks cardinality estimates at fixed checkpoints for a collector backed by a heap buffer
 * whose capacity (10000) is larger than its limit (the latest dense-storage size).
 */
@Test
public void testEstimationLimitDifferentFromCapacity()
{
  final Random random = new Random(0L);
  final int[] valsToCheck = {
      10, 20, 50, 100, 1000, 2000, 5000, 10000, 20000, 50000, 100000, 1000000, 2000000
  };
  final double[] expectedVals = {
      11.029647221949576, 21.108407720752034, 51.64575281885815, 100.42231726408892,
      981.8579991802412, 1943.1337257462792, 4946.192042635218, 9935.088157579434,
      20366.1486889433, 49433.56029693898, 100615.26273314281, 980831.624899156000,
      1982408.2608981386
  };

  int checkpoint = 0;
  final ByteBuffer backing = (ByteBuffer) ByteBuffer.allocate(10000)
                                                    .position(0)
                                                    .limit(HyperLogLogCollector.getLatestNumBytesForDenseStorage());
  final HyperLogLogCollector collector = HyperLogLogCollector.makeCollector(backing);

  final int total = valsToCheck[valsToCheck.length - 1];
  for (int added = 0; added < total; added++) {
    collector.add(fn.hashLong(random.nextLong()).asBytes());
    // The check runs after the add at a zero-based index equal to the checkpoint value.
    if (valsToCheck[checkpoint] == added) {
      Assert.assertEquals(expectedVals[checkpoint], collector.estimateCardinality(), 0.0d);
      checkpoint++;
    }
  }

  // The last checkpoint equals the loop bound, so it is verified here rather than in the loop.
  Assert.assertEquals(expectedVals.length, checkpoint + 1);
  Assert.assertEquals(expectedVals[checkpoint], collector.estimateCardinality(), 0.0d);
}
// Hash the next pseudo-random long and add the resulting bytes to the collector.
byte[] hashedVal = fn.hashLong(random.nextLong()).asBytes(); collector.add(hashedVal);
/**
 * Checks cardinality estimates at fixed checkpoints while adding hashed random longs (seed 0)
 * to a collector created with {@code makeLatestCollector()}.
 */
@Test
public void testEstimation()
{
  final Random random = new Random(0L);
  final int[] valsToCheck = {
      10, 20, 50, 100, 1000, 2000, 5000, 10000, 20000, 50000, 100000, 1000000, 2000000
  };
  final double[] expectedVals = {
      11.029647221949576, 21.108407720752034, 51.64575281885815, 100.42231726408892,
      981.8579991802412, 1943.1337257462792, 4946.192042635218, 9935.088157579434,
      20366.1486889433, 49433.56029693898, 100615.26273314281, 980831.624899156000,
      1982408.2608981386
  };

  int checkpoint = 0;
  final HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();

  final int total = valsToCheck[valsToCheck.length - 1];
  for (int added = 0; added < total; added++) {
    collector.add(fn.hashLong(random.nextLong()).asBytes());
    // The check runs after the add at a zero-based index equal to the checkpoint value.
    if (valsToCheck[checkpoint] == added) {
      Assert.assertEquals(expectedVals[checkpoint], collector.estimateCardinality(), 0.0d);
      checkpoint++;
    }
  }

  // The last checkpoint equals the loop bound, so it is verified here rather than in the loop.
  Assert.assertEquals(expectedVals.length, checkpoint + 1);
  Assert.assertEquals(expectedVals[checkpoint], collector.estimateCardinality(), 0.0d);
}
/**
 * Checks cardinality estimates at fixed checkpoints for a collector backed by a buffer obtained
 * from {@link ByteBuffer#allocateDirect(int)} sized to the latest dense-storage size.
 */
@Test
public void testEstimationReadOnlyByteBuffers()
{
  final Random random = new Random(0L);
  final int[] valsToCheck = {
      10, 20, 50, 100, 1000, 2000, 5000, 10000, 20000, 50000, 100000, 1000000, 2000000
  };
  final double[] expectedVals = {
      11.029647221949576, 21.108407720752034, 51.64575281885815, 100.42231726408892,
      981.8579991802412, 1943.1337257462792, 4946.192042635218, 9935.088157579434,
      20366.1486889433, 49433.56029693898, 100615.26273314281, 980831.624899156000,
      1982408.2608981386
  };

  int checkpoint = 0;
  final ByteBuffer backing = ByteBuffer.allocateDirect(
      HyperLogLogCollector.getLatestNumBytesForDenseStorage()
  );
  final HyperLogLogCollector collector = HyperLogLogCollector.makeCollector(backing);

  final int total = valsToCheck[valsToCheck.length - 1];
  for (int added = 0; added < total; added++) {
    collector.add(fn.hashLong(random.nextLong()).asBytes());
    // The check runs after the add at a zero-based index equal to the checkpoint value.
    if (valsToCheck[checkpoint] == added) {
      Assert.assertEquals(expectedVals[checkpoint], collector.estimateCardinality(), 0.0d);
      checkpoint++;
    }
  }

  // The last checkpoint equals the loop bound, so it is verified here rather than in the loop.
  Assert.assertEquals(expectedVals.length, checkpoint + 1);
  Assert.assertEquals(expectedVals[checkpoint], collector.estimateCardinality(), 0.0d);
}
@Ignore @Test // This test can help when finding potential combinations that are weird, but it's non-deterministic public void testFoldingwithDifferentOffsets() { // final Random random = new Random(37); // this seed will cause this test to fail because of slightly larger errors final Random random = new Random(0); for (int j = 0; j < 10; j++) { HyperLogLogCollector smallVals = HyperLogLogCollector.makeLatestCollector(); HyperLogLogCollector bigVals = HyperLogLogCollector.makeLatestCollector(); HyperLogLogCollector all = HyperLogLogCollector.makeLatestCollector(); int numThings = 500000; for (int i = 0; i < numThings; i++) { byte[] hashedVal = fn.hashLong(random.nextLong()).asBytes(); if (i < 1000) { smallVals.add(hashedVal); } else { bigVals.add(hashedVal); } all.add(hashedVal); } HyperLogLogCollector folded = HyperLogLogCollector.makeLatestCollector(); folded.fold(smallVals); folded.fold(bigVals); final double expected = all.estimateCardinality(); Assert.assertEquals(expected, folded.estimateCardinality(), expected * 0.025); Assert.assertEquals(numThings, folded.estimateCardinality(), numThings * 0.05); } }
/**
 * Checks that folding two collectors, each holding alternate values, into an empty collector
 * reproduces first the one half and then the collector that saw every value, both by equality
 * and by exact cardinality estimate.
 */
@Test
public void testFolding()
{
  final Random random = new Random(0);
  final int[] numValsToCheck = {10, 20, 50, 100, 1000, 2000};

  for (int numThings : numValsToCheck) {
    final HyperLogLogCollector allCombined = HyperLogLogCollector.makeLatestCollector();
    final HyperLogLogCollector oneHalf = HyperLogLogCollector.makeLatestCollector();
    final HyperLogLogCollector otherHalf = HyperLogLogCollector.makeLatestCollector();

    for (int i = 0; i < numThings; ++i) {
      final byte[] hashedVal = fn.hashLong(random.nextLong()).asBytes();

      allCombined.add(hashedVal);
      // Even indices feed one half, odd indices the other.
      (i % 2 == 0 ? oneHalf : otherHalf).add(hashedVal);
    }

    final HyperLogLogCollector folded = HyperLogLogCollector.makeLatestCollector();

    folded.fold(oneHalf);
    Assert.assertEquals(oneHalf, folded);
    Assert.assertEquals(oneHalf.estimateCardinality(), folded.estimateCardinality(), 0.0d);

    folded.fold(otherHalf);
    Assert.assertEquals(allCombined, folded);
    Assert.assertEquals(allCombined.estimateCardinality(), folded.estimateCardinality(), 0.0d);
  }
}
/**
 * Same scenario as folding two half-collectors into an empty one, except that each half is
 * folded in through its {@code toByteBuffer()} form rather than as a collector object.
 */
@Test
public void testFoldingByteBuffers()
{
  final Random random = new Random(0);
  final int[] numValsToCheck = {10, 20, 50, 100, 1000, 2000};

  for (int numThings : numValsToCheck) {
    final HyperLogLogCollector allCombined = HyperLogLogCollector.makeLatestCollector();
    final HyperLogLogCollector oneHalf = HyperLogLogCollector.makeLatestCollector();
    final HyperLogLogCollector otherHalf = HyperLogLogCollector.makeLatestCollector();

    for (int i = 0; i < numThings; ++i) {
      final byte[] hashedVal = fn.hashLong(random.nextLong()).asBytes();

      allCombined.add(hashedVal);
      // Even indices feed one half, odd indices the other.
      (i % 2 == 0 ? oneHalf : otherHalf).add(hashedVal);
    }

    final HyperLogLogCollector folded = HyperLogLogCollector.makeLatestCollector();

    folded.fold(oneHalf.toByteBuffer());
    Assert.assertEquals(oneHalf, folded);
    Assert.assertEquals(oneHalf.estimateCardinality(), folded.estimateCardinality(), 0.0d);

    folded.fold(otherHalf.toByteBuffer());
    Assert.assertEquals(allCombined, folded);
    Assert.assertEquals(allCombined.estimateCardinality(), folded.estimateCardinality(), 0.0d);
  }
}