/**
 * Creates a bloom filter sized for an expected element count and a target
 * false positive rate, hashing with {@code Murmur2}.
 *
 * <p>This is a convenience overload: it forwards both arguments to the
 * three-argument {@code byCountAndFPRate} and supplies
 * {@code HashFunctions.MURMUR2} as the hash function.
 *
 * @param expectedCount the number of elements this bloom filter is expected to
 *                      hold; it is used to size the bloom filter
 * @param fpRate the probability that TServer will erroneously return a record
 *               that has never been {@code put} into the {@code BloomFilter}
 * @return the bloom filter produced by the three-argument overload
 */
public static BloomFilter byCountAndFPRate(int expectedCount, double fpRate) {
  return BloomFilter.byCountAndFPRate(
      expectedCount,
      fpRate,
      HashFunctions.MURMUR2);
}
/**
 * Creates a bloom filter sized for an expected element count, relying on the
 * defaults for everything else.
 *
 * <p>Delegates to {@code byCountAndFPRate(int, double)} with
 * {@code DEFAULT_FP_RATE} (documented as 0.01) as the false positive rate; the
 * default hashing is {@code Murmur2}.
 *
 * @param expectedCount the number of elements this bloom filter is expected to
 *                      hold; it is used to size the bloom filter
 * @return the bloom filter produced by {@code byCountAndFPRate(int, double)}
 */
public static BloomFilter byCount(int expectedCount) {
  return BloomFilter.byCountAndFPRate(
      expectedCount,
      DEFAULT_FP_RATE);
}
/**
 * Verifies the value reported by {@code getNHashes()} for bloom filters
 * created through {@code byCountAndFPRate} and {@code bySizeAndFPRate} with
 * several false positive rates.
 *
 * <p>The expected value is passed as the first argument of
 * {@code assertEquals} so that a failure report labels the expected and
 * actual values correctly (JUnit's {@code assertEquals(expected, actual)}
 * argument order).
 */
@Test
public void testNumberOfHashes() {
  // Filters created from an expected element count and a false positive rate.
  assertEquals(3, BloomFilter.byCountAndFPRate(10, 0.1).getNHashes());
  assertEquals(2, BloomFilter.byCountAndFPRate(100, 0.2).getNHashes());
  assertEquals(4, BloomFilter.byCountAndFPRate(1000, 0.05).getNHashes());
  assertEquals(6, BloomFilter.byCountAndFPRate(10000, 0.01).getNHashes());

  // Filters created through bySizeAndFPRate with the same false positive rates.
  assertEquals(3, BloomFilter.bySizeAndFPRate(10, 0.1).getNHashes());
  assertEquals(2, BloomFilter.bySizeAndFPRate(1000, 0.2).getNHashes());
  assertEquals(4, BloomFilter.bySizeAndFPRate(100000, 0.05).getNHashes());
  assertEquals(6, BloomFilter.bySizeAndFPRate(10000000, 0.01).getNHashes());
}