public void testSizing() { int bitSize = 8 * 128 * 1024; // 128 KB double errorRate = 0.025; // target false positive rate // How many keys can we store in a Bloom filter of this size maintaining // the given false positive rate, not taking into account that the n long maxKeys = BloomFilterUtil.idealMaxKeys(bitSize, errorRate); assertEquals(136570, maxKeys); // A reverse operation: how many bits would we need to store this many keys // and keep the same low false positive rate? long bitSize2 = BloomFilterUtil.computeBitSize(maxKeys, errorRate); // The bit size comes out a little different due to rounding. assertTrue(Math.abs(bitSize2 - bitSize) * 1.0 / bitSize < 1e-5); }
/** * Creates a Bloom filter chunk of the given size. * * @param byteSizeHint the desired number of bytes for the Bloom filter bit * array. Will be increased so that folding is possible. * @param errorRate target false positive rate of the Bloom filter * @param hashType Bloom filter hash function type * @param foldFactor * @param bloomType * @return the new Bloom filter of the desired size */ public static BloomFilterChunk createBySize(int byteSizeHint, double errorRate, int hashType, int foldFactor, BloomType bloomType) { BloomFilterChunk bbf = new BloomFilterChunk(hashType, bloomType); bbf.byteSize = computeFoldableByteSize(byteSizeHint * 8L, foldFactor); long bitSize = bbf.byteSize * 8; bbf.maxKeys = (int) idealMaxKeys(bitSize, errorRate); bbf.hashCount = optimalFunctionCount(bbf.maxKeys, bitSize); // Adjust max keys to bring error rate closer to what was requested, // because byteSize was adjusted to allow for folding, and hashCount was // rounded. bbf.maxKeys = (int) computeMaxKeys(bitSize, errorRate, bbf.hashCount); return bbf; }
/**
 * Checks that sizing a Bloom block for its ideal key count gives back
 * (almost) the original block size: the ratio of the recomputed bit size to
 * the original bit size must lie within 0.0001 of 0.9999.
 */
@Test
public void testCompoundBloomSizing() {
  final double targetErrorRate = 0.01;
  final int blockBytes = 4096;
  final int blockBits = blockBytes * 8;

  // Ideal number of keys for a single block at the target error rate.
  long keysPerChunk = BloomFilterUtil.idealMaxKeys(blockBits, targetErrorRate);

  // Bit size needed for that many keys at the same error rate.
  long recomputedBits = BloomFilterUtil.computeBitSize(keysPerChunk, targetErrorRate);

  long originalBits = blockBits;
  double sizeRatio = recomputedBits * 1.0 / originalBits;
  double deviation = Math.abs(sizeRatio - 0.9999);
  assertTrue(deviation < 0.0001);
}
public void testSizing() { int bitSize = 8 * 128 * 1024; // 128 KB double errorRate = 0.025; // target false positive rate // How many keys can we store in a Bloom filter of this size maintaining // the given false positive rate, not taking into account that the n long maxKeys = BloomFilterUtil.idealMaxKeys(bitSize, errorRate); assertEquals(136570, maxKeys); // A reverse operation: how many bits would we need to store this many keys // and keep the same low false positive rate? long bitSize2 = BloomFilterUtil.computeBitSize(maxKeys, errorRate); // The bit size comes out a little different due to rounding. assertTrue(Math.abs(bitSize2 - bitSize) * 1.0 / bitSize < 1e-5); }
/**
 * Checks that sizing a Bloom block for its ideal key count gives back
 * (almost) the original block size: the ratio of the recomputed bit size to
 * the original bit size must lie within 0.0001 of 0.9999.
 */
@Test
public void testCompoundBloomSizing() {
  final double targetErrorRate = 0.01;
  final int blockBytes = 4096;
  final int blockBits = blockBytes * 8;

  // Ideal number of keys for a single block at the target error rate.
  long keysPerChunk = BloomFilterUtil.idealMaxKeys(blockBits, targetErrorRate);

  // Bit size needed for that many keys at the same error rate.
  long recomputedBits = BloomFilterUtil.computeBitSize(keysPerChunk, targetErrorRate);

  long originalBits = blockBits;
  double sizeRatio = recomputedBits * 1.0 / originalBits;
  double deviation = Math.abs(sizeRatio - 0.9999);
  assertTrue(deviation < 0.0001);
}