/** * Determines & initializes bloom filter meta data from user config. Call * {@link #allocBloom()} to allocate bloom filter data. * * @param maxKeys Maximum expected number of keys that will be stored in this * bloom * @param errorRate Desired false positive error rate. Lower rate = more * storage required * @param hashType Type of hash function to use * @param foldFactor When finished adding entries, you may be able to 'fold' * this bloom to save space. Tradeoff potentially excess bytes in * bloom for ability to fold if keyCount is exponentially greater * than maxKeys. * @throws IllegalArgumentException */ // Used only in testcases public BloomFilterChunk(int maxKeys, double errorRate, int hashType, int foldFactor) throws IllegalArgumentException { this(hashType, BloomType.ROW); long bitSize = BloomFilterUtil.computeBitSize(maxKeys, errorRate); hashCount = BloomFilterUtil.optimalFunctionCount(maxKeys, bitSize); this.maxKeys = maxKeys; // increase byteSize so folding is possible byteSize = BloomFilterUtil.computeFoldableByteSize(bitSize, foldFactor); sanityCheck(); }
/** * Creates a Bloom filter chunk of the given size. * * @param byteSizeHint the desired number of bytes for the Bloom filter bit * array. Will be increased so that folding is possible. * @param errorRate target false positive rate of the Bloom filter * @param hashType Bloom filter hash function type * @param foldFactor * @param bloomType * @return the new Bloom filter of the desired size */ public static BloomFilterChunk createBySize(int byteSizeHint, double errorRate, int hashType, int foldFactor, BloomType bloomType) { BloomFilterChunk bbf = new BloomFilterChunk(hashType, bloomType); bbf.byteSize = computeFoldableByteSize(byteSizeHint * 8L, foldFactor); long bitSize = bbf.byteSize * 8; bbf.maxKeys = (int) idealMaxKeys(bitSize, errorRate); bbf.hashCount = optimalFunctionCount(bbf.maxKeys, bitSize); // Adjust max keys to bring error rate closer to what was requested, // because byteSize was adjusted to allow for folding, and hashCount was // rounded. bbf.maxKeys = (int) computeMaxKeys(bitSize, errorRate, bbf.hashCount); return bbf; }