/**
 * Tests a raw byte-range key against a bloom filter stored in a {@link ByteBuff}.
 * <p>
 * The range {@code buf[offset .. offset + length)} is wrapped in a
 * {@link ByteArrayHashKey} and the membership check is delegated to the
 * {@code HashKey}-based {@code contains} overload.
 *
 * @param buf         array holding the key bytes
 * @param offset      position of the key inside {@code buf}
 * @param length      number of key bytes
 * @param bloomBuf    buffer holding the bloom filter; forwarded unchanged to the overload
 * @param bloomOffset forwarded unchanged to the overload
 * @param bloomSize   forwarded unchanged to the overload
 * @param hash        hash function forwarded to the overload
 * @param hashCount   forwarded unchanged to the overload
 * @return whatever the {@code HashKey}-based overload reports for this key
 */
public static boolean contains(byte[] buf, int offset, int length, ByteBuff bloomBuf,
    int bloomOffset, int bloomSize, Hash hash, int hashCount) {
  return contains(bloomBuf, bloomOffset, bloomSize, hash, hashCount,
      new ByteArrayHashKey(buf, offset, length));
}
/**
 * Tests a {@link Cell} against a bloom filter stored in a {@link ByteBuff}.
 * <p>
 * The cell is wrapped in a {@link RowColBloomHashKey} when {@code type} is
 * {@link BloomType#ROWCOL}, and in a {@link RowBloomHashKey} for every other
 * type. The membership check is then delegated to the {@code HashKey}-based
 * {@code contains} overload.
 *
 * @param cell        cell to look up
 * @param bloomBuf    buffer holding the bloom filter; forwarded unchanged to the overload
 * @param bloomOffset forwarded unchanged to the overload
 * @param bloomSize   forwarded unchanged to the overload
 * @param hash        hash function forwarded to the overload
 * @param hashCount   forwarded unchanged to the overload
 * @param type        bloom type that selects which hash-key wrapper is used
 * @return whatever the {@code HashKey}-based overload reports for this cell
 */
public static boolean contains(Cell cell, ByteBuff bloomBuf, int bloomOffset, int bloomSize,
    Hash hash, int hashCount, BloomType type) {
  final HashKey<Cell> cellKey;
  if (type == BloomType.ROWCOL) {
    cellKey = new RowColBloomHashKey(cell);
  } else {
    cellKey = new RowBloomHashKey(cell);
  }
  return contains(bloomBuf, bloomOffset, bloomSize, hash, hashCount, cellKey);
}
@Override public boolean contains(byte[] key, int keyOffset, int keyLength, ByteBuff bloom) { int block = index.rootBlockContainingKey(key, keyOffset, keyLength); if (block < 0) { return false; // This key is not in the file. } boolean result; HFileBlock bloomBlock = getBloomBlock(block); try { ByteBuff bloomBuf = bloomBlock.getBufferReadOnly(); result = BloomFilterUtil.contains(key, keyOffset, keyLength, bloomBuf, bloomBlock.headerSize(), bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount); } finally { // After the use return back the block if it was served from a cache. reader.returnBlock(bloomBlock); } if (numPositivesPerChunk != null && result) { // Update statistics. Only used in unit tests. ++numPositivesPerChunk[block]; } return result; }
@Override public boolean contains(Cell keyCell, ByteBuff bloom, BloomType type) { int block = index.rootBlockContainingKey(keyCell); if (block < 0) { return false; // This key is not in the file. } boolean result; HFileBlock bloomBlock = getBloomBlock(block); try { ByteBuff bloomBuf = bloomBlock.getBufferReadOnly(); result = BloomFilterUtil.contains(keyCell, bloomBuf, bloomBlock.headerSize(), bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount, type); } finally { // After the use return back the block if it was served from a cache. reader.returnBlock(bloomBlock); } if (numPositivesPerChunk != null && result) { // Update statistics. Only used in unit tests. ++numPositivesPerChunk[block]; } return result; }
if (BloomFilterUtil.contains(bytes, 0, bytes.length, new MultiByteBuff(b.bloom), 0, (int) b.byteSize, b.hash, b.hashCount)) { if (i >= 1 * 1000 * 1000)
assertTrue(BloomFilterUtil.contains(key1, 0, key1.length, new MultiByteBuff(bf1.bloom), 0, (int) bf1.byteSize, bf1.hash, bf1.hashCount)); assertFalse(BloomFilterUtil.contains(key2, 0, key2.length, new MultiByteBuff(bf1.bloom), 0, (int) bf1.byteSize, bf1.hash, bf1.hashCount)); assertFalse(BloomFilterUtil.contains(key1, 0, key1.length, new MultiByteBuff(bf2.bloom), 0, (int) bf2.byteSize, bf2.hash, bf2.hashCount)); assertTrue(BloomFilterUtil.contains(key2, 0, key2.length, new MultiByteBuff(bf2.bloom), 0, (int) bf2.byteSize, bf2.hash, bf2.hashCount)); assertTrue(BloomFilterUtil.contains(bkey, 0, bkey.length, new MultiByteBuff(bf1.bloom), 0, (int) bf1.byteSize, bf1.hash, bf1.hashCount)); assertTrue(BloomFilterUtil.contains(bval, 1, bval.length - 1, new MultiByteBuff(bf1.bloom), 0, (int) bf1.byteSize, bf1.hash, bf1.hashCount)); assertFalse(BloomFilterUtil.contains(bval, 0, bval.length, new MultiByteBuff(bf1.bloom), 0, (int) bf1.byteSize, bf1.hash, bf1.hashCount)); BloomFilterChunk newBf1 = new BloomFilterChunk(1000, (float)0.01, Hash.MURMUR_HASH, 0); assertTrue(BloomFilterUtil.contains(key1, 0, key1.length, new MultiByteBuff(bb), 0, (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount)); assertFalse(BloomFilterUtil.contains(key2, 0, key2.length, new MultiByteBuff(bb), 0, (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount)); assertTrue(BloomFilterUtil.contains(bkey, 0, bkey.length, new MultiByteBuff(bb), 0, (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount)); assertTrue(BloomFilterUtil.contains(bval, 1, bval.length - 1, new MultiByteBuff(bb), 0, (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount)); assertFalse(BloomFilterUtil.contains(bval, 0, bval.length, new MultiByteBuff(bb), 0, (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount)); assertFalse(BloomFilterUtil.contains(bval, 0, bval.length, new MultiByteBuff(bb), 0,
public void testBloomFold() throws Exception { // test: foldFactor < log(max/actual) BloomFilterChunk b = new BloomFilterChunk(1003, (float) 0.01, Hash.MURMUR_HASH, 2); b.allocBloom(); long origSize = b.getByteSize(); assertEquals(1204, origSize); for (int i = 0; i < 12; ++i) { byte[] ib = Bytes.toBytes(i); b.add(ib, 0, ib.length); } b.compactBloom(); assertEquals(origSize>>2, b.getByteSize()); int falsePositives = 0; for (int i = 0; i < 25; ++i) { byte[] bytes = Bytes.toBytes(i); if (BloomFilterUtil.contains(bytes, 0, bytes.length, new MultiByteBuff(b.bloom), 0, (int) b.byteSize, b.hash, b.hashCount)) { if (i >= 12) falsePositives++; } else { assertFalse(i < 12); } } assertTrue(falsePositives <= 1); // test: foldFactor > log(max/actual) }
if (BloomFilterUtil.contains(bytes, 0, bytes.length, new MultiByteBuff(b.bloom), 0, (int) b.byteSize, b.hash, b.hashCount)) { if (i >= 1 * 1000 * 1000)
assertTrue(BloomFilterUtil.contains(key1, 0, key1.length, new MultiByteBuff(bf1.bloom), 0, (int) bf1.byteSize, bf1.hash, bf1.hashCount)); assertFalse(BloomFilterUtil.contains(key2, 0, key2.length, new MultiByteBuff(bf1.bloom), 0, (int) bf1.byteSize, bf1.hash, bf1.hashCount)); assertFalse(BloomFilterUtil.contains(key1, 0, key1.length, new MultiByteBuff(bf2.bloom), 0, (int) bf2.byteSize, bf2.hash, bf2.hashCount)); assertTrue(BloomFilterUtil.contains(key2, 0, key2.length, new MultiByteBuff(bf2.bloom), 0, (int) bf2.byteSize, bf2.hash, bf2.hashCount)); assertTrue(BloomFilterUtil.contains(bkey, 0, bkey.length, new MultiByteBuff(bf1.bloom), 0, (int) bf1.byteSize, bf1.hash, bf1.hashCount)); assertTrue(BloomFilterUtil.contains(bval, 1, bval.length - 1, new MultiByteBuff(bf1.bloom), 0, (int) bf1.byteSize, bf1.hash, bf1.hashCount)); assertFalse(BloomFilterUtil.contains(bval, 0, bval.length, new MultiByteBuff(bf1.bloom), 0, (int) bf1.byteSize, bf1.hash, bf1.hashCount)); BloomFilterChunk newBf1 = new BloomFilterChunk(1000, (float)0.01, Hash.MURMUR_HASH, 0); assertTrue(BloomFilterUtil.contains(key1, 0, key1.length, new MultiByteBuff(bb), 0, (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount)); assertFalse(BloomFilterUtil.contains(key2, 0, key2.length, new MultiByteBuff(bb), 0, (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount)); assertTrue(BloomFilterUtil.contains(bkey, 0, bkey.length, new MultiByteBuff(bb), 0, (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount)); assertTrue(BloomFilterUtil.contains(bval, 1, bval.length - 1, new MultiByteBuff(bb), 0, (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount)); assertFalse(BloomFilterUtil.contains(bval, 0, bval.length, new MultiByteBuff(bb), 0, (int) newBf1.byteSize, newBf1.hash, newBf1.hashCount)); assertFalse(BloomFilterUtil.contains(bval, 0, bval.length, new MultiByteBuff(bb), 0,
public void testBloomFold() throws Exception { // test: foldFactor < log(max/actual) BloomFilterChunk b = new BloomFilterChunk(1003, (float) 0.01, Hash.MURMUR_HASH, 2); b.allocBloom(); long origSize = b.getByteSize(); assertEquals(1204, origSize); for (int i = 0; i < 12; ++i) { byte[] ib = Bytes.toBytes(i); b.add(ib, 0, ib.length); } b.compactBloom(); assertEquals(origSize>>2, b.getByteSize()); int falsePositives = 0; for (int i = 0; i < 25; ++i) { byte[] bytes = Bytes.toBytes(i); if (BloomFilterUtil.contains(bytes, 0, bytes.length, new MultiByteBuff(b.bloom), 0, (int) b.byteSize, b.hash, b.hashCount)) { if (i >= 12) falsePositives++; } else { assertFalse(i < 12); } } assertTrue(falsePositives <= 1); // test: foldFactor > log(max/actual) }