private synchronized void initBloomFilter(Configuration conf) { numKeys = conf.getInt( IO_MAPFILE_BLOOM_SIZE_KEY, IO_MAPFILE_BLOOM_SIZE_DEFAULT); // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for // single key, where <code> is the number of hash functions, // <code>n</code> is the number of keys and <code>c</code> is the desired // max. error rate. // Our desired error rate is by default 0.005, i.e. 0.5% float errorRate = conf.getFloat( IO_MAPFILE_BLOOM_ERROR_RATE_KEY, IO_MAPFILE_BLOOM_ERROR_RATE_DEFAULT); vectorSize = (int)Math.ceil((double)(-HASH_COUNT * numKeys) / Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT))); bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT, Hash.getHashType(conf), numKeys); }
/**
 * Loads the Bloom filter stored under {@code dirName} into {@code bloomFilter}.
 *
 * <p>An {@link IOException} raised while resolving the file system, opening or
 * reading {@code BLOOM_FILE_NAME}, or closing the stream is logged as a warning
 * and leaves {@code bloomFilter} set to {@code null}.
 *
 * @param dirName directory that contains the Bloom filter file
 * @param conf configuration used to resolve the file system of {@code dirName}
 */
private void initBloomFilter(Path dirName, Configuration conf) {
  DataInputStream stream = null;
  try {
    final FileSystem fileSystem = dirName.getFileSystem(conf);
    final Path bloomPath = new Path(dirName, BLOOM_FILE_NAME);
    stream = fileSystem.open(bloomPath);
    bloomFilter = new DynamicBloomFilter();
    bloomFilter.readFields(stream);
    // Explicit close so a failing close is handled by the catch below;
    // the reference is cleared afterwards so the finally block gets null.
    stream.close();
    stream = null;
  } catch (IOException e) {
    LOG.warn("Can't open BloomFilter: " + e + " - fallback to MapFile.");
    bloomFilter = null;
  } finally {
    // Still non-null only when something above failed before the reset.
    IOUtils.closeStream(stream);
  }
}
/**
 * Loads the Bloom filter stored under {@code dirName} into {@code bloomFilter}.
 *
 * <p>An {@link IOException} raised while opening, reading or closing
 * {@code BLOOM_FILE_NAME} is logged as a warning and leaves
 * {@code bloomFilter} set to {@code null}. The stream is released on every
 * path, including a failed read.
 *
 * @param fs file system used to open the Bloom filter file
 * @param dirName directory that contains the Bloom filter file
 * @param conf not referenced by this method
 */
private void initBloomFilter(FileSystem fs, String dirName, Configuration conf) {
  DataInputStream in = null;
  try {
    in = fs.open(new Path(dirName, BLOOM_FILE_NAME));
    bloomFilter = new DynamicBloomFilter();
    bloomFilter.readFields(in);
    in.close();
    // Cleared only after a successful close so the finally block skips it.
    in = null;
  } catch (IOException ioe) {
    LOG.warn("Can't open BloomFilter: " + ioe + " - fallback to MapFile.");
    bloomFilter = null;
  } finally {
    // Previously the stream leaked whenever readFields() threw.
    if (in != null) {
      try {
        in.close();
      } catch (IOException ignored) {
        // Best-effort cleanup: the primary failure was already logged above.
      }
    }
  }
}
/**
 * Creates a {@link DynamicBloomFilter} using {@code Hash.MURMUR_HASH}, with a
 * vector size of {@code ceil(-k*n / ln(1 - c^(1/k)))} where {@code k} is
 * {@code nbHash}, {@code n} is {@code expectedNumberOfElements} and {@code c}
 * is {@code errorRate}.
 *
 * @param expectedNumberOfElements number of elements used to size the filter
 * @param errorRate error rate used to size the filter
 * @param nbHash number of hash functions
 * @return a newly constructed filter
 */
@Nonnull
public static DynamicBloomFilter newDynamicBloomFilter(
    @Nonnegative final int expectedNumberOfElements,
    @Nonnegative final float errorRate,
    @Nonnegative final int nbHash) {
  // Multiply in double precision: the int product nbHash * expectedNumberOfElements
  // wraps around for large element counts and would produce a bogus size.
  final double negatedHashedElements = -((double) nbHash * expectedNumberOfElements);
  final int vectorSize = (int) Math.ceil(
      negatedHashedElements / Math.log(1.d - Math.pow(errorRate, 1.d / nbHash)));
  return new DynamicBloomFilter(vectorSize, nbHash, Hash.MURMUR_HASH,
      expectedNumberOfElements);
}
private synchronized void initBloomFilter(Configuration conf) { numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024); // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for // single key, where <code> is the number of hash functions, // <code>n</code> is the number of keys and <code>c</code> is the desired // max. error rate. // Our desired error rate is by default 0.005, i.e. 0.5% float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f); vectorSize = (int)Math.ceil((double)(-HASH_COUNT * numKeys) / Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT))); bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT, Hash.getHashType(conf), numKeys); }
private synchronized void initBloomFilter(Configuration conf) { numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024); // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for // single key, where <code> is the number of hash functions, // <code>n</code> is the number of keys and <code>c</code> is the desired // max. error rate. // Our desired error rate is by default 0.005, i.e. 0.5% float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f); vectorSize = (int)Math.ceil((double)(-HASH_COUNT * numKeys) / Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT))); bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT, Hash.getHashType(conf), numKeys); }
private synchronized void initBloomFilter(Configuration conf) { numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024); // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for // single key, where <code> is the number of hash functions, // <code>n</code> is the number of keys and <code>c</code> is the desired // max. error rate. // Our desired error rate is by default 0.005, i.e. 0.5% float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f); vectorSize = (int)Math.ceil((double)(-HASH_COUNT * numKeys) / Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT))); bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT, Hash.getHashType(conf), numKeys); }
private synchronized void initBloomFilter(Configuration conf) { numKeys = conf.getInt("io.mapfile.bloom.size", 1024 * 1024); // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for // single key, where <code> is the number of hash functions, // <code>n</code> is the number of keys and <code>c</code> is the desired // max. error rate. // Our desired error rate is by default 0.005, i.e. 0.5% float errorRate = conf.getFloat("io.mapfile.bloom.error.rate", 0.005f); vectorSize = (int)Math.ceil((double)(-HASH_COUNT * numKeys) / Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT))); bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT, Hash.getHashType(conf), numKeys); }
/**
 * Returns the filter encoded in {@code bloomStr}, reusing the previously
 * decoded filter when the serialized text equals the one seen last time.
 *
 * @param bloomStr serialized Bloom filter
 * @return the cached or freshly deserialized filter
 * @throws HiveException wrapping an {@link IOException} from deserialization
 */
@Nonnull
private Filter getFilter(@Nonnull final Text bloomStr) throws HiveException {
  final boolean cacheHit = prevBf != null && prevBfStr.equals(bloomStr);
  if (cacheHit) {
    return prevBf;
  }

  final Filter decoded;
  try {
    decoded = BloomFilterUtils.deserialize(bloomStr, new DynamicBloomFilter());
  } catch (IOException cause) {
    throw new HiveException(cause);
  }

  // Remember a copy of the text together with its decoded filter.
  this.prevBfStr = new Text(bloomStr);
  this.prevBf = decoded;
  return decoded;
}
private synchronized void initBloomFilter(Configuration conf) { numKeys = conf.getInt( IO_MAPFILE_BLOOM_SIZE_KEY, IO_MAPFILE_BLOOM_SIZE_DEFAULT); // vector size should be <code>-kn / (ln(1 - c^(1/k)))</code> bits for // single key, where <code> is the number of hash functions, // <code>n</code> is the number of keys and <code>c</code> is the desired // max. error rate. // Our desired error rate is by default 0.005, i.e. 0.5% float errorRate = conf.getFloat( IO_MAPFILE_BLOOM_ERROR_RATE_KEY, IO_MAPFILE_BLOOM_ERROR_RATE_DEFAULT); vectorSize = (int)Math.ceil((double)(-HASH_COUNT * numKeys) / Math.log(1.0 - Math.pow(errorRate, 1.0/HASH_COUNT))); bloomFilter = new DynamicBloomFilter(vectorSize, HASH_COUNT, Hash.getHashType(conf), numKeys); }
/**
 * Loads the Bloom filter stored under {@code dirName} into {@code bloomFilter}.
 *
 * <p>An {@link IOException} raised while resolving the file system, opening or
 * reading {@code BLOOM_FILE_NAME}, or closing the stream is logged as a warning
 * and leaves {@code bloomFilter} set to {@code null}.
 *
 * @param dirName directory that contains the Bloom filter file
 * @param conf configuration used to resolve the file system of {@code dirName}
 */
private void initBloomFilter(Path dirName, Configuration conf) {
  DataInputStream stream = null;
  try {
    final FileSystem fileSystem = dirName.getFileSystem(conf);
    final Path bloomPath = new Path(dirName, BLOOM_FILE_NAME);
    stream = fileSystem.open(bloomPath);
    bloomFilter = new DynamicBloomFilter();
    bloomFilter.readFields(stream);
    // Explicit close so a failing close is handled by the catch below;
    // the reference is cleared afterwards so the finally block gets null.
    stream.close();
    stream = null;
  } catch (IOException e) {
    LOG.warn("Can't open BloomFilter: " + e + " - fallback to MapFile.");
    bloomFilter = null;
  } finally {
    // Still non-null only when something above failed before the reset.
    IOUtils.closeStream(stream);
  }
}
/**
 * Returns the filter encoded in {@code bloomStr}, reusing the previously
 * decoded filter when the serialized text equals the one seen last time.
 *
 * @param bloomStr serialized Bloom filter
 * @return the cached or freshly deserialized filter
 * @throws HiveException wrapping an {@link IOException} from deserialization
 */
@Nonnull
private Filter getFilter(@Nonnull final Text bloomStr) throws HiveException {
  final boolean cacheHit = prevBf != null && prevBfStr.equals(bloomStr);
  if (cacheHit) {
    return prevBf;
  }

  final Filter decoded;
  try {
    decoded = BloomFilterUtils.deserialize(bloomStr, new DynamicBloomFilter());
  } catch (IOException cause) {
    throw new HiveException(cause);
  }

  // Remember a copy of the text together with its decoded filter.
  this.prevBfStr = new Text(bloomStr);
  this.prevBf = decoded;
  return decoded;
}
/**
 * Loads the Bloom filter stored under {@code dirName} into {@code bloomFilter}.
 *
 * <p>An {@link IOException} raised while resolving the file system, opening or
 * reading {@code BLOOM_FILE_NAME}, or closing the stream is logged as a warning
 * and leaves {@code bloomFilter} set to {@code null}.
 *
 * @param dirName directory that contains the Bloom filter file
 * @param conf configuration used to resolve the file system of {@code dirName}
 */
private void initBloomFilter(Path dirName, Configuration conf) {
  DataInputStream stream = null;
  try {
    final FileSystem fileSystem = dirName.getFileSystem(conf);
    final Path bloomPath = new Path(dirName, BLOOM_FILE_NAME);
    stream = fileSystem.open(bloomPath);
    bloomFilter = new DynamicBloomFilter();
    bloomFilter.readFields(stream);
    // Explicit close so a failing close is handled by the catch below;
    // the reference is cleared afterwards so the finally block gets null.
    stream.close();
    stream = null;
  } catch (IOException e) {
    LOG.warn("Can't open BloomFilter: " + e + " - fallback to MapFile.");
    bloomFilter = null;
  } finally {
    // Still non-null only when something above failed before the reset.
    IOUtils.closeStream(stream);
  }
}
/**
 * Loads the Bloom filter stored under {@code dirName} into {@code bloomFilter}.
 *
 * <p>An {@link IOException} raised while resolving the file system, opening or
 * reading {@code BLOOM_FILE_NAME}, or closing the stream is logged as a warning
 * and leaves {@code bloomFilter} set to {@code null}.
 *
 * @param dirName directory that contains the Bloom filter file
 * @param conf configuration used to resolve the file system of {@code dirName}
 */
private void initBloomFilter(Path dirName, Configuration conf) {
  DataInputStream stream = null;
  try {
    final FileSystem fileSystem = dirName.getFileSystem(conf);
    final Path bloomPath = new Path(dirName, BLOOM_FILE_NAME);
    stream = fileSystem.open(bloomPath);
    bloomFilter = new DynamicBloomFilter();
    bloomFilter.readFields(stream);
    // Explicit close so a failing close is handled by the catch below;
    // the reference is cleared afterwards so the finally block gets null.
    stream.close();
    stream = null;
  } catch (IOException e) {
    LOG.warn("Can't open BloomFilter: " + e + " - fallback to MapFile.");
    bloomFilter = null;
  } finally {
    // Still non-null only when something above failed before the reset.
    IOUtils.closeStream(stream);
  }
}
/**
 * Loads the Bloom filter stored under {@code dirName} into {@code bloomFilter}.
 *
 * <p>An {@link IOException} raised while resolving the file system, opening or
 * reading {@code BLOOM_FILE_NAME}, or closing the stream is logged as a warning
 * and leaves {@code bloomFilter} set to {@code null}.
 *
 * @param dirName directory that contains the Bloom filter file
 * @param conf configuration used to resolve the file system of {@code dirName}
 */
private void initBloomFilter(Path dirName, Configuration conf) {
  DataInputStream stream = null;
  try {
    final FileSystem fileSystem = dirName.getFileSystem(conf);
    final Path bloomPath = new Path(dirName, BLOOM_FILE_NAME);
    stream = fileSystem.open(bloomPath);
    bloomFilter = new DynamicBloomFilter();
    bloomFilter.readFields(stream);
    // Explicit close so a failing close is handled by the catch below;
    // the reference is cleared afterwards so the finally block gets null.
    stream.close();
    stream = null;
  } catch (IOException e) {
    LOG.warn("Can't open BloomFilter: " + e + " - fallback to MapFile.");
    bloomFilter = null;
  } finally {
    // Still non-null only when something above failed before the reset.
    IOUtils.closeStream(stream);
  }
}
/**
 * Combines two serialized Bloom filters with {@code or} and returns the
 * serialized result.
 *
 * @param bloom1Str first serialized filter; receives the union before re-serialization
 * @param bloom2Str second serialized filter
 * @return the serialized union, or {@code null} when either argument is {@code null}
 * @throws HiveException wrapping an {@link IOException} from (de)serialization
 */
@Nullable
public Text evaluate(@Nullable Text bloom1Str, @Nullable Text bloom2Str)
    throws HiveException {
  if (bloom1Str == null || bloom2Str == null) {
    return null;
  }

  try {
    final Filter left = BloomFilterUtils.deserialize(bloom1Str, new DynamicBloomFilter());
    final Filter right = BloomFilterUtils.deserialize(bloom2Str, new DynamicBloomFilter());
    // Fold the second filter into the first, then write the first back out.
    left.or(right);
    return BloomFilterUtils.serialize(left, new Text());
  } catch (IOException cause) {
    throw new HiveException(cause);
  }
}
/**
 * Merges a serialized partial Bloom filter into {@code filter}.
 *
 * <p>When {@code filter} is still {@code null} the deserialized partial
 * becomes the filter; otherwise it is OR-ed into the existing one.
 *
 * @param partial serialized partial filter
 * @return always {@code true}
 * @throws HiveException wrapping an {@link IOException} from deserialization
 */
public boolean merge(@Nonnull Text partial) throws HiveException {
  final DynamicBloomFilter incoming;
  try {
    incoming = BloomFilterUtils.deserialize(partial, new DynamicBloomFilter());
  } catch (IOException cause) {
    throw new HiveException(cause);
  }

  if (filter != null) {
    filter.or(incoming);
  } else {
    this.filter = incoming;
  }
  return true;
}
/**
 * Runs the shared Bloom filter test strategies against a
 * {@link DynamicBloomFilter} built with the Jenkins hash, then checks that
 * {@code toString()} does not return {@code null}.
 */
@Test
public void testDynamicBloomFilter() {
  final int hashId = Hash.JENKINS_HASH;
  final Filter filter =
      new DynamicBloomFilter(bitSize, hashFunctionNumber, hashId, 3);

  final ImmutableSet<BloomFilterTestStrategy> strategies = ImmutableSet.of(
      BloomFilterTestStrategy.KEY_TEST_STRATEGY,
      BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
      BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
      BloomFilterTestStrategy.WRITE_READ_STRATEGY,
      BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY);

  BloomFilterCommonTester.of(hashId, numInsertions)
      .withFilterInstance(filter)
      .withTestCases(strategies)
      .test();

  assertNotNull("testDynamicBloomFilter error ", filter.toString());
}
/**
 * Runs the shared Bloom filter test strategies against a
 * {@link DynamicBloomFilter} built with the Jenkins hash, then checks that
 * {@code toString()} does not return {@code null}.
 */
@Test
public void testDynamicBloomFilter() {
  final int hashId = Hash.JENKINS_HASH;
  final Filter filter =
      new DynamicBloomFilter(bitSize, hashFunctionNumber, hashId, 3);

  final ImmutableSet<BloomFilterTestStrategy> strategies = ImmutableSet.of(
      BloomFilterTestStrategy.KEY_TEST_STRATEGY,
      BloomFilterTestStrategy.ADD_KEYS_STRATEGY,
      BloomFilterTestStrategy.EXCEPTIONS_CHECK_STRATEGY,
      BloomFilterTestStrategy.WRITE_READ_STRATEGY,
      BloomFilterTestStrategy.ODD_EVEN_ABSENT_STRATEGY);

  BloomFilterCommonTester.of(hashId, numInsertions)
      .withFilterInstance(filter)
      .withTestCases(strategies)
      .test();

  assertNotNull("testDynamicBloomFilter error ", filter.toString());
}
private static Filter getSymmetricFilter(Class<?> filterClass, int numInsertions, int hashType) { int bitSetSize = optimalNumOfBits(numInsertions, 0.03); int hashFunctionNumber = 5; if (filterClass == BloomFilter.class) { return new BloomFilter(bitSetSize, hashFunctionNumber, hashType); } else if (filterClass == CountingBloomFilter.class) { return new CountingBloomFilter(bitSetSize, hashFunctionNumber, hashType); } else if (filterClass == RetouchedBloomFilter.class) { return new RetouchedBloomFilter(bitSetSize, hashFunctionNumber, hashType); } else if (filterClass == DynamicBloomFilter.class) { return new DynamicBloomFilter(bitSetSize, hashFunctionNumber, hashType, 3); } else { //fail fast assertFalse("unexpected filterClass", true); return null; } }