/** * Removes a specified key from <i>this</i> counting Bloom filter. * <p> * <b>Invariant</b>: nothing happens if the specified key does not belong to <i>this</i> counter Bloom filter. * @param key The key to remove. */ public void delete(Key key) { if(key == null) { throw new NullPointerException("Key may not be null"); } if(!membershipTest(key)) { throw new IllegalArgumentException("Key is not a member"); } int[] h = hash.hash(key); hash.clear(); for(int i = 0; i < nbHash; i++) { // find the bucket int wordNum = h[i] >> 4; // div 16 int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4 long bucketMask = 15L << bucketShift; long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift; // only decrement if the count in the bucket is between 0 and BUCKET_MAX_VALUE if(bucketValue >= 1 && bucketValue < BUCKET_MAX_VALUE) { // decrement by 1 buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue - 1) << bucketShift); } } }
/** * Constructor * @param vectorSize The vector size of <i>this</i> filter. * @param nbHash The number of hash function to consider. * @param hashType type of the hashing function (see * {@link org.apache.hadoop.util.hash.Hash}). */ public CountingBloomFilter(int vectorSize, int nbHash, int hashType) { super(vectorSize, nbHash, hashType); buckets = new long[buckets2words(vectorSize)]; }
// Build a fresh counting filter that uses the Jenkins hash.
int hashId = Hash.JENKINS_HASH;
CountingBloomFilter filter = new CountingBloomFilter(bitSize, hashFunctionNumber, hashId);

// First insertion: the key must test as a member with an approximate count of 1.
filter.add(key);
assertTrue("CountingBloomFilter.membership error ", filter.membershipTest(key));
assertTrue("CountingBloomFilter.approximateCount error", filter.approximateCount(key) == 1);

// Second insertion of the same key: the approximate count must rise to 2.
filter.add(key);
assertTrue("CountingBloomFilter.approximateCount error", filter.approximateCount(key) == 2);

// One delete undoes one insertion, so the key must still be reported as a member.
filter.delete(key);
assertTrue("CountingBloomFilter.membership error ", filter.membershipTest(key));

// The second delete must remove the key completely: not a member, count back to 0.
filter.delete(key);
assertFalse("CountingBloomFilter.membership error ", filter.membershipTest(key));
assertTrue("CountingBloomFilter.approximateCount error", filter.approximateCount(key) == 0);
public void buildBloomFilters(ResultSet results) { CountingBloomFilter[] bloomFilters = new CountingBloomFilter[attributeList.size()]; for (int i = 0; i < bloomFilters.length; i++) { bloomFilters[i] = new CountingBloomFilter(bloomFilterSize, bloomFilterHashFunction, Hash.MURMUR_HASH); switch (attributeList.get(i).getType()) { case INT: bloomFilters[i].add(new Key(Integer.toString(results.getInt(i + 1)).getBytes())); break; case LONG: bloomFilters[i].add(new Key(Long.toString(results.getLong(i + 1)).getBytes())); break; case FLOAT: bloomFilters[i].add(new Key(Float.toString(results.getFloat(i + 1)).getBytes())); break; case DOUBLE: bloomFilters[i].add(new Key(Double.toString(results.getDouble(i + 1)).getBytes())); break; case STRING: String attributeValue = results.getString(i + 1); if (attributeValue != null) { bloomFilters[i].add(new Key(attributeValue.getBytes())); bloomFilters[i].add(new Key(Boolean.toString(results.getBoolean(i + 1)).getBytes())); break;
/**
 * Adds each non-null attribute value to the Bloom filter at the same index.
 * <p>
 * One position is visited per entry of {@code attributeList}. A value is keyed
 * by the bytes of its {@code toString()} form (platform default charset).
 *
 * @param obj attribute values, indexed like {@code attributeList}; {@code null}
 *            entries are skipped.
 */
public void addToBloomFilters(Object[] obj) {
  for (int idx = 0; idx < attributeList.size(); idx++) {
    Object value = obj[idx];
    if (value == null) {
      continue;
    }
    bloomFilters[idx].add(new Key(value.toString().getBytes()));
  }
}
private static Filter getSymmetricFilter(Class<?> filterClass, int numInsertions, int hashType) { int bitSetSize = optimalNumOfBits(numInsertions, 0.03); int hashFunctionNumber = 5; if (filterClass == BloomFilter.class) { return new BloomFilter(bitSetSize, hashFunctionNumber, hashType); } else if (filterClass == CountingBloomFilter.class) { return new CountingBloomFilter(bitSetSize, hashFunctionNumber, hashType); } else if (filterClass == RetouchedBloomFilter.class) { return new RetouchedBloomFilter(bitSetSize, hashFunctionNumber, hashType); } else if (filterClass == DynamicBloomFilter.class) { return new DynamicBloomFilter(bitSetSize, hashFunctionNumber, hashType, 3); } else { //fail fast assertFalse("unexpected filterClass", true); return null; } }
/**
 * Deletes each non-null attribute value from the Bloom filter at the same index.
 * <p>
 * One position is visited per entry of {@code attributeList}. A value is keyed
 * by the bytes of its {@code toString()} form (platform default charset).
 *
 * @param obj attribute values, indexed like {@code attributeList}; {@code null}
 *            entries are skipped.
 */
public void removeFromBloomFilters(Object[] obj) {
  for (int idx = 0; idx < attributeList.size(); idx++) {
    Object value = obj[idx];
    if (value == null) {
      continue;
    }
    bloomFilters[idx].delete(new Key(value.toString().getBytes()));
  }
}
// Build a fresh counting filter that uses the Jenkins hash.
int hashId = Hash.JENKINS_HASH;
CountingBloomFilter filter = new CountingBloomFilter(bitSize, hashFunctionNumber, hashId);

// First insertion: the key must test as a member with an approximate count of 1.
filter.add(key);
assertTrue("CountingBloomFilter.membership error ", filter.membershipTest(key));
assertTrue("CountingBloomFilter.approximateCount error", filter.approximateCount(key) == 1);

// Second insertion of the same key: the approximate count must rise to 2.
filter.add(key);
assertTrue("CountingBloomFilter.approximateCount error", filter.approximateCount(key) == 2);

// One delete undoes one insertion, so the key must still be reported as a member.
filter.delete(key);
assertTrue("CountingBloomFilter.membership error ", filter.membershipTest(key));

// The second delete must remove the key completely: not a member, count back to 0.
filter.delete(key);
assertFalse("CountingBloomFilter.membership error ", filter.membershipTest(key));
assertTrue("CountingBloomFilter.approximateCount error", filter.approximateCount(key) == 0);
/**
 * Adds each non-null output value of an event to the Bloom filter at the same index.
 * <p>
 * One position is visited per entry of {@code attributeList}. A value is keyed
 * by the bytes of its {@code toString()} form (platform default charset).
 *
 * @param event the event whose {@code getOutputData()} values are added;
 *              {@code null} values are skipped.
 */
public void addToBloomFilters(ComplexEvent event) {
  for (int idx = 0; idx < attributeList.size(); idx++) {
    Object value = event.getOutputData()[idx];
    if (value == null) {
      continue;
    }
    bloomFilters[idx].add(new Key(value.toString().getBytes()));
  }
}
private static Filter getSymmetricFilter(Class<?> filterClass, int numInsertions, int hashType) { int bitSetSize = optimalNumOfBits(numInsertions, 0.03); int hashFunctionNumber = 5; if (filterClass == BloomFilter.class) { return new BloomFilter(bitSetSize, hashFunctionNumber, hashType); } else if (filterClass == CountingBloomFilter.class) { return new CountingBloomFilter(bitSetSize, hashFunctionNumber, hashType); } else if (filterClass == RetouchedBloomFilter.class) { return new RetouchedBloomFilter(bitSetSize, hashFunctionNumber, hashType); } else if (filterClass == DynamicBloomFilter.class) { return new DynamicBloomFilter(bitSetSize, hashFunctionNumber, hashType, 3); } else { //fail fast assertFalse("unexpected filterClass", true); return null; } }
/**
 * Combines <i>this</i> filter with another counting Bloom filter using a
 * bitwise AND.
 * <p>
 * Each {@code long} word of this filter's bucket array is AND-ed with the word
 * at the same index in the other filter, and the result is stored in this filter.
 *
 * @param filter The filter to AND with. It must be a {@code CountingBloomFilter}
 *               with the same vector size and number of hash functions.
 * @throws IllegalArgumentException if {@code filter} is {@code null}, is not a
 *         {@code CountingBloomFilter}, or differs in vector size or hash count.
 */
@Override
public void and(Filter filter) {
  // instanceof is false for null, so this also rejects a null argument.
  boolean compatible = filter instanceof CountingBloomFilter
      && filter.vectorSize == this.vectorSize
      && filter.nbHash == this.nbHash;
  if (!compatible) {
    throw new IllegalArgumentException("filters cannot be and-ed");
  }

  CountingBloomFilter other = (CountingBloomFilter) filter;
  int wordCount = buckets2words(vectorSize);
  for (int w = 0; w < wordCount; w++) {
    this.buckets[w] = this.buckets[w] & other.buckets[w];
  }
}
/** * Removes a specified key from <i>this</i> counting Bloom filter. * <p> * <b>Invariant</b>: nothing happens if the specified key does not belong to <i>this</i> counter Bloom filter. * @param key The key to remove. */ public void delete(Key key) { if(key == null) { throw new NullPointerException("Key may not be null"); } if(!membershipTest(key)) { throw new IllegalArgumentException("Key is not a member"); } int[] h = hash.hash(key); hash.clear(); for(int i = 0; i < nbHash; i++) { // find the bucket int wordNum = h[i] >> 4; // div 16 int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4 long bucketMask = 15L << bucketShift; long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift; // only decrement if the count in the bucket is between 0 and BUCKET_MAX_VALUE if(bucketValue >= 1 && bucketValue < BUCKET_MAX_VALUE) { // decrement by 1 buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue - 1) << bucketShift); } } }
/**
 * Combines <i>this</i> filter with another counting Bloom filter using a
 * bitwise OR.
 * <p>
 * Each {@code long} word of this filter's bucket array is OR-ed with the word
 * at the same index in the other filter, and the result is stored in this filter.
 *
 * @param filter The filter to OR with. It must be a {@code CountingBloomFilter}
 *               with the same vector size and number of hash functions.
 * @throws IllegalArgumentException if {@code filter} is {@code null}, is not a
 *         {@code CountingBloomFilter}, or differs in vector size or hash count.
 */
@Override
public void or(Filter filter) {
  // instanceof is false for null, so this also rejects a null argument.
  boolean compatible = filter instanceof CountingBloomFilter
      && filter.vectorSize == this.vectorSize
      && filter.nbHash == this.nbHash;
  if (!compatible) {
    throw new IllegalArgumentException("filters cannot be or-ed");
  }

  CountingBloomFilter other = (CountingBloomFilter) filter;
  int wordCount = buckets2words(vectorSize);
  for (int w = 0; w < wordCount; w++) {
    this.buckets[w] = this.buckets[w] | other.buckets[w];
  }
}
/** * Removes a specified key from <i>this</i> counting Bloom filter. * <p> * <b>Invariant</b>: nothing happens if the specified key does not belong to <i>this</i> counter Bloom filter. * @param key The key to remove. */ public void delete(Key key) { if(key == null) { throw new NullPointerException("Key may not be null"); } if(!membershipTest(key)) { throw new IllegalArgumentException("Key is not a member"); } int[] h = hash.hash(key); hash.clear(); for(int i = 0; i < nbHash; i++) { // find the bucket int wordNum = h[i] >> 4; // div 16 int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4 long bucketMask = 15L << bucketShift; long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift; // only decrement if the count in the bucket is between 0 and BUCKET_MAX_VALUE if(bucketValue >= 1 && bucketValue < BUCKET_MAX_VALUE) { // decrement by 1 buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue - 1) << bucketShift); } } }
/**
 * Restores <i>this</i> filter from a data stream.
 * <p>
 * The superclass fields are read first. The bucket array is then reallocated
 * with {@code buckets2words(vectorSize)} words and filled with one
 * {@code long} per word, in index order.
 *
 * @param in the stream to read from.
 * @throws IOException if reading from {@code in} fails.
 */
@Override
public void readFields(DataInput in) throws IOException {
  super.readFields(in);

  final int wordCount = buckets2words(vectorSize);
  buckets = new long[wordCount];

  int w = 0;
  while (w < wordCount) {
    buckets[w] = in.readLong();
    w++;
  }
}
}
/** * Removes a specified key from <i>this</i> counting Bloom filter. * <p> * <b>Invariant</b>: nothing happens if the specified key does not belong to <i>this</i> counter Bloom filter. * @param key The key to remove. */ public void delete(Key key) { if(key == null) { throw new NullPointerException("Key may not be null"); } if(!membershipTest(key)) { throw new IllegalArgumentException("Key is not a member"); } int[] h = hash.hash(key); hash.clear(); for(int i = 0; i < nbHash; i++) { // find the bucket int wordNum = h[i] >> 4; // div 16 int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4 long bucketMask = 15L << bucketShift; long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift; // only decrement if the count in the bucket is between 0 and BUCKET_MAX_VALUE if(bucketValue >= 1 && bucketValue < BUCKET_MAX_VALUE) { // decrement by 1 buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue - 1) << bucketShift); } } }
/**
 * Serializes <i>this</i> filter to a data stream.
 * <p>
 * The superclass fields are written first, followed by
 * {@code buckets2words(vectorSize)} bucket words, one {@code long} each, in
 * index order.
 *
 * @param out the stream to write to.
 * @throws IOException if writing to {@code out} fails.
 */
@Override
public void write(DataOutput out) throws IOException {
  super.write(out);

  final int wordCount = buckets2words(vectorSize);
  int w = 0;
  while (w < wordCount) {
    out.writeLong(buckets[w]);
    w++;
  }
}
/** * Removes a specified key from <i>this</i> counting Bloom filter. * <p> * <b>Invariant</b>: nothing happens if the specified key does not belong to <i>this</i> counter Bloom filter. * @param key The key to remove. */ public void delete(Key key) { if(key == null) { throw new NullPointerException("Key may not be null"); } if(!membershipTest(key)) { throw new IllegalArgumentException("Key is not a member"); } int[] h = hash.hash(key); hash.clear(); for(int i = 0; i < nbHash; i++) { // find the bucket int wordNum = h[i] >> 4; // div 16 int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4 long bucketMask = 15L << bucketShift; long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift; // only decrement if the count in the bucket is between 0 and BUCKET_MAX_VALUE if(bucketValue >= 1 && bucketValue < BUCKET_MAX_VALUE) { // decrement by 1 buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue - 1) << bucketShift); } } }
/** * Constructor * @param vectorSize The vector size of <i>this</i> filter. * @param nbHash The number of hash function to consider. * @param hashType type of the hashing function (see * {@link org.apache.hadoop.util.hash.Hash}). */ public CountingBloomFilter(int vectorSize, int nbHash, int hashType) { super(vectorSize, nbHash, hashType); buckets = new long[buckets2words(vectorSize)]; }
/** * Removes a specified key from <i>this</i> counting Bloom filter. * <p> * <b>Invariant</b>: nothing happens if the specified key does not belong to <i>this</i> counter Bloom filter. * @param key The key to remove. */ public void delete(Key key) { if(key == null) { throw new NullPointerException("Key may not be null"); } if(!membershipTest(key)) { throw new IllegalArgumentException("Key is not a member"); } int[] h = hash.hash(key); hash.clear(); for(int i = 0; i < nbHash; i++) { // find the bucket int wordNum = h[i] >> 4; // div 16 int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4 long bucketMask = 15L << bucketShift; long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift; // only decrement if the count in the bucket is between 0 and BUCKET_MAX_VALUE if(bucketValue >= 1 && bucketValue < BUCKET_MAX_VALUE) { // decrement by 1 buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue - 1) << bucketShift); } } }