/**
 * Adds a new row to <i>this</i> dynamic Bloom filter.
 * <p>
 * The current rows are carried over, in order, into an array that is one
 * slot longer; the extra slot at the end receives a newly constructed
 * {@code BloomFilter} built from {@code vectorSize}, {@code nbHash} and
 * {@code hashType}. The enlarged array then replaces {@code matrix}.
 */
private void addRow() {
  final int oldRowCount = matrix.length;
  BloomFilter[] grown = new BloomFilter[oldRowCount + 1];
  // bulk copy of the existing row references
  System.arraycopy(matrix, 0, grown, 0, oldRowCount);
  grown[oldRowCount] = new BloomFilter(vectorSize, nbHash, hashType);
  matrix = grown;
}
/**
 * Invokes {@code not()} on every row of {@code matrix}, from the first row
 * to the last.
 */
@Override
public void not() {
  for (BloomFilter row : matrix) {
    row.not();
  }
}
/**
 * Constructor.
 * <p>
 * Stores the three settings in the corresponding fields and creates the
 * {@code HashFunction} used by <i>this</i> filter from the same values.
 *
 * @param vectorSize The vector size of <i>this</i> filter.
 * @param nbHash The number of hash functions to consider.
 * @param hashType type of the hashing function (see {@link Hash}).
 */
protected Filter(int vectorSize, int nbHash, int hashType) {
  this.vectorSize = vectorSize;
  this.nbHash = nbHash;
  this.hashType = hashType;
  // same three values that were just stored in the fields above
  this.hash = new HashFunction(vectorSize, nbHash, hashType);
}
/**
 * Restores the state of <i>this</i> filter from {@code in}.
 * <p>
 * Read order: the superclass fields, then three ints ({@code nr},
 * {@code currentNbRecord} and the number of rows), then that many rows.
 * Each row is a default-constructed {@code BloomFilter} that reads its own
 * fields from {@code in}.
 *
 * @param in the input to read from
 * @throws IOException propagated from the underlying reads
 */
@Override
public void readFields(DataInput in) throws IOException {
  super.readFields(in);
  nr = in.readInt();
  currentNbRecord = in.readInt();
  final int rowCount = in.readInt();
  matrix = new BloomFilter[rowCount];
  for (int row = 0; row < rowCount; row++) {
    matrix[row] = new BloomFilter();
    matrix[row].readFields(in);
  }
}
/**
 * Adds a key to <i>this</i> filter.
 * <p>
 * The key goes into the row returned by {@code getActiveStandardBF()}. When
 * that call returns {@code null}, a new row is appended first, the record
 * counter is reset to zero, and the key goes into the new row. The record
 * counter is incremented after every insertion.
 *
 * @param key the key to add
 * @throws NullPointerException if {@code key} is {@code null}
 */
@Override
public void add(Key key) {
  if (key == null) {
    throw new NullPointerException("Key can not be null");
  }

  BloomFilter target = getActiveStandardBF();
  if (target == null) {
    // no active row available: append one and start counting from zero
    addRow();
    target = matrix[matrix.length - 1];
    currentNbRecord = 0;
  }

  target.add(key);
  currentNbRecord++;
}
/**
 * Restores the state of <i>this</i> filter from {@code in}.
 * <p>
 * After the superclass fields have been read, {@code buckets} is replaced
 * by a new array of {@code buckets2words(vectorSize)} longs, filled in
 * index order with one {@code readLong()} per element.
 *
 * @param in the input to read from
 * @throws IOException propagated from the underlying reads
 */
@Override
public void readFields(DataInput in) throws IOException {
  super.readFields(in);
  final int wordCount = buckets2words(vectorSize);
  buckets = new long[wordCount];
  int word = 0;
  while (word < wordCount) {
    buckets[word] = in.readLong();
    word++;
  }
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Serializes <i>this</i> filter to {@code out}.
 * <p>
 * The superclass data is written first, followed by the first
 * {@code buckets2words(vectorSize)} elements of {@code buckets}, each as a
 * long and in index order.
 *
 * @param out the output to write to
 * @throws IOException propagated from the underlying writes
 */
@Override
public void write(DataOutput out) throws IOException {
  super.write(out);
  final int wordCount = buckets2words(vectorSize);
  for (int word = 0; word < wordCount; word++) {
    out.writeLong(buckets[word]);
  }
}
/**
 * Tests whether {@code key} is reported as present by any row of
 * {@code matrix}.
 * <p>
 * Rows are consulted in index order and the search stops at the first row
 * whose own {@code membershipTest} answers {@code true}.
 *
 * @param key the key to look up
 * @return {@code true} if {@code key} is {@code null} or at least one row
 *         reports it as a member; {@code false} otherwise
 */
@Override
public boolean membershipTest(Key key) {
  // NOTE(review): a null key is answered with true rather than rejected;
  // kept as-is, confirm this is intended.
  if (key == null) {
    return true;
  }

  for (BloomFilter row : matrix) {
    if (row.membershipTest(key)) {
      return true;
    }
  }
  return false;
}
/**
 * Adds a list of keys to <i>this</i> filter.
 * <p>
 * Every element is passed to {@code add(Key)} in the list's iteration order.
 *
 * @param keys The list of keys.
 * @throws IllegalArgumentException if {@code keys} is {@code null}
 */
public void add(List<Key> keys) {
  if (keys == null) {
    // NOTE(review): the message names ArrayList although any List is accepted.
    throw new IllegalArgumentException("ArrayList<Key> may not be null");
  }

  for (Key element : keys) {
    add(element);
  }
}
/**
 * Constructor.
 * <p>
 * Builds a key with a specified weight by delegating to
 * {@code set(value, weight)}.
 *
 * @param value The value of <i>this</i> key.
 * @param weight The weight associated to <i>this</i> key.
 */
public Key(byte[] value, double weight) {
  this.set(value, weight);
}
/**
 * Sums the weights of the given keys.
 *
 * @param keyList the keys whose {@code getWeight()} values are added up
 * @return the total weight; {@code 0.0} when the list has no elements
 */
private double getWeight(List<Key> keyList) {
  double total = 0.0;
  for (Key key : keyList) {
    total += key.getWeight();
  }
  return total;
}
/**
 * Compares <i>this</i> key with {@code other}.
 * <p>
 * The comparison proceeds in three stages and stops at the first stage that
 * yields a non-zero result:
 * <ol>
 *   <li>the difference of the two {@code bytes} array lengths;</li>
 *   <li>the element-wise difference of the {@code bytes} arrays, in index
 *       order;</li>
 *   <li>the ordering of the two weights as defined by
 *       {@link Double#compare(double, double)}.</li>
 * </ol>
 *
 * @param other the key to compare against
 * @return a negative value, zero, or a positive value as <i>this</i> key is
 *         less than, equal to, or greater than {@code other}
 */
@Override
public int compareTo(Key other) {
  int result = this.bytes.length - other.getBytes().length;
  for (int i = 0; result == 0 && i < bytes.length; i++) {
    result = this.bytes[i] - other.bytes[i];
  }

  if (result == 0) {
    // Casting the weight difference to int truncated fractional gaps to 0
    // (1.0 vs 1.5 compared as equal) and saturated on very large gaps.
    // Double.compare orders the weights without that loss; note that it
    // also distinguishes -0.0 from 0.0 and gives NaN a fixed position.
    result = Double.compare(this.weight, other.weight);
  }
  return result;
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Constructor.
 * <p>
 * Forwards the three settings to the superclass constructor, sets
 * {@code rand} to {@code null}, then calls {@code createVector()}.
 *
 * @param vectorSize The vector size of <i>this</i> filter.
 * @param nbHash The number of hash function to consider.
 * @param hashType type of the hashing function (see
 * {@link org.apache.hadoop.util.hash.Hash}).
 */
public RetouchedBloomFilter(int vectorSize, int nbHash, int hashType) {
  super(vectorSize, nbHash, hashType);

  rand = null;
  // order kept: rand is cleared before createVector() runs
  createVector();
}
/** * Constructor * @param vectorSize The vector size of <i>this</i> filter. * @param nbHash The number of hash function to consider. * @param hashType type of the hashing function (see * {@link org.apache.hadoop.util.hash.Hash}). */ public CountingBloomFilter(int vectorSize, int nbHash, int hashType) { super(vectorSize, nbHash, hashType); buckets = new long[buckets2words(vectorSize)]; }
/**
 * Tests whether {@code o} is a {@code Key} that compares as equal to
 * <i>this</i> one.
 *
 * @param o the object to compare against
 * @return {@code true} only when {@code o} is a {@code Key} and
 *         {@code compareTo} returns zero for it; {@code false} otherwise,
 *         including for {@code null}
 */
@Override
public boolean equals(Object o) {
  return (o instanceof Key) && this.compareTo((Key) o) == 0;
}
/**
 * Adds an array of keys to <i>this</i> filter.
 * <p>
 * Every element is passed to {@code add(Key)} in index order.
 *
 * @param keys The array of keys.
 * @throws IllegalArgumentException if {@code keys} is {@code null}
 */
public void add(Key[] keys) {
  if (keys == null) {
    throw new IllegalArgumentException("Key[] may not be null");
  }

  for (Key element : keys) {
    add(element);
  }
}
/**
 * Chooses the bit position that maximizes the number of false positive removed.
 * <p>
 * For each of the first {@code nbHash} entries of {@code h}, the weight of
 * the key list stored in {@code fpVector} at that position is computed with
 * {@code getWeight}; the position with the greatest weight is returned. On
 * a tie the earliest candidate in {@code h} is kept.
 *
 * @param h The different bit positions.
 * @return The position that maximizes the number of false positive removed,
 *         or {@code Integer.MIN_VALUE} if no candidate was selected (for
 *         example when {@code nbHash} is not positive).
 */
private int maximumFpRemove(int[] h) {
  int maxIndex = Integer.MIN_VALUE;
  // Double.MIN_VALUE is the smallest POSITIVE double, so using it as the
  // starting point rejected every candidate whose weight was 0.0 or less
  // and left maxIndex at Integer.MIN_VALUE. Negative infinity is the
  // correct identity for a maximum search.
  double maxValue = Double.NEGATIVE_INFINITY;

  for (int i = 0; i < nbHash; i++) {
    double fpWeight = getWeight(fpVector[h[i]]);

    if (fpWeight > maxValue) {
      maxValue = fpWeight;
      maxIndex = h[i];
    }
  }

  return maxIndex;
}
/**
 * Chooses the bit position that minimizes the number of false negative generated.
 * <p>
 * For each of the first {@code nbHash} entries of {@code h}, the weight of
 * the key list stored in {@code keyVector} at that position is computed
 * with {@code getWeight}; the position with the smallest weight is
 * returned. On a tie the earliest candidate in {@code h} is kept.
 *
 * @param h The different bit positions.
 * @return The position that minimizes the number of false negative generated,
 *         or {@code Integer.MAX_VALUE} if no candidate weight was below
 *         {@code Double.MAX_VALUE}.
 */
private int minimumFnRemove(int[] h) {
  double lowestWeight = Double.MAX_VALUE;
  int bestPosition = Integer.MAX_VALUE;

  for (int i = 0; i < nbHash; i++) {
    final int position = h[i];
    final double candidateWeight = getWeight(keyVector[position]);

    // strict comparison: an equal weight never displaces an earlier pick
    if (candidateWeight < lowestWeight) {
      lowestWeight = candidateWeight;
      bestPosition = position;
    }
  }

  return bestPosition;
}