Refine search
/**
 * Restores this filter's state from {@code in}.
 *
 * <p>After the superclass fields, reads {@code nr}, {@code currentNbRecord} and the row count
 * (in that order) and then rebuilds {@code matrix}, deserializing one {@code BloomFilter} per row.
 *
 * @param in the stream to read the serialized state from
 * @throws IOException if reading from {@code in} fails
 */
@Override
public void readFields(DataInput in) throws IOException {
  super.readFields(in);

  nr = in.readInt();
  currentNbRecord = in.readInt();

  final int rowCount = in.readInt();
  matrix = new BloomFilter[rowCount];
  for (int row = 0; row < rowCount; row++) {
    BloomFilter rowFilter = new BloomFilter();
    // Store the row before populating it, so the array slot is never left empty mid-read.
    matrix[row] = rowFilter;
    rowFilter.readFields(in);
  }
}
/**
 * Adds {@code key} to the currently active row of this filter.
 *
 * <p>When {@code getActiveStandardBF()} reports no active row, a new row is appended via
 * {@code addRow()}, becomes the target, and {@code currentNbRecord} is reset to zero before
 * the key is recorded.
 *
 * @param key the key to add; must not be {@code null}
 * @throws NullPointerException if {@code key} is {@code null}
 */
@Override
public void add(Key key) {
  if (key == null) {
    throw new NullPointerException("Key can not be null");
  }

  BloomFilter target = getActiveStandardBF();
  if (target == null) {
    addRow();
    target = matrix[matrix.length - 1];
    currentNbRecord = 0;
  }

  target.add(key);
  currentNbRecord += 1;
}
/**
 * Tests whether {@code key} is reported as present by any row of {@code matrix}.
 *
 * @param key the key to look up; a {@code null} key is reported as present
 * @return {@code true} if {@code key} is {@code null} or at least one row's
 *     {@code membershipTest} returns {@code true}; {@code false} otherwise
 */
@Override
public boolean membershipTest(Key key) {
  if (key == null) {
    return true;
  }

  for (BloomFilter row : matrix) {
    if (row.membershipTest(key)) {
      return true;
    }
  }
  return false;
}
/**
 * Adds {@code key} to the bloom filter of the partition it belongs to, creating that
 * filter on first use.
 *
 * <p>With a single-element {@code bloomFilters} array the key always goes to slot 0;
 * otherwise the slot is the key's non-negative hash code modulo {@code numBloomFilters}.
 *
 * @param key the value to add; it is converted to bytes with {@code DataType.toBytes}
 * @throws ExecException if the key cannot be converted to bytes
 */
private void addKeyToBloomFilter(Object key) throws ExecException {
  Key k = new Key(DataType.toBytes(key, keyType));

  // Masking with Integer.MAX_VALUE clears the sign bit so the modulo result is never negative.
  final int slot = (bloomFilters.length == 1)
      ? 0
      : (key.hashCode() & Integer.MAX_VALUE) % numBloomFilters;

  if (bloomFilters[slot] == null) {
    bloomFilters[slot] = new BloomFilter(vectorSizeBytes * 8, numHash, hashType);
  }
  bloomFilters[slot].add(k);
}
/**
 * Tests whether {@code key} is reported as present by the underlying filter.
 *
 * <p>The key is encoded as UTF-8 bytes before being handed to {@code filter.membershipTest}.
 *
 * @param key the key to look up; must not be {@code null}
 * @return the result of the underlying filter's membership test for the encoded key
 * @throws NullPointerException if {@code key} is {@code null}
 */
public boolean mightContain(String key) {
  if (key == null) {
    // Message previously read "cannot by null" (typo).
    throw new NullPointerException("Key cannot be null");
  }
  return filter.membershipTest(new Key(key.getBytes(StandardCharsets.UTF_8)));
}
/**
 * Restores this filter's state from {@code in}.
 *
 * <p>After the superclass fields and a call to {@code createVector()}, the stream is consumed
 * in this order: for every entry of {@code fpVector} a count followed by that many keys, the
 * same for every entry of {@code keyVector}, and finally one double per entry of {@code ratio}.
 *
 * @param in the stream to read the serialized state from
 * @throws IOException if reading from {@code in} fails
 */
@Override
public void readFields(DataInput in) throws IOException {
  super.readFields(in);
  createVector();

  for (List<Key> bucket : fpVector) {
    final int count = in.readInt();
    for (int read = 0; read < count; read++) {
      Key k = new Key();
      k.readFields(in);
      bucket.add(k);
    }
  }

  for (List<Key> bucket : keyVector) {
    final int count = in.readInt();
    for (int read = 0; read < count; read++) {
      Key k = new Key();
      k.readFields(in);
      bucket.add(k);
    }
  }

  for (int idx = 0; idx < ratio.length; idx++) {
    ratio[idx] = in.readDouble();
  }
}
}
/**
 * Appends one freshly constructed row to <i>this</i> dynamic Bloom filter.
 *
 * <p>{@code matrix} is replaced by an array one element longer: the existing rows keep their
 * positions and the new {@code BloomFilter} occupies the last slot.
 */
private void addRow() {
  final int oldLength = matrix.length;
  BloomFilter[] grown = new BloomFilter[oldLength + 1];
  System.arraycopy(matrix, 0, grown, 0, oldLength);
  grown[oldLength] = new BloomFilter(vectorSize, nbHash, hashType);
  matrix = grown;
}
/**
 * Builds a fresh filter that is the OR of every serialized filter found in the input bag.
 *
 * <p>{@code filter} is reset to a new {@code BloomFilter}; field 0 of {@code input} is read as
 * a bag, and field 0 of each tuple in that bag is decoded with {@code bloomIn} and OR-ed into
 * {@code filter}.
 *
 * @param input tuple whose first field is a bag of tuples, each carrying a serialized filter
 *     in its first field
 * @return the value produced by {@code bloomOut()} after all filters were combined
 * @throws IOException if a field cannot be read (wrapping the {@code ExecException}) or if
 *     decoding/encoding a filter fails
 */
protected DataByteArray bloomOr(Tuple input) throws IOException {
  filter = new BloomFilter(vSize, numHash, hType);

  try {
    DataBag values = (DataBag) input.get(0);
    Iterator<Tuple> cursor = values.iterator();
    while (cursor.hasNext()) {
      DataByteArray serialized = (DataByteArray) cursor.next().get(0);
      filter.or(bloomIn(serialized));
    }
  } catch (ExecException ee) {
    throw new IOException(ee);
  }

  return bloomOut();
}
/**
 * Checks that {@code BloomFilter#not} leaves no bit in common with the filter's previous
 * bit set, starting from an 8-bit filter whose bits are seeded from the byte {@code 0x95}.
 */
@Test
public void testNot() {
  BloomFilter bf = new BloomFilter(8, 1, Hash.JENKINS_HASH);
  bf.bits = BitSet.valueOf(new byte[] { (byte) 0x95 });

  // Snapshot the bits before inversion so the comparison is against the original state.
  BitSet before = (BitSet) bf.bits.clone();
  bf.not();

  boolean sharesAnyBit = bf.bits.intersects(before);
  assertFalse("BloomFilter#not should have inverted all bits", sharesAnyBit);
}
}
/**
 * Serializes this filter to {@code out}.
 *
 * <p>After the superclass fields, writes {@code nr}, {@code currentNbRecord} and the number of
 * rows (in that order), followed by every row of {@code matrix} in index order.
 *
 * @param out the stream to write the serialized state to
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
  super.write(out);

  out.writeInt(nr);
  out.writeInt(currentNbRecord);
  out.writeInt(matrix.length);

  for (BloomFilter row : matrix) {
    row.write(out);
  }
}
/**
 * Creates a {@code DataItem} carrying {@code value}, {@code timestamp} and a bloom filter
 * populated with every key in {@code allKeys}.
 *
 * <p>The filter is constructed from the {@code bloom_filter_num_entries} and
 * {@code bloom_filter_hf} settings of {@code Config.getConfig()}.
 *
 * @param value the payload stored in the item
 * @param allKeys the keys to add to the item's bloom filter
 * @param timestamp the timestamp stored in the item
 * @return the newly constructed item
 */
public DataItem instantiateKaijuItem(byte[] value, Collection<String> allKeys, long timestamp) {
  BloomFilter keyFilter = new BloomFilter(
      Config.getConfig().bloom_filter_num_entries,
      Config.getConfig().bloom_filter_hf);

  for (String memberKey : allKeys) {
    keyFilter.add(memberKey);
  }

  return new DataItem(timestamp, value, keyFilter);
}
}
/**
 * ORs every row of {@code filter} into the corresponding row of this filter.
 *
 * <p>The argument must be a non-null {@code DynamicBloomFilter} with the same
 * {@code vectorSize}, {@code nbHash}, {@code nr} and number of rows as this one.
 *
 * @param filter the filter to combine into this one
 * @throws IllegalArgumentException if {@code filter} does not satisfy the conditions above
 */
@Override
public void or(Filter filter) {
  // instanceof is false for null, so this also rejects a null argument.
  final boolean sameShape = filter instanceof DynamicBloomFilter
      && filter.vectorSize == this.vectorSize
      && filter.nbHash == this.nbHash;
  if (!sameShape) {
    throw new IllegalArgumentException("filters cannot be or-ed");
  }

  DynamicBloomFilter other = (DynamicBloomFilter) filter;
  final boolean sameLayout = other.matrix.length == this.matrix.length && other.nr == this.nr;
  if (!sameLayout) {
    throw new IllegalArgumentException("filters cannot be or-ed");
  }

  for (int row = 0; row < matrix.length; row++) {
    matrix[row].or(other.matrix[row]);
  }
}
/**
 * Serializes this filter to {@code out}.
 *
 * <p>After the superclass fields, the stream receives, in order: for every entry of
 * {@code fpVector} its size followed by its keys, the same for every entry of
 * {@code keyVector}, and finally every value of {@code ratio}.
 *
 * @param out the stream to write the serialized state to
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
  super.write(out);

  for (List<Key> bucket : fpVector) {
    out.writeInt(bucket.size());
    for (Key k : bucket) {
      k.write(out);
    }
  }

  for (List<Key> bucket : keyVector) {
    out.writeInt(bucket.size());
    for (Key k : bucket) {
      k.write(out);
    }
  }

  for (double value : ratio) {
    out.writeDouble(value);
  }
}
/**
 * Restores this filter's bit vector from {@code in}.
 *
 * <p>After the superclass fields, exactly {@code getNBytes()} bytes are read. Bit {@code i}
 * of the vector is set when byte {@code i / 8} has the mask {@code bitvalues[i % 8]} set.
 *
 * @param in the stream to read the serialized state from
 * @throws IOException if reading from {@code in} fails
 */
@Override
public void readFields(DataInput in) throws IOException {
  super.readFields(in);

  bits = new BitSet(this.vectorSize);
  byte[] packed = new byte[getNBytes()];
  in.readFully(packed);

  for (int i = 0; i < vectorSize; i++) {
    final int byteIndex = i / 8;
    final int bitIndex = i % 8;
    if ((packed[byteIndex] & bitvalues[bitIndex]) != 0) {
      bits.set(i);
    }
  }
}
/** Applies {@code not()} to every row of {@code matrix}, in index order. */
@Override
public void not() {
  for (BloomFilter row : matrix) {
    row.not();
  }
}
/**
 * Restores this object's state from {@code input}: first {@code vertexValue}, then
 * {@code filter}, each through its own {@code readFields}.
 *
 * @param input the stream to read the serialized state from
 * @throws IOException if reading from {@code input} fails
 */
@Override
public void readFields(DataInput input) throws IOException {
  vertexValue.readFields(input);
  filter.readFields(input);
}
}
private Result createBloomFilter() throws IOException { // We get a bag of keys. Create a bloom filter from them // First do distinct of the keys. Not using DistinctBag as memory should not be a problem. HashSet<Object> bloomKeys = new HashSet<>(); Iterator<Tuple> iter = bags[0].iterator(); while (iter.hasNext()) { bloomKeys.add(iter.next().get(0)); } Object partition = key; detachInput(); // Free up the key and bags reference BloomFilter bloomFilter = new BloomFilter(vectorSizeBytes * 8, numHash, hashType); for (Object bloomKey: bloomKeys) { Key k = new Key(DataType.toBytes(bloomKey, bloomKeyType)); bloomFilter.add(k); } bloomKeys = null; return getSerializedBloomFilter(partition, bloomFilter, vectorSizeBytes + 64); }
/**
 * Emits the {@code (key, value)} pair only when the filter's membership test passes for the key.
 *
 * <p>The key is printed to standard output, then its string form is converted to bytes with
 * the platform default charset and tested against {@code filter}.
 *
 * @param key the record key to test
 * @param value the record value, written unchanged when the key passes
 * @param context the context that receives passing records
 * @throws IOException if writing to {@code context} fails
 * @throws InterruptedException if writing to {@code context} is interrupted
 */
@Override
protected void map(Text key, Text value, Context context)
    throws IOException, InterruptedException {
  System.out.println("K[" + key + "]");

  byte[] keyBytes = key.toString().getBytes();
  boolean maybePresent = filter.membershipTest(new Key(keyBytes));
  if (maybePresent) {
    context.write(key, value);
  }
}
}
/**
 * Adds the key to {@code filter} when the value, parsed as an integer, is greater than 30,
 * and stores {@code output} in {@code collector}.
 *
 * <p>The key is printed to standard output first. Its string form is converted to bytes with
 * the platform default charset before being added.
 *
 * @param key the record key
 * @param value the record value; its string form must be a parseable integer
 * @param output the collector stored in {@code collector}; nothing is emitted here
 * @param reporter unused by this method
 * @throws IOException declared by the overridden method
 * @throws NumberFormatException if {@code value} is not a parseable integer
 */
@Override
public void map(Text key, Text value,
    OutputCollector<NullWritable, BloomFilter> output, Reporter reporter) throws IOException {
  System.out.println("K[" + key + "]");

  final int age = Integer.parseInt(value.toString());
  if (age > 30) {
    byte[] keyBytes = key.toString().getBytes();
    filter.add(new Key(keyBytes));
  }

  collector = output;
}