/**
 * Writes a bloom filter to a stream in its binary form.
 *
 * <p>Layout, in order:
 * <ul>
 *   <li>1 byte: the number of hash functions</li>
 *   <li>1 big-endian int: how many longs the bit set holds</li>
 *   <li>that many big-endian longs: the words of the bit set</li>
 * </ul>
 *
 * <p>The stream is neither flushed nor closed here.
 *
 * @param out output stream to write to
 * @param bloomFilter the BloomFilter to serialize
 * @throws IOException if writing to {@code out} fails
 */
public static void serialize(OutputStream out, BloomFilter bloomFilter) throws IOException {
  // DataOutputStream writes multi-byte values high byte first (big endian).
  final DataOutputStream sink = new DataOutputStream(out);

  // Header: hash-function count, then the number of words that follow.
  // NOTE(review): writeByte keeps only the low 8 bits of its argument -- confirm the
  // hash-function count always fits in one byte.
  sink.writeByte(bloomFilter.numHashFunctions);
  sink.writeInt(bloomFilter.getBitSet().length);

  // Payload: every word of the bit set, in array order.
  final long[] words = bloomFilter.getBitSet();
  for (int i = 0; i < words.length; i++) {
    sink.writeLong(words[i]);
  }
}
public HiveBloomFilter(BloomFilter bloomFilter) { this.bitSet = new BitSet(bloomFilter.getBitSet().clone()); this.numBits = bloomFilter.getBitSize(); this.numHashFunctions = bloomFilter.getNumHashFunctions(); }
// Test excerpt. The first two statements populate bloomFilterBuilder from bloomFilterWrite:
// its bit set words (wrapped as a List via Longs.asList) and its hash-function count.
// The three assertions then compare bloomFilterWrite's bit set against:
//   - the bit set of the first entry in bloomFilters (element-wise, via Arrays.equals),
//   - the bitset list held by bloomFilterRead, converted back to an array (element-wise),
//   - the bitset count reported by bloomFilterRead (length only).
// NOTE(review): Longs is presumably Guava's com.google.common.primitives.Longs -- confirm the import.
// NOTE(review): how bloomFilters and bloomFilterRead were produced is not visible in this excerpt.
bloomFilterBuilder.addAllBitset(Longs.asList(bloomFilterWrite.getBitSet())); bloomFilterBuilder.setNumHashFunctions(bloomFilterWrite.getNumHashFunctions()); assertTrue(Arrays.equals(bloomFilters.get(0).getBitSet(), bloomFilterWrite.getBitSet())); assertTrue(Arrays.equals(Longs.toArray(bloomFilterRead.getBitsetList()), bloomFilterWrite.getBitSet())); assertEquals(bloomFilterWrite.getBitSet().length, bloomFilterRead.getBitsetCount());
/**
 * Converts a BloomFilter into an OrcProto.BloomFilter message.
 *
 * @param bloomFilter the filter whose bit set words and hash-function count are copied
 * @return the built message carrying those two values
 */
private static OrcProto.BloomFilter toOrcBloomFilter(BloomFilter bloomFilter) {
  // Fluent form: each add/set call on the builder hands the builder back for the next call.
  return OrcProto.BloomFilter.newBuilder()
      .addAllBitset(Longs.asList(bloomFilter.getBitSet()))
      .setNumHashFunctions(bloomFilter.getNumHashFunctions())
      .build();
}
@Override public int getAggregationBufferFixedSize() { if (bitSetSize < 0) { // Not pretty, but we need a way to get the size try { Aggregation agg = (Aggregation) getNewAggregationBuffer(); bitSetSize = agg.bf.getBitSet().length; } catch (Exception e) { throw new RuntimeException("Unexpected error while creating AggregationBuffer", e); } } // BloomFilter: object(BitSet: object(data: long[]), numBits: int, numHashFunctions: int) JavaDataModel model = JavaDataModel.get(); int bloomFilterSize = JavaDataModel.alignUp(model.object() + model.lengthForLongArrayOfSize(bitSetSize), model.memoryAlign()); return JavaDataModel.alignUp( model.object() + bloomFilterSize + model.primitive1() + model.primitive1(), model.memoryAlign()); }
/**
 * Writes a bloom filter to a stream in its binary form.
 *
 * <p>Layout, in order:
 * <ul>
 *   <li>1 byte: the number of hash functions</li>
 *   <li>1 big-endian int: how many longs the bit set holds</li>
 *   <li>that many big-endian longs: the words of the bit set</li>
 * </ul>
 *
 * <p>The stream is neither flushed nor closed here.
 *
 * @param out output stream to write to
 * @param bloomFilter the BloomFilter to serialize
 * @throws IOException if writing to {@code out} fails
 */
public static void serialize(OutputStream out, BloomFilter bloomFilter) throws IOException {
  // DataOutputStream writes multi-byte values high byte first (big endian).
  final DataOutputStream sink = new DataOutputStream(out);

  // Header: hash-function count, then the number of words that follow.
  // NOTE(review): writeByte keeps only the low 8 bits of its argument -- confirm the
  // hash-function count always fits in one byte.
  sink.writeByte(bloomFilter.numHashFunctions);
  sink.writeInt(bloomFilter.getBitSet().length);

  // Payload: every word of the bit set, in array order.
  final long[] words = bloomFilter.getBitSet();
  for (int i = 0; i < words.length; i++) {
    sink.writeLong(words[i]);
  }
}
public HiveBloomFilter(BloomFilter bloomFilter) { this.bitSet = new BitSet(bloomFilter.getBitSet().clone()); this.numBits = bloomFilter.getBitSize(); this.numHashFunctions = bloomFilter.getNumHashFunctions(); }
public HiveBloomFilter(BloomFilter bloomFilter) { this.bitSet = new BitSet(bloomFilter.getBitSet().clone()); this.numBits = bloomFilter.getBitSize(); this.numHashFunctions = bloomFilter.getNumHashFunctions(); }
public HiveBloomFilter(BloomFilter bloomFilter) { this.bitSet = new BitSet(bloomFilter.getBitSet().clone()); this.numBits = bloomFilter.getBitSize(); this.numHashFunctions = bloomFilter.getNumHashFunctions(); }
// Test excerpt. The first two statements populate bloomFilterBuilder from bloomFilterWrite:
// its bit set words (wrapped as a List via Longs.asList) and its hash-function count.
// The three assertions then compare bloomFilterWrite's bit set against:
//   - the bit set of the first entry in bloomFilters (element-wise, via Arrays.equals),
//   - the bitset list held by bloomFilterRead, converted back to an array (element-wise),
//   - the bitset count reported by bloomFilterRead (length only).
// NOTE(review): Longs is presumably Guava's com.google.common.primitives.Longs -- confirm the import.
// NOTE(review): how bloomFilters and bloomFilterRead were produced is not visible in this excerpt.
bloomFilterBuilder.addAllBitset(Longs.asList(bloomFilterWrite.getBitSet())); bloomFilterBuilder.setNumHashFunctions(bloomFilterWrite.getNumHashFunctions()); assertTrue(Arrays.equals(bloomFilters.get(0).getBitSet(), bloomFilterWrite.getBitSet())); assertTrue(Arrays.equals(Longs.toArray(bloomFilterRead.getBitsetList()), bloomFilterWrite.getBitSet())); assertEquals(bloomFilterWrite.getBitSet().length, bloomFilterRead.getBitsetCount());
// Test excerpt. The first two statements populate bloomFilterBuilder from bloomFilterWrite:
// its bit set words (wrapped as a List via Longs.asList) and its hash-function count.
// The three assertions then compare bloomFilterWrite's bit set against:
//   - the bit set of the first entry in bloomFilters (element-wise, via Arrays.equals),
//   - the bitset list held by bloomFilterRead, converted back to an array (element-wise),
//   - the bitset count reported by bloomFilterRead (length only).
// NOTE(review): Longs is presumably Guava's com.google.common.primitives.Longs -- confirm the import.
// NOTE(review): how bloomFilters and bloomFilterRead were produced is not visible in this excerpt.
bloomFilterBuilder.addAllBitset(Longs.asList(bloomFilterWrite.getBitSet())); bloomFilterBuilder.setNumHashFunctions(bloomFilterWrite.getNumHashFunctions()); assertTrue(Arrays.equals(bloomFilters.get(0).getBitSet(), bloomFilterWrite.getBitSet())); assertTrue(Arrays.equals(Longs.toArray(bloomFilterRead.getBitsetList()), bloomFilterWrite.getBitSet())); assertEquals(bloomFilterWrite.getBitSet().length, bloomFilterRead.getBitsetCount());
// Test excerpt. The first two statements populate bloomFilterBuilder from bloomFilterWrite:
// its bit set words (wrapped as a List via Longs.asList) and its hash-function count.
// The three assertions then compare bloomFilterWrite's bit set against:
//   - the bit set of the first entry in bloomFilters (element-wise, via Arrays.equals),
//   - the bitset list held by bloomFilterRead, converted back to an array (element-wise),
//   - the bitset count reported by bloomFilterRead (length only).
// NOTE(review): Longs is presumably Guava's com.google.common.primitives.Longs -- confirm the import.
// NOTE(review): how bloomFilters and bloomFilterRead were produced is not visible in this excerpt.
bloomFilterBuilder.addAllBitset(Longs.asList(bloomFilterWrite.getBitSet())); bloomFilterBuilder.setNumHashFunctions(bloomFilterWrite.getNumHashFunctions()); assertTrue(Arrays.equals(bloomFilters.get(0).getBitSet(), bloomFilterWrite.getBitSet())); assertTrue(Arrays.equals(Longs.toArray(bloomFilterRead.getBitsetList()), bloomFilterWrite.getBitSet())); assertEquals(bloomFilterWrite.getBitSet().length, bloomFilterRead.getBitsetCount());
/**
 * Converts a BloomFilter into an OrcProto.BloomFilter message.
 *
 * @param bloomFilter the filter whose bit set words and hash-function count are copied
 * @return the built message carrying those two values
 */
private static OrcProto.BloomFilter toOrcBloomFilter(BloomFilter bloomFilter) {
  // Fluent form: each add/set call on the builder hands the builder back for the next call.
  return OrcProto.BloomFilter.newBuilder()
      .addAllBitset(Longs.asList(bloomFilter.getBitSet()))
      .setNumHashFunctions(bloomFilter.getNumHashFunctions())
      .build();
}
/**
 * Converts a BloomFilter into an OrcProto.BloomFilter message.
 *
 * @param bloomFilter the filter whose bit set words and hash-function count are copied
 * @return the built message carrying those two values
 */
private static OrcProto.BloomFilter toOrcBloomFilter(BloomFilter bloomFilter) {
  // Fluent form: each add/set call on the builder hands the builder back for the next call.
  return OrcProto.BloomFilter.newBuilder()
      .addAllBitset(Longs.asList(bloomFilter.getBitSet()))
      .setNumHashFunctions(bloomFilter.getNumHashFunctions())
      .build();
}
/**
 * Converts a BloomFilter into an OrcProto.BloomFilter message.
 *
 * @param bloomFilter the filter whose bit set words and hash-function count are copied
 * @return the built message carrying those two values
 */
private static OrcProto.BloomFilter toOrcBloomFilter(BloomFilter bloomFilter) {
  // Fluent form: each add/set call on the builder hands the builder back for the next call.
  return OrcProto.BloomFilter.newBuilder()
      .addAllBitset(Longs.asList(bloomFilter.getBitSet()))
      .setNumHashFunctions(bloomFilter.getNumHashFunctions())
      .build();
}