/**
 * Discards the current bloom filters and creates a fresh, empty filter per
 * index column, sized from {@code calculateBloomStats()}.
 */
protected void resetBloomFilters() {
  indexBloomFilters.clear();
  int[] stats = calculateBloomStats();
  int columnCount = indexColumns.size();
  for (int colId = 0; colId < columnCount; colId++) {
    CarbonBloomFilter freshFilter =
        new CarbonBloomFilter(stats[0], stats[1], Hash.MURMUR_HASH, compressBloom);
    indexBloomFilters.add(freshFilter);
  }
}
/**
 * Holds a list of bloom filters and accumulates their total size for
 * cache memory accounting.
 */
public CacheValue(List<CarbonBloomFilter> bloomFilters) {
  this.bloomFilters = bloomFilters;
  for (int i = 0; i < bloomFilters.size(); i++) {
    size += bloomFilters.get(i).getSize();
  }
}
protected void writeBloomDataMapFile() { try { for (int indexColId = 0; indexColId < indexColumns.size(); indexColId++) { CarbonBloomFilter bloomFilter = indexBloomFilters.get(indexColId); bloomFilter.setBlockletNo(currentBlockletId); // only in higher version of guava-bloom-filter, it provides readFrom/writeTo interface. // In lower version, we use default java serializer to write bloomfilter. bloomFilter.write(this.currentDataOutStreams.get(indexColId)); this.currentDataOutStreams.get(indexColId).flush(); } } catch (Exception e) { for (DataOutputStream dataOutputStream : currentDataOutStreams) { CarbonUtil.closeStreams(dataOutputStream); } throw new RuntimeException(e); } finally { resetBloomFilters(); } }
/**
 * load bloom filter of {@code colName} from {@code shardPath}
 */
public static List<CarbonBloomFilter> loadBloomIndex(
    String shardPath, String colName) {
  List<CarbonBloomFilter> bloomFilters = new ArrayList<>();
  DataInputStream dataInStream = null;
  try {
    String indexFile = getBloomIndexFile(shardPath, colName);
    dataInStream = FileFactory.getDataInputStream(indexFile, FileFactory.getFileType(indexFile));
    // the shard name is the same for every filter in this file, so derive it once
    String shardName = new Path(shardPath).getName();
    // filters are written back-to-back with no record count; available() > 0
    // serves as the end-of-file check, matching the writer's layout
    while (dataInStream.available() > 0) {
      CarbonBloomFilter bloomFilter = new CarbonBloomFilter();
      bloomFilter.readFields(dataInStream);
      bloomFilter.setShardName(shardName);
      bloomFilters.add(bloomFilter);
    }
    LOGGER.info(String.format("Read %d bloom indices from %s", bloomFilters.size(), indexFile));
    return bloomFilters;
  } catch (IOException e) {
    LOGGER.error("Error occurs while reading bloom index", e);
    throw new RuntimeException("Error occurs while reading bloom index", e);
  } finally {
    CarbonUtil.closeStreams(dataInStream);
  }
}
List<CarbonBloomFilter> bloomIndexList = cacheValue.getBloomFilters(); for (CarbonBloomFilter bloomFilter : bloomIndexList) { if (needShardPrune && !filteredShard.contains(bloomFilter.getShardName())) { scanRequired = bloomFilter.membershipTest(new Key(value)); if (scanRequired) { if (LOGGER.isDebugEnabled()) { LOGGER.debug(String.format("BloomCoarseGrainDataMap: Need to scan -> blocklet#%s", String.valueOf(bloomFilter.getBlockletNo()))); Blocklet blocklet = new Blocklet(bloomFilter.getShardName(), String.valueOf(bloomFilter.getBlockletNo())); tempHitBlockletsResult.add(blocklet); } else if (LOGGER.isDebugEnabled()) { LOGGER.debug(String.format("BloomCoarseGrainDataMap: Skip scan -> blocklet#%s", String.valueOf(bloomFilter.getBlockletNo())));
/**
 * Estimates the size in bytes of this bloom filter (fixed header fields plus
 * the bit data), used for memory accounting of cached filters.
 */
public int getSize() { int size = 14; // header: blockletNo(4) + nbHash(4) + hashType(1) + vectorSize(4) + compress(1) = 14
if (compress) { // compressed form: delegate to the RoaringBitmap's own size accounting
size += bitmap.getSizeInBytes(); } else { try { // uncompressed form: the bit vector as longs, 8 bytes each
size += getBitSet().toLongArray().length * 8; } catch (IOException e) { throw new RuntimeException(e); } } return size; }
/**
 * Deserializes this bloom filter from {@code in}, reading fields in the same
 * order they are written by {@code write(DataOutput)}, then rebuilds the
 * hash function from the restored parameters.
 */
@Override
public void readFields(DataInput in) throws IOException {
  this.blockletNo = in.readInt();
  this.nbHash = in.readInt();
  this.hashType = in.readByte();
  this.vectorSize = in.readInt();
  this.compress = in.readBoolean();
  if (compress) {
    // compressed form: a serialized RoaringBitmap follows directly
    this.bitmap = new RoaringBitmap();
    bitmap.deserialize(in);
  } else {
    // raw form: length-prefixed byte image of the BitSet
    int len = in.readInt();
    byte[] rawBits = new byte[len];
    in.readFully(rawBits);
    setBitSet(BitSet.valueOf(rawBits));
  }
  this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
}
protected void addValue2BloomIndex(int indexColIdx, Object value) { byte[] indexValue; // convert measure to bytes // convert non-dict dimensions to simple bytes without length // convert internal-dict dimensions to simple bytes without any encode if (indexColumns.get(indexColIdx).isMeasure()) { // NULL value of all measures are already processed in `ColumnPage.getData` // or `RawBytesReadSupport.readRow` with actual data type // Carbon stores boolean as byte. Here we convert it for `getValueAsBytes` if (indexColumns.get(indexColIdx).getDataType().equals(DataTypes.BOOLEAN)) { value = BooleanConvert.boolean2Byte((Boolean)value); } indexValue = CarbonUtil.getValueAsBytes(indexColumns.get(indexColIdx).getDataType(), value); } else { if (indexColumns.get(indexColIdx).hasEncoding(Encoding.DICTIONARY) || indexColumns.get(indexColIdx).hasEncoding(Encoding.DIRECT_DICTIONARY)) { indexValue = convertDictionaryValue(indexColIdx, value); } else { indexValue = convertNonDictionaryValue(indexColIdx, value); } } if (indexValue.length == 0) { indexValue = CarbonCommonConstants.MEMBER_DEFAULT_VAL_ARRAY; } indexBloomFilters.get(indexColIdx).add(new Key(indexValue)); }
DataInputStream indexDataInStream = new DataInputStream(byteArrayInputStream); while (indexDataInStream.available() > 0) { CarbonBloomFilter bloomFilter = new CarbonBloomFilter(); bloomFilter.readFields(indexDataInStream); bloomFilter.setShardName(shardName); bloomFilters.add(bloomFilter);
/**
 * Serializes this bloom filter to {@code out}; field order must stay in
 * sync with {@code readFields(DataInput)}.
 */
@Override
public void write(DataOutput out) throws IOException {
  out.writeInt(blockletNo);
  out.writeInt(this.nbHash);
  out.writeByte(this.hashType);
  out.writeInt(this.vectorSize);
  out.writeBoolean(compress);
  BitSet bits = getBitSet();
  if (compress) {
    // pack the positions of all set bits into a RoaringBitmap and serialize it
    RoaringBitmap packed = new RoaringBitmap();
    for (int pos = bits.nextSetBit(0); pos >= 0; pos = bits.nextSetBit(pos + 1)) {
      packed.add(pos);
    }
    packed.serialize(out);
  } else {
    // raw form: length-prefixed byte image of the BitSet
    byte[] raw = bits.toByteArray();
    out.writeInt(raw.length);
    out.write(raw);
  }
}