// NOTE(review): garbled fragment — it splices the general-bloom load path into the
// delete-family-bloom load path with unbalanced braces, and both the leading error
// string and the trailing LOG.info call are cut off. Appears truncated from an
// HStoreFile reader-initialization method; restore from the original source.
"valid bloom filter type not found in FileInfo"); } else { generalBloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader); if (LOG.isTraceEnabled()) { deleteFamilyBloomFilter = BloomFilterFactory.createFromMeta( bloomMeta, reader); LOG.info("Loaded Delete Family Bloom ("
// NOTE(review): garbled fragment — a dangling ".createDeleteBloomAtWrite(...)" call
// has lost its receiver (presumably "deleteFamilyBloomFilterWriter = BloomFilterFactory"),
// and the leading ".create();" belongs to an unseen builder chain. Truncated from a
// store-file-writer constructor; restore from the original source.
.create(); generalBloomFilterWriter = BloomFilterFactory.createGeneralBloomAtWrite( conf, cacheConf, bloomType, (int) Math.min(maxKeys, Integer.MAX_VALUE), writer); .createDeleteBloomAtWrite(conf, cacheConf, (int) Math.min(maxKeys, Integer.MAX_VALUE), writer); deleteFamilyBloomContext = new RowBloomContext(deleteFamilyBloomFilterWriter, comparator);
// NOTE(review): truncated fragment of BloomFilterFactory.createDeleteBloomAtWrite —
// the LOG.info statement is cut mid-expression (missing the closing "+ ... );") and
// the early-return branch is missing, so the disabled-check and the writer creation
// run together. Restore from the original source.
if (!isDeleteFamilyBloomEnabled(conf)) { LOG.info("Delete Bloom filters are disabled by configuration for " + writer.getPath() float err = getErrorRate(conf); int maxFold = getMaxFold(conf); CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf), err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(), null, BloomType.ROW);
// NOTE(review): truncated fragment of BloomFilterFactory.createGeneralBloomAtWrite —
// the method signature is cut off before the line starts, the LOG.trace statement is
// cut mid-expression, and "maxFold" is used without its visible declaration.
// Restore from the original source.
CacheConfig cacheConf, BloomType bloomType, int maxKeys, HFile.Writer writer) { if (!isGeneralBloomEnabled(conf)) { LOG.trace("Bloom filters are disabled by configuration for " + writer.getPath() float err = getErrorRate(conf); CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf), err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(), bloomType == BloomType.ROWCOL ? CellComparatorImpl.COMPARATOR : null, bloomType);
/**
 * Creates an HFile writer for this hoplog together with a row-level Bloom
 * filter sized for the expected number of keys.
 *
 * @param keys expected number of keys, used to size the Bloom filter
 * @throws IOException if the underlying HFile writer cannot be created
 */
public HFileSortedOplogWriter(int keys) throws IOException {
  try {
    // Block size is tunable via system property; defaults to 64 KiB.
    int blockSize = Integer.getInteger(HoplogConfig.HFILE_BLOCK_SIZE_CONF, (1 << 16));
    Algorithm compression = Algorithm.valueOf(
        System.getProperty(HoplogConfig.COMPRESSION, HoplogConfig.COMPRESSION_DEFAULT));
    writer = HFile.getWriterFactory(conf, cacheConf)
        .withPath(fsProvider.getFS(), path)
        .withBlockSize(blockSize)
        .withCompression(compression)
        .create();
    // Row-level Bloom filter, written alongside the HFile data blocks.
    bfw = BloomFilterFactory.createGeneralBloomAtWrite(conf, cacheConf, BloomType.ROW, keys,
        writer);
    if (logger.isDebugEnabled()) {
      logger.debug("{}Created hoplog writer with compression " + compression, logPrefix);
    }
  } catch (IOException e) {
    if (logger.isDebugEnabled()) {
      logger.debug("{}IO Error while creating writer", logPrefix);
    }
    throw e;
  }
}
/**
 * Constructor. Loads a reader and its indices, etc.; may allocate a substantial
 * amount of RAM depending on the underlying files (10-20MB?).
 *
 * @param fs the current file system to use
 * @param fileInfo the store file information
 * @param conf the current configuration
 * @param cacheConf the cache configuration and block cache reference
 * @param cfBloomType the Bloom type requested by the column family configuration;
 *          may differ from the Bloom filter actually present in the HFile because
 *          column family configuration can change. {@link BloomType#NONE} means
 *          any existing Bloom filter is ignored. The requested type is also forced
 *          to NONE when Bloom filters are disabled globally in the configuration.
 * @param primaryReplica true if this is a store file for a primary replica
 */
public HStoreFile(FileSystem fs, StoreFileInfo fileInfo, Configuration conf,
    CacheConfig cacheConf, BloomType cfBloomType, boolean primaryReplica) {
  this.streamReaders = ConcurrentHashMap.newKeySet();
  this.fs = fs;
  this.fileInfo = fileInfo;
  this.cacheConf = cacheConf;
  this.primaryReplica = primaryReplica;
  this.noReadahead =
      conf.getBoolean(STORE_FILE_READER_NO_READAHEAD, DEFAULT_STORE_FILE_READER_NO_READAHEAD);
  // Honor the column-family Bloom setting only when blooms are enabled globally.
  if (!BloomFilterFactory.isGeneralBloomEnabled(conf)) {
    LOG.info("Ignoring bloom filter check for file " + this.getPath() + ": " + "cfBloomType="
        + cfBloomType + " (disabled in config)");
    this.cfBloomType = BloomType.NONE;
  } else {
    this.cfBloomType = cfBloomType;
  }
}
/**
 * Validates the false positive ratio by computing its z-value and comparing it
 * to the provided threshold; fails the test when the z-value crosses the bound.
 *
 * @param falsePosRate experimental positive rate
 * @param nTrials the number of Bloom filter checks
 * @param zValueBoundary z-value boundary, positive for an upper bound and
 *          negative for a lower bound
 * @param cbf the compound Bloom filter we are using
 * @param additionalMsg additional message to include in log output and
 *          assertion failures
 */
private void validateFalsePosRate(double falsePosRate, int nTrials, double zValueBoundary,
    CompoundBloomFilter cbf, String additionalMsg) {
  double targetRate = BloomFilterFactory.getErrorRate(conf);
  // Standard z-test for a proportion against the configured target error rate.
  double stdDev = Math.sqrt(targetRate * (1 - targetRate) / nTrials);
  double zValue = (falsePosRate - targetRate) / stdDev;
  String assortedStatsStr = " (targetErrorRate=" + targetRate + ", falsePosRate="
      + falsePosRate + ", nTrials=" + nTrials + ")";
  LOG.info("z-value is " + zValue + assortedStatsStr);
  boolean isUpperBound = zValueBoundary > 0;
  boolean outOfBounds = isUpperBound ? zValue > zValueBoundary : zValue < zValueBoundary;
  if (outOfBounds) {
    String errorMsg = "False positive rate z-value " + zValue + " is "
        + (isUpperBound ? "higher" : "lower") + " than " + zValueBoundary + assortedStatsStr
        + ". Per-chunk stats:\n" + cbf.formatTestingStats();
    fail(errorMsg + additionalMsg);
  }
}
// NOTE(review): truncated fragment of an older BloomFilterFactory.createGeneralBloomAtWrite
// (KeyValue-comparator era) — the signature start, the end of the LOG.trace expression,
// and the declaration of "maxFold" are all missing. Restore from the original source.
CacheConfig cacheConf, BloomType bloomType, int maxKeys, HFile.Writer writer) { if (!isGeneralBloomEnabled(conf)) { LOG.trace("Bloom filters are disabled by configuration for " + writer.getPath() float err = getErrorRate(conf); CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf), err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(), bloomType == BloomType.ROWCOL ? KeyValue.COMPARATOR : KeyValue.RAW_COMPARATOR);
/**
 * Creates an HFile writer for this hoplog (HFileContext-based API variant)
 * together with a row-level Bloom filter sized for the expected key count.
 *
 * @param keys expected number of keys, used to size the Bloom filter
 * @throws IOException if the underlying HFile writer cannot be created
 */
public HFileSortedOplogWriter(int keys) throws IOException {
  try {
    // Block size is tunable via system property; defaults to 64 KiB.
    int blockSize = Integer.getInteger(HoplogConfig.HFILE_BLOCK_SIZE_CONF, (1 << 16));
    Algorithm compression = Algorithm.valueOf(
        System.getProperty(HoplogConfig.COMPRESSION, HoplogConfig.COMPRESSION_DEFAULT));
    ByteComparator byteComparator = new ByteComparator();
    HFileContext context = new HFileContextBuilder()
        .withBlockSize(blockSize)
        .withCompression(compression)
        .build();
    writer = HFile.getWriterFactory(conf, cacheConf)
        .withPath(fsProvider.getFS(), path)
        .withFileContext(context)
        .withComparator(byteComparator)
        .create();
    bfw = BloomFilterFactory.createGeneralBloomAtWrite(conf, cacheConf, BloomType.ROW, keys,
        writer);
    logger.fine("Created hoplog writer with compression " + compression);
  } catch (IOException e) {
    logger.fine("IO Error while creating writer");
    throw e;
  }
}
// NOTE(review): incomplete fragment — the "if" block opens but never closes;
// presumably forces the requested Bloom type to NONE when blooms are disabled
// in configuration. TODO: confirm against the enclosing method.
if (!BloomFilterFactory.isGeneralBloomEnabled(conf)) { bloomType = BloomType.NONE;
/**
 * Validates the false positive ratio by computing its z-value and comparing it
 * to the provided threshold; the test fails when the boundary is crossed.
 *
 * @param falsePosRate experimental positive rate
 * @param nTrials the number of Bloom filter checks
 * @param zValueBoundary z-value boundary, positive for an upper bound and
 *          negative for a lower bound
 * @param cbf the compound Bloom filter we are using
 * @param additionalMsg additional message to include in log output and
 *          assertion failures
 */
private void validateFalsePosRate(double falsePosRate, int nTrials, double zValueBoundary,
    CompoundBloomFilter cbf, String additionalMsg) {
  double expectedRate = BloomFilterFactory.getErrorRate(conf);
  // z-test of the observed proportion against the configured target error rate.
  double zValue =
      (falsePosRate - expectedRate) / Math.sqrt(expectedRate * (1 - expectedRate) / nTrials);
  String assortedStatsStr = " (targetErrorRate=" + expectedRate + ", falsePosRate="
      + falsePosRate + ", nTrials=" + nTrials + ")";
  LOG.info("z-value is " + zValue + assortedStatsStr);
  boolean isUpperBound = zValueBoundary > 0;
  boolean violated = isUpperBound ? zValue > zValueBoundary : zValue < zValueBoundary;
  if (violated) {
    String errorMsg = "False positive rate z-value " + zValue + " is "
        + (isUpperBound ? "higher" : "lower") + " than " + zValueBoundary + assortedStatsStr
        + ". Per-chunk stats:\n" + cbf.formatTestingStats();
    fail(errorMsg + additionalMsg);
  }
}
// NOTE(review): truncated fragment of BloomFilterFactory.createDeleteBloomAtWrite —
// the LOG.info statement is cut mid-expression and the CompoundBloomFilterWriter
// constructor call has lost its "new CompoundBloomFilterWriter(" prefix, leaving a
// bare argument list. Restore from the original source.
if (!isDeleteFamilyBloomEnabled(conf)) { LOG.info("Delete Bloom filters are disabled by configuration for " + writer.getPath() float err = getErrorRate(conf); int maxFold = getMaxFold(conf); getBloomBlockSize(conf), err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(), Bytes.BYTES_RAWCOMPARATOR);
// NOTE(review): truncated fragment of BloomFilterFactory.createGeneralBloomAtWrite —
// signature start and LOG.trace terminator missing, "maxFold" undeclared here, and
// the CompoundBloomFilterWriter constructor call has lost its "new ...(" prefix.
// Restore from the original source.
CacheConfig cacheConf, BloomType bloomType, int maxKeys, HFile.Writer writer) { if (!isGeneralBloomEnabled(conf)) { LOG.trace("Bloom filters are disabled by configuration for " + writer.getPath() float err = getErrorRate(conf); getBloomBlockSize(conf), err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(), bloomType == BloomType.ROWCOL ? KeyValue.KEY_COMPARATOR : Bytes.BYTES_RAWCOMPARATOR);
// NOTE(review): fragment with the same load-from-meta sequence duplicated; the second
// copy re-nulls and re-creates bloomFilter, which looks like a copy/paste artifact
// rather than intentional logic. Restore from the original source.
BloomFilter bloomFilter = null; if (bloomMeta != null) bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader); bloomFilter = null; if (bloomMeta != null) bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
// NOTE(review): truncated fragment — the ".createDeleteBloomAtWrite(...)" call has
// lost its receiver (presumably "deleteFamilyBloomFilterWriter = BloomFilterFactory"),
// and the trailing "} else {" belongs to an unseen enclosing conditional. Restore
// from the original source.
generalBloomFilterWriter = BloomFilterFactory.createGeneralBloomAtWrite( conf, cacheConf, bloomType, (int) Math.min(maxKeys, Integer.MAX_VALUE), writer); .createDeleteBloomAtWrite(conf, cacheConf, (int) Math.min(maxKeys, Integer.MAX_VALUE), writer); } else {
/**
 * Creates an HFile writer for this sorted oplog, configured entirely from
 * {@code sopConfig}, with an optional row-level Bloom filter.
 *
 * @throws IOException if the underlying HFile writer cannot be created
 */
public HFileSortedOplogWriter() throws IOException {
  HFileContext context = new HFileContextBuilder()
      .withBlockSize(sopConfig.getBlockSize())
      .withBytesPerCheckSum(sopConfig.getBytesPerChecksum())
      .withChecksumType(HFileSortedOplogFactory.convertChecksum(sopConfig.getChecksum()))
      .withCompression(HFileSortedOplogFactory.convertCompression(sopConfig.getCompression()))
      .withDataBlockEncoding(
          HFileSortedOplogFactory.convertEncoding(sopConfig.getKeyEncoding())
              .getDataBlockEncoding())
      .build();
  writer = HFile.getWriterFactory(hconf, hcache)
      .withPath(fs, path)
      .withFileContext(context)
      .withComparator(sopConfig.getComparator())
      .create();
  // Bloom filter is optional; 0 means the writer sizes it as keys are added.
  if (sopConfig.isBloomFilterEnabled()) {
    bfw = BloomFilterFactory.createGeneralBloomAtWrite(hconf, hcache, BloomType.ROW, 0, writer);
  } else {
    bfw = null;
  }
}
// NOTE(review): incomplete fragment (duplicate of an earlier snippet) — the "if"
// block opens but never closes; presumably downgrades the Bloom type to NONE when
// blooms are disabled in configuration. TODO: confirm against the enclosing method.
if (!BloomFilterFactory.isGeneralBloomEnabled(conf)) { bloomType = BloomType.NONE;
// NOTE(review): truncated fragment of BloomFilterFactory.createDeleteBloomAtWrite
// (KeyValue-comparator era) — the LOG.info statement is cut mid-expression and the
// early-return branch is missing. Restore from the original source.
if (!isDeleteFamilyBloomEnabled(conf)) { LOG.info("Delete Bloom filters are disabled by configuration for " + writer.getPath() float err = getErrorRate(conf); int maxFold = getMaxFold(conf); CompoundBloomFilterWriter bloomWriter = new CompoundBloomFilterWriter(getBloomBlockSize(conf), err, Hash.getHashType(conf), maxFold, cacheConf.shouldCacheBloomsOnWrite(), KeyValue.RAW_COMPARATOR);
/**
 * Loads the general Bloom filter from the HFile's metadata, if one is present,
 * and wires up delegating comparators when the reader uses them.
 *
 * @throws IOException if the Bloom filter metadata cannot be read
 */
public BloomFilterImpl() throws IOException {
  DataInput meta = reader.getGeneralBloomFilterMetadata();
  if (meta == null) {
    // No Bloom filter metadata stored in this HFile.
    hfileBloom = null;
    return;
  }
  hfileBloom = BloomFilterFactory.createFromMeta(meta, reader);
  if (reader.getComparator() instanceof DelegatingSerializedComparator) {
    loadComparators((DelegatingSerializedComparator) hfileBloom.getComparator());
  }
}
// NOTE(review): truncated fragment (duplicate of an earlier snippet) — the
// ".createDeleteBloomAtWrite(...)" call has lost its receiver and the trailing
// "} else {" belongs to an unseen enclosing conditional. Restore from the
// original source.
generalBloomFilterWriter = BloomFilterFactory.createGeneralBloomAtWrite( conf, cacheConf, bloomType, (int) Math.min(maxKeys, Integer.MAX_VALUE), writer); .createDeleteBloomAtWrite(conf, cacheConf, (int) Math.min(maxKeys, Integer.MAX_VALUE), writer); } else {