/**
 * Advances {@code reportedDuplicateProbability} by one step of the recurrence
 *
 * <pre>
 * X_{m+1} = \left[ \left(X_m\right)^{\frac{1}{k}}
 *           \left\{ X_m + \left(1 - X_m\right) \left(1 - \frac{1}{ks}\right) \right\}
 *           + \left(1 - X_m\right) \frac{1}{s} \right]^k
 * </pre>
 *
 * where {@code k} is the number of bloom filters, {@code s} is the bit size of the first
 * bloom filter and {@code X_m} is the current value of {@code reportedDuplicateProbability}.
 */
private void updateReportedDuplicateProbability() {
    final double filterCount = bloomFilters.length;
    final double filterBits = bloomFilters[0].bitSize();
    final double current = reportedDuplicateProbability;
    final double complement = 1D - current;

    // (X_m)^(1/k)
    final double kthRoot = Math.pow(current, 1D / filterCount);
    // X_m + (1 - X_m)(1 - 1/(ks))
    final double braced = current + complement * (1D - (1D / (filterCount * filterBits)));
    // (1 - X_m)(1/s)
    final double tail = complement * (1D / filterBits);

    reportedDuplicateProbability = Math.pow(kthRoot * braced + tail, filterCount);
}
/**
 * Advances {@code reportedDuplicateProbability} by one step of the recurrence
 *
 * <pre>
 * X_{m+1} = \left[ \left(X_m\right)^{\frac{1}{k}}
 *           \left\{ X_m + \left(1 - X_m\right) \left(1 - \frac{1}{s}\right) \right\}
 *           + \left(1 - X_m\right) \frac{1}{s} \right]^k
 * </pre>
 *
 * where {@code k} is the number of bloom filters, {@code s} is the bit size of the first
 * bloom filter and {@code X_m} is the current value of {@code reportedDuplicateProbability}.
 */
private void updateReportedDuplicateProbability() {
    final double filterCount = bloomFilters.length;
    final double filterBits = bloomFilters[0].bitSize();
    final double current = reportedDuplicateProbability;
    final double complement = 1D - current;

    // (X_m)^(1/k)
    final double kthRoot = Math.pow(current, 1D / filterCount);
    // X_m + (1 - X_m)(1 - 1/s)
    final double braced = current + complement * (1D - (1D / filterBits));
    // (1 - X_m)(1/s)
    final double tail = complement * (1D / filterBits);

    reportedDuplicateProbability = Math.pow(kthRoot * braced + tail, filterCount);
}
/**
 * Checks whether every hash in {@code hashBuffer} is marked in its matching bloom filter.
 *
 * <p>The hash at position {@code i} is reduced modulo the bit size of {@code bloomFilters[i]}
 * and looked up in that filter; the scan stops at the first bit that is not set.
 *
 * @param bloomFilters filters to probe, indexed in parallel with {@code hashBuffer}
 * @param hashBuffer one hash value per bloom filter
 * @return {@code true} if every probed bit is set, {@code false} as soon as one is not
 */
private boolean containsHashBuffer(BitArray[] bloomFilters, int[] hashBuffer) {
    for (int slot = 0; slot < hashBuffer.length; slot++) {
        final BitArray filter = bloomFilters[slot];
        // NOTE(review): a negative hash yields a negative remainder here; presumably the
        // caller only supplies non-negative hashes - confirm.
        final boolean bitPresent = filter.get(hashBuffer[slot] % filter.bitSize());
        if (!bitPresent) {
            return false;
        }
    }
    return true;
}
/**
 * Replaces every bloom filter with a newly constructed {@code BitArray} of the same bit size
 * and sets {@code reportedDuplicateProbability} back to zero.
 */
@Override
public void reset() {
    int slot = 0;
    for (BitArray previous : bloomFilters) {
        // Each element is read before its slot is overwritten, so the size carries over.
        bloomFilters[slot++] = new BitArray(previous.bitSize());
    }
    reportedDuplicateProbability = 0D;
}
/**
 * Checks whether every hash in {@code hashBuffer} is marked in its matching bloom filter.
 *
 * <p>The hash at position {@code i} is reduced modulo the bit size of {@code bloomFilters[i]}
 * and looked up in that filter; the scan stops at the first bit that is not set.
 *
 * @param bloomFilters filters to probe, indexed in parallel with {@code hashBuffer}
 * @param hashBuffer one hash value per bloom filter
 * @return {@code true} if every probed bit is set, {@code false} as soon as one is not
 */
private boolean containsHashBuffer(BitArray[] bloomFilters, int[] hashBuffer) {
    int slot = 0;
    while (slot < hashBuffer.length) {
        final BitArray filter = bloomFilters[slot];
        // NOTE(review): a negative hash yields a negative remainder here; presumably the
        // caller only supplies non-negative hashes - confirm.
        if (!filter.get(hashBuffer[slot] % filter.bitSize())) {
            return false;
        }
        slot++;
    }
    return true;
}
/**
 * Computes the mean load (set bits divided by total bits) across the given bloom filters.
 *
 * <p>Each filter's ratio is computed in floating point. The operands are cast to
 * {@code double} before dividing because an integral {@code bitCount() / bitSize()} would
 * truncate every partially filled filter's load to zero. (The return types of
 * {@code bitCount()} and {@code bitSize()} are not visible from here; the casts are harmless
 * if they are already floating point.)
 *
 * @param bloomFilters filters whose loads are averaged
 * @return the arithmetic mean of the per-filter loads; {@code NaN} if the array is empty
 */
private double calculateAverageLoad(BitArray[] bloomFilters) {
    double totalLoad = 0D;
    for (BitArray currentBloomFilter : bloomFilters) {
        totalLoad += ((double) currentBloomFilter.bitCount()) / ((double) currentBloomFilter.bitSize());
    }
    return totalLoad / bloomFilters.length;
}
/**
 * Checks whether every hash in {@code hashBuffer} is marked in its matching bloom filter.
 *
 * <p>The hash at position {@code i} is reduced modulo the bit size of {@code bloomFilters[i]}
 * and looked up in that filter; the scan stops at the first bit that is not set.
 *
 * @param bloomFilters filters to probe, indexed in parallel with {@code hashBuffer}
 * @param hashBuffer one hash value per bloom filter
 * @return {@code true} if every probed bit is set, {@code false} as soon as one is not
 */
private boolean containsHashBuffer(BitArray[] bloomFilters, int[] hashBuffer) {
    for (int slot = 0; slot < hashBuffer.length; slot++) {
        final BitArray filter = bloomFilters[slot];
        // NOTE(review): a negative hash yields a negative remainder here; presumably the
        // caller only supplies non-negative hashes - confirm.
        final boolean bitPresent = filter.get(hashBuffer[slot] % filter.bitSize());
        if (!bitPresent) {
            return false;
        }
    }
    return true;
}
/**
 * Replaces every bloom filter with a newly constructed {@code BitArray} of the same bit size
 * and sets {@code reportedDuplicateProbability} back to zero.
 */
@Override
public void reset() {
    int slot = 0;
    while (slot < bloomFilters.length) {
        final BitArray previous = bloomFilters[slot];
        bloomFilters[slot] = new BitArray(previous.bitSize());
        slot++;
    }
    reportedDuplicateProbability = 0D;
}
/**
 * Replaces every bloom filter with a newly constructed {@code BitArray} of the same bit size
 * and sets {@code reportedDuplicateProbability} back to zero.
 */
@Override
public void reset() {
    int slot = 0;
    for (BitArray previous : bloomFilters) {
        // Each element is read before its slot is overwritten, so the size carries over.
        bloomFilters[slot++] = new BitArray(previous.bitSize());
    }
    reportedDuplicateProbability = 0D;
}
/**
 * Advances {@code reportedDuplicateProbability} by one step of the recurrence
 *
 * <pre>
 * X_{m+1} = \left[ \left(X_m\right)^{\frac{1}{k}}
 *           \left\{ X_m + \left(1 - X_m\right) \left(1 - \frac{L}{s^2}\right) \right\}
 *           + \left(1 - X_m\right) \frac{1}{s} \right]^k
 * </pre>
 *
 * where {@code k} is the number of bloom filters, {@code L} is the value returned by
 * {@code calculateAverageLoad(bloomFilters)}, {@code s} is the bit size of the first bloom
 * filter and {@code X_m} is the current value of {@code reportedDuplicateProbability}.
 */
private void updateReportedDuplicateProbability() {
    final double filterCount = bloomFilters.length;
    final double averageLoad = calculateAverageLoad(bloomFilters);
    final double filterBits = bloomFilters[0].bitSize();
    final double current = reportedDuplicateProbability;
    final double complement = 1D - current;

    // (X_m)^(1/k)
    final double kthRoot = Math.pow(current, 1D / filterCount);
    // X_m + (1 - X_m)(1 - L/s^2)
    final double braced = current + complement * (1D - (averageLoad / (filterBits * filterBits)));
    // (1 - X_m)(1/s)
    final double tail = complement * (1D / filterBits);

    reportedDuplicateProbability = Math.pow(kthRoot * braced + tail, filterCount);
}
/**
 * Records the hashes of one element in the bloom filters, clearing one bit per filter first.
 *
 * <p>For each position {@code i} of {@code hashBuffer}, one bit of {@code bloomFilters[i]}
 * at an index drawn from {@code random} (bounded by the filter's bit size) is cleared, and
 * then the bit at {@code hashBuffer[i] % bitSize} is set.
 *
 * @param bloomFilters filters to update, indexed in parallel with {@code hashBuffer}
 * @param hashBuffer one hash value per bloom filter
 * @param random source of the bit index that is cleared in each filter
 */
private void setHashBuffer(BitArray[] bloomFilters, int[] hashBuffer, SplittableRandom random) {
    int slot = 0;
    while (slot < hashBuffer.length) {
        final BitArray filter = bloomFilters[slot];
        // Clear before set, so the freshly written bit always survives this call.
        filter.clear(random.nextLong(filter.bitSize()));
        filter.set(hashBuffer[slot] % filter.bitSize());
        slot++;
    }
}
/**
 * Records the hashes of one element in the bloom filters, probabilistically clearing one bit
 * per filter first.
 *
 * <p>For each position {@code i} of {@code hashBuffer}, the load of {@code bloomFilters[i]}
 * ({@code bitCount / bitSize}) is used as the probability of clearing one bit at an index
 * drawn from {@code random}; afterwards the bit at {@code hashBuffer[i] % bitSize} is set.
 *
 * @param bloomFilters filters to update, indexed in parallel with {@code hashBuffer}
 * @param hashBuffer one hash value per bloom filter
 * @param random source of both the clear decision and the cleared bit index
 */
private void setHashBuffer(BitArray[] bloomFilters, int[] hashBuffer, SplittableRandom random) {
    int slot = 0;
    while (slot < hashBuffer.length) {
        final BitArray filter = bloomFilters[slot];
        final double load = ((double) filter.bitCount()) / ((double) filter.bitSize());
        // One double is always drawn, whether or not a bit ends up being cleared.
        final boolean shouldClear = random.nextDouble() < load;
        if (shouldClear) {
            filter.clear(random.nextLong(filter.bitSize()));
        }
        filter.set(hashBuffer[slot] % filter.bitSize());
        slot++;
    }
}
/**
 * Records the hashes of one element in the bloom filters after clearing a single bit in one
 * randomly chosen filter.
 *
 * <p>One filter is picked with {@code random.nextInt(hashBuffer.length)} and one of its bits,
 * at an index drawn from {@code random}, is cleared. Then, for each position {@code i} of
 * {@code hashBuffer}, the bit at {@code hashBuffer[i] % bitSize} is set in
 * {@code bloomFilters[i]}.
 *
 * @param bloomFilters filters to update, indexed in parallel with {@code hashBuffer}
 * @param hashBuffer one hash value per bloom filter
 * @param random source of the chosen filter and the cleared bit index
 */
private void setHashBuffer(BitArray[] bloomFilters, int[] hashBuffer, SplittableRandom random) {
    final BitArray victim = bloomFilters[random.nextInt(hashBuffer.length)];
    victim.clear(random.nextLong(victim.bitSize()));

    int slot = 0;
    for (int hash : hashBuffer) {
        final BitArray filter = bloomFilters[slot++];
        filter.set(hash % filter.bitSize());
    }
}
final long startTime = System.currentTimeMillis(); for (long counter = 1; counter <= STREAM_SIZE; counter++) { final long randomNumber = ThreadLocalRandom.current().nextLong(universeBitArray.bitSize()); fillElementBytes(randomNumber, elementBytes); if (deDuplicator.classifyDistinct(elementBytes)) {