/**
 * Invokes {@code apply(term)} on the supplied filter and returns that same filter instance,
 * which lets callers chain or fold over this method.
 *
 * <p>NOTE(review): the value returned by {@code apply} is discarded. If {@code BloomFilter}
 * here is Guava's {@code com.google.common.hash.BloomFilter}, {@code apply} is only a
 * membership query (equivalent to {@code mightContain}) and does not insert anything, in
 * which case this method would need {@code put(term)} to actually update the filter.
 * Confirm which type is in scope before relying on this method to add terms.
 *
 * @param filter the filter to invoke; returned unchanged as a reference
 * @param term   the value handed to {@code filter.apply}
 * @param <T>    element type of the filter
 * @return the {@code filter} argument itself
 */
public static <T> BloomFilter<T> update(BloomFilter<T> filter, T term) {
    final BloomFilter<T> target = filter;
    target.apply(term);
    return target;
}
/**
 * Answers whether {@code blobId} is present, consulting three sources in order:
 * {@code bloomFilter.apply}, then {@code cache}, then {@code isPresentInStore}.
 *
 * <p>A {@code false} from {@code bloomFilter.apply} is returned immediately without touching
 * the cache or the store. A cached answer, when one exists, is returned as-is. Otherwise the
 * store is queried and its answer is recorded in the cache; a positive answer is additionally
 * put into the bloom filter.
 *
 * <p>The method is {@code synchronized}, so calls on the same instance do not interleave.
 *
 * <p>NOTE(review): {@code bloomFilter.put} is only reached after {@code bloomFilter.apply}
 * already returned {@code true} for the same id, so it looks redundant for a conventional
 * bloom filter. Confirm before removing.
 *
 * @param blobId the identifier to look up
 * @return {@code true} if the cache or the store reports the id as present, {@code false} otherwise
 * @throws IOException presumably propagated from {@code isPresentInStore}; verify against its declaration
 */
synchronized boolean contains(String blobId) throws IOException {
    if (bloomFilter.apply(blobId)) {
        final Boolean memoized = cache.getIfPresent(blobId);
        if (memoized == null) {
            // Nothing cached yet: ask the store and remember its answer either way.
            final boolean inStore = isPresentInStore(blobId);
            cache.put(blobId, inStore ? Boolean.TRUE : Boolean.FALSE);
            if (inStore) {
                bloomFilter.put(blobId);
            }
            return inStore;
        }
        return memoized;
    }
    // The filter rejected the id, so neither the cache nor the store is consulted.
    return false;
}
/**
 * Answers whether {@code blobId} is present, consulting three sources in order:
 * {@code bloomFilter.apply}, then {@code cache}, then {@code isPresentInStore}.
 *
 * <p>A {@code false} from {@code bloomFilter.apply} is returned immediately without touching
 * the cache or the store. A cached answer, when one exists, is returned as-is. Otherwise the
 * store is queried and its answer is recorded in the cache; a positive answer is additionally
 * put into the bloom filter.
 *
 * <p>The method is {@code synchronized}, so calls on the same instance do not interleave.
 *
 * <p>NOTE(review): {@code bloomFilter.put} is only reached after {@code bloomFilter.apply}
 * already returned {@code true} for the same id, so it looks redundant for a conventional
 * bloom filter. Confirm before removing.
 *
 * @param blobId the identifier to look up
 * @return {@code true} if the cache or the store reports the id as present, {@code false} otherwise
 * @throws IOException presumably propagated from {@code isPresentInStore}; verify against its declaration
 */
synchronized boolean contains(String blobId) throws IOException {
    if (bloomFilter.apply(blobId)) {
        final Boolean memoized = cache.getIfPresent(blobId);
        if (memoized == null) {
            // Nothing cached yet: ask the store and remember its answer either way.
            final boolean inStore = isPresentInStore(blobId);
            cache.put(blobId, inStore ? Boolean.TRUE : Boolean.FALSE);
            if (inStore) {
                bloomFilter.put(blobId);
            }
            return inStore;
        }
        return memoized;
    }
    // The filter rejected the id, so neither the cache nor the store is consulted.
    return false;
}
/**
 * Answers whether {@code blobId} is present, consulting three sources in order:
 * {@code bloomFilter.apply}, then {@code cache}, then {@code isPresentInStore}.
 *
 * <p>A {@code false} from {@code bloomFilter.apply} is returned immediately without touching
 * the cache or the store. A cached answer, when one exists, is returned as-is. Otherwise the
 * store is queried and its answer is recorded in the cache; a positive answer is additionally
 * put into the bloom filter.
 *
 * <p>The method is {@code synchronized}, so calls on the same instance do not interleave.
 *
 * <p>NOTE(review): {@code bloomFilter.put} is only reached after {@code bloomFilter.apply}
 * already returned {@code true} for the same id, so it looks redundant for a conventional
 * bloom filter. Confirm before removing.
 *
 * @param blobId the identifier to look up
 * @return {@code true} if the cache or the store reports the id as present, {@code false} otherwise
 * @throws IOException presumably propagated from {@code isPresentInStore}; verify against its declaration
 */
synchronized boolean contains(String blobId) throws IOException {
    if (bloomFilter.apply(blobId)) {
        final Boolean memoized = cache.getIfPresent(blobId);
        if (memoized == null) {
            // Nothing cached yet: ask the store and remember its answer either way.
            final boolean inStore = isPresentInStore(blobId);
            cache.put(blobId, inStore ? Boolean.TRUE : Boolean.FALSE);
            if (inStore) {
                bloomFilter.put(blobId);
            }
            return inStore;
        }
        return memoized;
    }
    // The filter rejected the id, so neither the cache nor the store is consulted.
    return false;
}
@Test public void testTokenize_Timeout() throws Exception { // Create test input final NormalizedContentInterface nci = new NormalizedFieldAndValue("TEST", "test"); // Set expectations expect(this.filter.apply(isA(String.class))).andReturn(true); // Only first token is applied before a thread sleep // in the substrategy triggers a TimeoutException to be // thrown by the parent TimeoutTokenizationStrategy // Run the test PowerMock.replayAll(); SimulatedProcessingDelayStrategy delayStrategy = new SimulatedProcessingDelayStrategy(this.filter, 0); TimeoutStrategy subject = new TimeoutStrategy(delayStrategy, System.currentTimeMillis(), 200); NGramTokenizationStrategy strategy = new NGramTokenizationStrategy(subject); TimeoutException result1 = null; try { strategy.tokenize(nci, TimeoutStrategy.DEFAULT_MAX_NGRAM_LENGTH); } catch (TimeoutException e) { result1 = e; } PowerMock.verifyAll(); // Verify results assertNotNull("Strategy should have thrown a timeout exception", result1); }
// Record the mock expectation: filter.apply(...) is called with any String argument exactly
// expectedNGramCount times and answers true on every call.
expect(this.filter.apply(isA(String.class))).andReturn(true).times(expectedNGramCount);
// Record the mock expectation: filter.apply(...) with any String argument answers true, and the
// number of calls must fall between (expectedNGramCount - 10) and idealFilterSize.
// NOTE(review): how idealFilterSize is derived is not visible in this fragment; confirm that it
// is always >= expectedNGramCount - 10, otherwise the range is empty.
expect(this.filter.apply(isA(String.class))).andReturn(true).times(expectedNGramCount - 10, idealFilterSize); // Allow for rounding errors
@Test public void testTokenize_FilterSizeBasedPruning() throws Exception { // Create test input final NormalizedContentInterface nci = new NormalizedFieldAndValue("TEST", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"); // Calculate the expected number of n-grams final String fieldValue = nci.getIndexedFieldValue(); int expectedNGramCount = BloomFilterUtil.predictNGramCount(fieldValue, AbstractNGramTokenizationStrategy.DEFAULT_MAX_NGRAM_LENGTH); // Set expectations expect(this.filter.apply(isA(String.class))).andReturn(true).times(expectedNGramCount); // Run the test PowerMock.replayAll(); NGramTokenizationStrategy subject = new NGramTokenizationStrategy(this.filter); int result1 = subject.tokenize(nci, AbstractNGramTokenizationStrategy.DEFAULT_MAX_NGRAM_LENGTH); PowerMock.verifyAll(); // Verify results assertEquals("Should have tokenized and applied " + expectedNGramCount + " n-grams to the bloom filter", expectedNGramCount, result1); }
// Record the mock expectation: filter.apply(...) with any String argument answers true, and the
// number of calls must fall between (expectedNGramCount - 4) and idealFilterSize.
// NOTE(review): how idealFilterSize is derived is not visible in this fragment; confirm that it
// is always >= expectedNGramCount - 4, otherwise the range is empty.
expect(this.filter.apply(isA(String.class))).andReturn(true).times(expectedNGramCount - 4, idealFilterSize); // Allow for rounding errors
@Test public void testTokenize_NoPruning() throws Exception { // Create test input final NormalizedContentInterface nci = new NormalizedFieldAndValue("TEST", "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"); // Calculate the expected number of n-grams final String fieldValue = nci.getIndexedFieldValue(); int expectedNGramCount = BloomFilterUtil.predictNGramCount(fieldValue, NGramTokenizationStrategy.DEFAULT_MAX_NGRAM_LENGTH); // Set expectations expect(this.filter.apply(isA(String.class))).andReturn(true).times(expectedNGramCount); // Run the test PowerMock.replayAll(); NGramTokenizationStrategy subject = new NGramTokenizationStrategy(this.filter); int result1 = subject.tokenize(nci, NGramTokenizationStrategy.DEFAULT_MAX_NGRAM_LENGTH); PowerMock.verifyAll(); // Verify results assertEquals("Should have tokenized and applied " + expectedNGramCount + " n-grams to the bloom filter", expectedNGramCount, result1); }
// Record the mock expectation: filter.apply(...) with any String argument answers true, and the
// number of calls must fall between (expectedNGramCount - 4) and idealFilterSize.
// NOTE(review): how idealFilterSize is derived is not visible in this fragment; confirm that it
// is always >= expectedNGramCount - 4, otherwise the range is empty.
expect(this.filter.apply(isA(String.class))).andReturn(true).times(expectedNGramCount - 4, idealFilterSize); // Allow for rounding errors
// Disk availability check for "/" at .05f: the first call reports available, the second
// reports unavailable.
// NOTE(review): .05f is presumably a minimum free-space fraction; confirm against
// ResourceAvailabilityUtil.
expect(ResourceAvailabilityUtil.isDiskAvailable("/", .05f)).andReturn(true).once();
expect(ResourceAvailabilityUtil.isDiskAvailable("/", .05f)).andReturn(false).once();

// Allow for timeout: the filter may be applied any number of times
expect(this.filter.apply(isA(String.class))).andReturn(true).anyTimes();

// Record the expected warning: a String message together with a LowDiskSpaceException
this.logger.warn(isA(String.class), isA(LowDiskSpaceException.class));