/**
 * With caching switched off (size and expiry of zero), an error stored through
 * {@code put} must not be returned by {@code get}, while an error stored through
 * {@code putTimeout} must be reported as a text extraction error.
 */
@Test
public void rememberTimeout() throws Exception {
    ExtractedTextCache textCache = new ExtractedTextCache(0, 0, false, null);
    Blob blob = new IdBlob("hello", "a");

    // A regular error entry is not served back.
    textCache.put(blob, ExtractedText.ERROR);
    String afterPut = textCache.get("/a", "foo", blob, false);
    assertNull(afterPut);

    // A timeout entry is served back as the extraction error marker.
    textCache.putTimeout(blob, ExtractedText.ERROR);
    String afterTimeout = textCache.get("/a", "foo", blob, false);
    assertEquals(FulltextIndexEditor.TEXT_EXTRACTION_ERROR, afterTimeout);
}
/**
 * Passes the given provider on to the extracted text cache and logs whether a
 * provider is being registered or unregistered. Does nothing when no extracted
 * text cache exists yet.
 *
 * @param provider the provider to set on the cache; {@code null} is logged as an
 *                 unregistration and is still passed on to the cache
 */
private void registerExtractedTextProvider(PreExtractedTextProvider provider) {
    if (extractedTextCache == null) {
        return;
    }

    if (provider == null) {
        log.info("Unregistering PreExtractedTextProvider with extracted text cache");
    } else {
        String usage;
        if (extractedTextCache.isAlwaysUsePreExtractedCache()) {
            usage = "always";
        } else {
            usage = "only during reindexing phase";
        }
        log.info("Registering PreExtractedTextProvider {} with extracted text cache. "
                + "It would be used {}", provider, usage);
    }

    extractedTextCache.setExtractedTextProvider(provider);
}
public void close() { resetCache(); // don't clean the persistent map on purpose, so we don't re-try // after restarting the service or so closeExecutorService(); }
/**
 * When the lookup is not flagged as a reindex, the cache must return no text
 * and must never touch the registered pre-extracted text provider.
 */
@Test
public void preExtractionNoReindex() throws Exception {
    ExtractedTextCache textCache = new ExtractedTextCache(10 * FileUtils.ONE_MB, 100);

    PreExtractedTextProvider mockProvider = mock(PreExtractedTextProvider.class);
    textCache.setExtractedTextProvider(mockProvider);

    Blob blob = new IdBlob("hello", "a");
    assertNull(textCache.get("/a", "foo", blob, false));

    verifyZeroInteractions(mockProvider);
}
/**
 * With a non-zero cache size, cache stats are available and text stored for an
 * id-based blob is returned by a subsequent lookup.
 */
@Test
public void cacheEnabled() throws Exception {
    ExtractedTextCache textCache = new ExtractedTextCache(10 * FileUtils.ONE_MB, 100);
    assertNotNull(textCache.getCacheStats());

    Blob blob = new IdBlob("hello", "a");

    // Nothing has been stored yet.
    String beforePut = textCache.get("/a", "foo", blob, false);
    assertNull(beforePut);

    ExtractedText extracted = new ExtractedText(ExtractionResult.SUCCESS, "test hello");
    textCache.put(blob, extracted);

    String afterPut = textCache.get("/a", "foo", blob, false);
    assertEquals("test hello", afterPut);
}
/**
 * Creates an editor provider whose {@link ExtractedTextCache} is disabled by
 * default (constructed with a size and expiry of zero).
 *
 * @param indexCopier the index copier to use, may be {@code null}
 */
public LuceneIndexEditorProvider(@Nullable IndexCopier indexCopier) {
    this(
        indexCopier,
        new ExtractedTextCache(0, 0)
    );
}
/**
 * Builds the extracted text cache from the service configuration, re-registers
 * an already bound pre-extracted text provider with it, and exposes the cache
 * statistics as an MBean when the cache provides any.
 *
 * @param bundleContext the bundle context (not used by this method)
 * @param config        the service configuration holding cache size, expiry and
 *                      the "always use pre-extracted text" flag
 */
private void initializeExtractedTextCache(BundleContext bundleContext, Map<String, ?> config) {
    int sizeMb = PropertiesUtil.toInteger(
            config.get(PROP_EXTRACTED_TEXT_CACHE_SIZE),
            PROP_EXTRACTED_TEXT_CACHE_SIZE_DEFAULT);
    int expirySecs = PropertiesUtil.toInteger(
            config.get(PROP_EXTRACTED_TEXT_CACHE_EXPIRY),
            PROP_EXTRACTED_TEXT_CACHE_EXPIRY_DEFAULT);
    boolean alwaysUse = PropertiesUtil.toBoolean(
            config.get(PROP_PRE_EXTRACTED_TEXT_ALWAYS_USE),
            PROP_PRE_EXTRACTED_TEXT_ALWAYS_USE_DEFAULT);

    extractedTextCache = new ExtractedTextCache(sizeMb * ONE_MB, expirySecs, alwaysUse, indexDir);

    // A provider may have been bound before the cache existed; hand it over now.
    if (extractedTextProvider != null) {
        registerExtractedTextProvider(extractedTextProvider);
    }

    CacheStats cacheStats = extractedTextCache.getCacheStats();
    if (cacheStats == null) {
        return;
    }

    oakRegs.add(registerMBean(whiteboard,
            CacheStatsMBean.class,
            cacheStats,
            CacheStatsMBean.TYPE,
            cacheStats.getName()));
    log.info("Extracted text caching enabled with maxSize {} MB, expiry time {} secs",
            sizeMb, expirySecs);
}
/**
 * Text stored for an array-based blob is not returned by a later lookup, even
 * though the cache is enabled.
 */
@Test
public void cacheEnabledNonIdBlob() throws Exception {
    ExtractedTextCache textCache = new ExtractedTextCache(10 * FileUtils.ONE_MB, 100);
    Blob blob = new ArrayBasedBlob("hello".getBytes());

    assertNull(textCache.get("/a", "foo", blob, false));

    ExtractedText extracted = new ExtractedText(ExtractionResult.SUCCESS, "test hello");
    textCache.put(blob, extracted);

    // Still nothing: the put above must not make the text retrievable.
    assertNull(textCache.get("/a", "foo", blob, false));
}
@Test public void process() throws Throwable { ExtractedTextCache cache = new ExtractedTextCache(0, 0, false, null); try { cache.process("test", new Callable<Void>() { @Override public Void call() throws Exception { assertEquals(0, cache.getStatsMBean().getTimeoutCount()); cache.setExtractionTimeoutMillis(10); long time = System.currentTimeMillis(); try { cache.process("test", new Callable<Void>() { @Override public Void call() throws Exception { assertEquals(1, cache.getStatsMBean().getTimeoutCount());
if (length > SMALL_BINARY) { String name = "Extracting " + path + ", " + length + " bytes"; extractedTextCache.process(name, new Callable<Void>() { @Override public Void call() throws Exception { + " The stack trace is included to flag some 'unintended' failures", getIndexName(), path, e); extractedTextCache.put(v, ExtractedText.ERROR); return TEXT_EXTRACTION_ERROR; } catch (TimeoutException t) { "[{}] Failed to extract text from a binary property due to timeout: {}.", getIndexName(), path); extractedTextCache.put(v, ExtractedText.ERROR); extractedTextCache.putTimeout(v, ExtractedText.ERROR); return TEXT_EXTRACTION_ERROR; } catch (Throwable t) { + " help improve the text extraction feature.", getIndexName(), path, t); extractedTextCache.put(v, ExtractedText.ERROR); return TEXT_EXTRACTION_ERROR; } else { extractedTextCache.put(v, new ExtractedText(ExtractedText.ExtractionResult.SUCCESS, result)); return result;
/**
 * A lookup flagged as reindex on a cache without any registered provider
 * returns no text.
 */
@Test
public void preExtractionNoReindexNoProvider() throws Exception {
    ExtractedTextCache textCache = new ExtractedTextCache(10 * FileUtils.ONE_MB, 100);
    Blob blob = new IdBlob("hello", "a");

    String found = textCache.get("/a", "foo", blob, true);

    assertNull(found);
}
@Test public void preExtractedTextProvider() throws Exception{ MockOsgi.activate(service, context.bundleContext(), getDefaultConfig()); LuceneIndexEditorProvider editorProvider = (LuceneIndexEditorProvider) context.getService(IndexEditorProvider.class); assertNull(editorProvider.getExtractedTextCache().getExtractedTextProvider()); assertFalse(editorProvider.getExtractedTextCache().isAlwaysUsePreExtractedCache()); //Mock OSGi does not support components //context.registerService(PreExtractedTextProvider.class, new DummyProvider()); service.bindExtractedTextProvider(mock(PreExtractedTextProvider.class)); assertNotNull(editorProvider.getExtractedTextCache().getExtractedTextProvider()); }
/**
 * Configuring a cache size of 11 MB enables the extracted text cache, registers
 * its {@link CacheStatsMBean} with the configured maximum weight, and the MBean
 * goes away again once the service is deactivated.
 */
@Test
public void enableExtractedTextCaching() throws Exception {
    Map<String, Object> serviceConfig = getDefaultConfig();
    serviceConfig.put("extractedTextCacheSizeInMB", 11);

    MockOsgi.activate(service, context.bundleContext(), serviceConfig);

    ExtractedTextCache textCache = service.getExtractedTextCache();
    assertNotNull(textCache.getCacheStats());
    assertNotNull(context.getService(CacheStatsMBean.class));
    assertEquals(11 * FileUtils.ONE_MB, textCache.getCacheStats().getMaxTotalWeight());

    MockOsgi.deactivate(service, context.bundleContext());

    assertNull(context.getService(CacheStatsMBean.class));
}
@Test public void preExtractedTextProvider() throws Exception{ Tree idx = createFulltextIndex(root.getTree("/"), "test"); TestUtil.useV2(idx); root.commit(); AccessStateProvidingBlob testBlob = new AccessStateProvidingBlob("fox is jumping", "id1"); MapBasedProvider textProvider = new MapBasedProvider(); textProvider.write("id1","lion"); editorProvider.getExtractedTextCache().setExtractedTextProvider(textProvider); Tree test = root.getTree("/").addChild("test"); createFileNode(test, "text", testBlob, "text/plain"); root.commit(); //As its not a reindex case actual blob content would be accessed assertTrue(testBlob.isStreamAccessed()); assertQuery("select * from [nt:base] where CONTAINS(*, 'fox ')", asList("/test/text/jcr:content")); assertEquals(0, textProvider.accessCount); testBlob.resetState(); //Lets trigger a reindex root.getTree(idx.getPath()).setProperty(IndexConstants.REINDEX_PROPERTY_NAME, true); root.commit(); //Now the content should be provided by the PreExtractedTextProvider //and instead of fox its lion! assertFalse(testBlob.isStreamAccessed()); assertQuery("select * from [nt:base] where CONTAINS(*, 'lion ')", asList("/test/text/jcr:content")); assertEquals(1, textProvider.accessCount); }
/**
 * Creates the {@link LuceneIndexEditorProvider}, with or without an index
 * copier depending on the copy-on-write setting, registers it as an
 * {@link IndexEditorProvider} service of type {@code TYPE_LUCENE}, and exposes
 * the text extraction statistics of its cache as an MBean.
 *
 * @param bundleContext context used to register the service
 * @param tracker       index tracker handed to the editor provider
 * @param mBean         Lucene index MBean handed to the editor provider
 * @param config        service configuration, read for the copy-on-write flag
 * @throws IOException declared by the method; presumably raised while
 *                     initializing the index copier - TODO confirm
 */
private void registerIndexEditor(BundleContext bundleContext, IndexTracker tracker,
        LuceneIndexMBean mBean, Map<String, ?> config) throws IOException {
    boolean copyOnWrite = PropertiesUtil.toBoolean(
            config.get(PROP_COPY_ON_WRITE), PROP_COPY_ON_WRITE_DEFAULT);

    if (!copyOnWrite) {
        editorProvider = new LuceneIndexEditorProvider(null, tracker, extractedTextCache,
                augmentorFactory, mountInfoProvider, activeDeletedBlobCollector,
                mBean, statisticsProvider);
    } else {
        // The copier has to exist before it is handed to the editor provider.
        initializeIndexCopier(bundleContext, config);
        editorProvider = new LuceneIndexEditorProvider(indexCopier, tracker, extractedTextCache,
                augmentorFactory, mountInfoProvider, activeDeletedBlobCollector,
                mBean, statisticsProvider);
        log.info("Enabling CopyOnWrite support. Index files would be copied under {}",
                indexDir.getAbsolutePath());
    }

    editorProvider.setBlobStore(blobStore);

    if (hybridIndex) {
        editorProvider.setIndexingQueue(checkNotNull(documentQueue));
    }

    Dictionary<String, Object> serviceProps = new Hashtable<>();
    serviceProps.put("type", TYPE_LUCENE);
    regs.add(bundleContext.registerService(
            IndexEditorProvider.class.getName(), editorProvider, serviceProps));

    oakRegs.add(registerMBean(whiteboard,
            TextExtractionStatsMBean.class,
            editorProvider.getExtractedTextCache().getStatsMBean(),
            TextExtractionStatsMBean.TYPE,
            "TextExtraction statistics"));
}
/**
 * Returns the text for the given binary, preferring what the extracted text
 * cache yields and falling back to {@code parseStringValue0} when the cache
 * returns {@code null}.
 *
 * @param v            the binary value
 * @param metadata     metadata passed on to the fallback extraction
 * @param path         path of the node holding the binary
 * @param propertyName name of the property holding the binary
 * @return the cached text, or the result of the fallback extraction
 */
private String parseStringValue(Blob v, Metadata metadata, String path, String propertyName) {
    String cached = extractedTextCache.get(path, propertyName, v, reindex);
    if (cached != null) {
        return cached;
    }
    return parseStringValue0(v, metadata, path);
}
/**
 * Setting {@code alwaysUsePreExtractedCache} to "true" in the service
 * configuration is reflected by the extracted text cache of the registered
 * editor provider.
 */
@Test
public void alwaysUsePreExtractedCache() throws Exception {
    Map<String, Object> serviceConfig = getDefaultConfig();
    serviceConfig.put("alwaysUsePreExtractedCache", "true");

    MockOsgi.activate(service, context.bundleContext(), serviceConfig);

    LuceneIndexEditorProvider editorProvider =
            (LuceneIndexEditorProvider) context.getService(IndexEditorProvider.class);
    ExtractedTextCache textCache = editorProvider.getExtractedTextCache();

    assertTrue(textCache.isAlwaysUsePreExtractedCache());
}
root.commit(); editorProvider.getExtractedTextCache().resetCache(); editorProvider.getExtractedTextCache().resetCache();
/**
 * With the "always use pre-extracted text" flag set, the provider is consulted
 * even for a lookup that is not flagged as a reindex.
 */
@Test
public void preExtractionAlwaysUse() throws Exception {
    ExtractedTextCache textCache = new ExtractedTextCache(10 * FileUtils.ONE_MB, 100, true, null);

    PreExtractedTextProvider mockProvider = mock(PreExtractedTextProvider.class);
    textCache.setExtractedTextProvider(mockProvider);

    ExtractedText provided = new ExtractedText(ExtractionResult.SUCCESS, "bar");
    when(mockProvider.getText(anyString(), any(Blob.class))).thenReturn(provided);

    Blob blob = new IdBlob("hello", "a");
    String found = textCache.get("/a", "foo", blob, false);

    assertEquals("bar", found);
}
/**
 * Creates an editor provider whose {@link ExtractedTextCache} is disabled by
 * default (constructed with a size and expiry of zero).
 *
 * @param indexCopier the index copier to use, may be {@code null}
 */
public LuceneIndexEditorProvider(@Nullable IndexCopier indexCopier) {
    this(
        indexCopier,
        new ExtractedTextCache(0, 0)
    );
}