/**
 * Runs the detector against the test corpus: first each file individually,
 * then all files together, each pass in multiple threads.
 *
 * @param detector detector under test
 * @param numThreads number of threads to use
 * @param numIterations number of iterations per thread
 * @param filter file filter to select test files; if <code>null</code>, all files are used
 * @param randomlyResizeSAXPool if &gt; 0, number of extra threads used to randomly
 *        resize the SAX pool while detection runs (NOTE(review): inferred from the
 *        thread-count arithmetic in testDetectorOnAll — confirm semantics)
 * @throws Exception on test failure
 */
public void testDetector(Detector detector, int numThreads, int numIterations,
                         FileFilter filter, int randomlyResizeSAXPool) throws Exception {
    Path[] testFiles = getTestFiles(filter);
    testDetectorEach(detector, testFiles, numThreads, numIterations, randomlyResizeSAXPool);
    testDetectorOnAll(detector, testFiles, numThreads, numIterations, randomlyResizeSAXPool);
}
/**
 * Calls {@link #testEach(Parser, Path[], ParseContext[], int, int)} and then
 * {@link #testAll(Parser, Path[], ParseContext[], int, int)} on the selected files.
 *
 * @param parser parser under test
 * @param parseContext parse contexts passed through to the per-file and all-files runs
 * @param numThreads number of threads to use
 * @param numIterations number of iterations per thread
 * @param filter file filter to select files from "/test-documents"; if <code>null</code>,
 *        all files will be used
 * @throws Exception on test failure
 */
protected void testMultiThreaded(Parser parser, ParseContext[] parseContext, int numThreads,
                                 int numIterations, FileFilter filter) throws Exception {
    Path[] selectedFiles = getTestFiles(filter);
    testEach(parser, selectedFiles, parseContext, numThreads, numIterations);
    testAll(parser, selectedFiles, parseContext, numThreads, numIterations);
}
private void testDetectorOnAll(Detector detector, Path[] toTest, int numThreads, int numIterations, int randomlyResizeSAXPool) { Map<Path, MediaType> truth = getBaselineDetection(detector, toTest); //if all files caused an exception if (truth.size() == 0) { return; } //only those that parsed without exception Path[] testFiles = new Path[truth.size()]; int j = 0; for (Path testFile : truth.keySet()) { testFiles[j++] = testFile; } int actualThreadCount = numThreads + ((randomlyResizeSAXPool > 0) ? randomlyResizeSAXPool : 0); ExecutorService ex = Executors.newFixedThreadPool(actualThreadCount); try { _testDetectorOnAll(detector, testFiles, numThreads, numIterations, truth, ex, randomlyResizeSAXPool); } finally { ex.shutdown(); ex.shutdownNow(); } }
Map<Path, Extract> truth = getBaseline(parser, files, parseContext[0]); _testAll(parser, files, parseContext, numThreads, numIterations, truth, ex); } finally { ex.shutdown();
/**
 * Exercises the detector on each file individually — one file at a time,
 * multiple threads per file — by delegating to
 * {@link #testDetectorOnAll(Detector, Path[], int, int, int)} with a
 * single-element array.
 *
 * @param detector detector under test
 * @param files files to test, one at a time
 * @param numThreads number of threads to use
 * @param numIterations number of iterations per thread
 * @param randomlyResizeSAXPool passed through to testDetectorOnAll
 */
void testDetectorEach(Detector detector, Path[] files, int numThreads, int numIterations,
                      int randomlyResizeSAXPool) {
    for (Path file : files) {
        testDetectorOnAll(detector, new Path[]{file}, numThreads, numIterations,
                randomlyResizeSAXPool);
    }
}
/**
 * Tests each file, one at a time, in multiple threads.
 * <p>
 * This was required to test TIKA-2519 in a reasonable amount of time: because
 * every thread parses literally the same file (not a per-thread copy), the
 * parser is forced to share the same underlying memory structures. This is
 * stricter than our agreement with clients likely requires; leave it as is
 * unless there's a good reason to create a separate copy per thread.
 *
 * @param parser parser under test
 * @param files files to test, one at a time
 * @param parseContext parse contexts passed through to {@code testAll}
 * @param numThreads number of threads to use
 * @param numIterations number of iterations per thread
 */
protected void testEach(Parser parser, Path[] files, ParseContext[] parseContext,
                        int numThreads, int numIterations) {
    for (Path file : files) {
        testAll(parser, new Path[]{file}, parseContext, numThreads, numIterations);
    }
}
private static ConcurrentHashMap<Path, Extract> getBaseline(Parser parser, Path[] files, ParseContext parseContext) { ConcurrentHashMap<Path, Extract> baseline = new ConcurrentHashMap<>(); for (Path f : files) { try (TikaInputStream is = TikaInputStream.get(f)) { List<Metadata> metadataList = getRecursiveMetadata(is, parser, parseContext); baseline.put(f, new Extract(metadataList)); } catch (Exception e) { e.printStackTrace(); //swallow } } return baseline; }