/**
 * Creates a new JCas from the shared type system description.
 *
 * @return the JCas produced by {@code JCasFactory.createJCas}
 * @throws ResourceInitializationException wrapping any {@code UIMAException} raised while the
 *     JCas is being created
 */
private JCas createJas() throws ResourceInitializationException {
  final JCas created;
  try {
    created = JCasFactory.createJCas(TypeSystemSingleton.getTypeSystemDescriptionInstance());
  } catch (UIMAException cause) {
    // Callers only declare ResourceInitializationException, so re-wrap and keep the cause.
    throw new ResourceInitializationException(cause);
  }
  return created;
}
/**
 * Builds an analysis engine for {@code MemoryTransportSender} using the shared type system
 * description.
 *
 * @param args configuration values forwarded unchanged to {@code AnalysisEngineFactory}
 * @return the created analysis engine
 * @throws ResourceInitializationException if the engine cannot be created
 */
private AnalysisEngine createAnalysisEngine(Object... args)
    throws ResourceInitializationException {
  final AnalysisEngine sender =
      AnalysisEngineFactory.createEngine(
          MemoryTransportSender.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          args);
  return sender;
}
}
/**
 * Creates an analysis engine for the class held in {@code annotatorClass}, using the shared
 * type system description and no additional configuration values.
 *
 * @return the created analysis engine
 * @throws ResourceInitializationException if the engine cannot be created
 */
protected AnalysisEngine getAnalysisEngine() throws ResourceInitializationException {
  final AnalysisEngine engine =
      AnalysisEngineFactory.createEngine(
          annotatorClass, TypeSystemSingleton.getTypeSystemDescriptionInstance());
  return engine;
}
/**
 * Creates an analysis engine for the class held in {@code annotatorClass}, using the shared
 * type system description and the supplied configuration values.
 *
 * @param args name-value pairs, forwarded unchanged to {@code AnalysisEngineFactory}
 * @return the created analysis engine
 * @throws ResourceInitializationException if the engine cannot be created
 */
protected AnalysisEngine getAnalysisEngine(Object... args)
    throws ResourceInitializationException {
  final AnalysisEngine engine =
      AnalysisEngineFactory.createEngine(
          annotatorClass, TypeSystemSingleton.getTypeSystemDescriptionInstance(), args);
  return engine;
}
/**
 * Creates a collection reader of type {@code readerClass}, prepending the content extractor
 * key and value to the caller's configuration values.
 *
 * @param args additional configuration values, appended after the content extractor pair
 * @return the created reader, cast to {@code BaleenCollectionReader}
 * @throws ResourceInitializationException if the reader cannot be created
 */
protected BaleenCollectionReader getCollectionReader(Object... args)
    throws ResourceInitializationException {
  // The content extractor pair goes first, followed by whatever the caller supplied.
  ImmutableList.Builder<Object> parameters = ImmutableList.builder();
  parameters.add(KEY_CONTENT_EXTRACTOR);
  parameters.add(contentExtractor);
  parameters.addAll(Arrays.asList(args));
  Object[] argumentWithExtractor = parameters.build().toArray();

  return (BaleenCollectionReader)
      CollectionReaderFactory.createReader(
          readerClass,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          argumentWithExtractor);
}
}
/**
 * Returns the shared JCas, creating it on first use and resetting it on every later call.
 *
 * @return the shared JCas instance
 * @throws UIMAException if the JCas cannot be created
 */
public static JCas getJCasInstance() throws UIMAException {
  if (jCas != null) {
    // Already created: reset it before handing it out again.
    jCas.reset();
    return jCas;
  }

  jCas = JCasFactory.createJCas(TypeSystemSingleton.getTypeSystemDescriptionInstance());
  return jCas;
}
}
/**
 * Creates a {@code MemoryTransportReceiver} reader using the shared type system description.
 *
 * <p>The reader is given a content extractor resource description built from {@code
 * FakeBaleenContentExtractor}, and {@code erd} bound under {@code
 * SharedMemoryQueueResource.RESOURCE_KEY}.
 *
 * @return the created receiver
 * @throws ResourceInitializationException if the reader cannot be created
 */
private MemoryTransportReceiver createReciever() throws ResourceInitializationException {
  final MemoryTransportReceiver receiver =
      (MemoryTransportReceiver)
          CollectionReaderFactory.createReader(
              MemoryTransportReceiver.class,
              TypeSystemSingleton.getTypeSystemDescriptionInstance(),
              KEY_CONTENT_EXTRACTOR,
              ExternalResourceFactory.createExternalResourceDescription(
                  KEY_CONTENT_EXTRACTOR, FakeBaleenContentExtractor.class),
              SharedMemoryQueueResource.RESOURCE_KEY,
              erd);
  return receiver;
}
/**
 * One-off class setup: stores the shared type system description and creates the JCas used by
 * the tests from it.
 *
 * @throws UIMAException if the JCas cannot be created
 */
@BeforeClass
public static void setupClass() throws UIMAException {
  // Keep the description in its own field; the JCas is then built from that field.
  typeSystemDescription = TypeSystemSingleton.getTypeSystemDescriptionInstance();

  jCas = JCasFactory.createJCas(typeSystemDescription);
}
@Test public void testBadBasePath() throws Exception { File baseDir = File.createTempFile("baleen", ".foo"); try { AnalysisEngineFactory.createEngine( TestFileConsumer.class, TypeSystemSingleton.getTypeSystemDescriptionInstance(), BASE_PATH, baseDir.getPath()); fail("Didn't throw expected exception"); } catch (ResourceInitializationException rie) { // Expected exception } }
@Test public void testEntityCountOutputReadOnly() throws Exception { File output = Files.createTempFile("baleen-entitycount", ".tsv").toFile(); output.setReadOnly(); try { AnalysisEngineFactory.createEngine( EntityCount.class, TypeSystemSingleton.getTypeSystemDescriptionInstance(), OUTPUT_FILE, output.getPath()); fail("Expected exception not thrown"); } catch (Exception ex) { // Do nothing } output.delete(); }
@Test public void testEntityCountOutputCantWrite() throws Exception { File output = Files.createTempDirectory("baleen").toFile(); createDocument(); // Try writing to folder AnalysisEngine consumer = AnalysisEngineFactory.createEngine( EntityCount.class, TypeSystemSingleton.getTypeSystemDescriptionInstance(), OUTPUT_FILE, output.getPath()); consumer.process(jCas); consumer.destroy(); output.delete(); } }
/**
 * Runs {@code EntityCount} against an output file that does not exist yet and checks the
 * single line it writes for the test document.
 *
 * @throws Exception if setup, engine creation, processing or reading the output fails
 */
@Test
public void testEntityCountOutputNewFile() throws Exception {
  File outputFolder = Files.createTempDirectory("baleen").toFile();
  // Only the path is built here; the file itself is not created by the test.
  File output = new File(outputFolder, "baleen-entitycount.tsv");

  try {
    AnalysisEngine consumer =
        AnalysisEngineFactory.createEngine(
            EntityCount.class,
            TypeSystemSingleton.getTypeSystemDescriptionInstance(),
            OUTPUT_FILE,
            output.getPath());
    try {
      createDocument();
      consumer.process(jCas);

      assertEquals("test1.txt\t2", FileUtils.file2String(output).trim());
    } finally {
      // Release the engine even if processing or the assertion fails.
      consumer.destroy();
    }
  } finally {
    // Delete the file before its folder; both are removed on success and on failure.
    output.delete();
    outputFolder.delete();
  }
}
/**
 * Runs {@code EntityCount} against an existing, empty temporary output file and checks the
 * single line it writes for the test document.
 *
 * @throws Exception if setup, engine creation, processing or reading the output fails
 */
@Test
public void testEntityCountOutput() throws Exception {
  File output = Files.createTempFile("baleen-entitycount", ".tsv").toFile();

  try {
    AnalysisEngine consumer =
        AnalysisEngineFactory.createEngine(
            EntityCount.class,
            TypeSystemSingleton.getTypeSystemDescriptionInstance(),
            OUTPUT_FILE,
            output.getPath());
    try {
      createDocument();
      consumer.process(jCas);

      assertEquals("test1.txt\t2", FileUtils.file2String(output).trim());
    } finally {
      // Release the engine even if processing or the assertion fails.
      consumer.destroy();
    }
  } finally {
    // Remove the temporary file whether the test passed or failed.
    output.delete();
  }
}
/**
 * Creates a {@code TestFileConsumer} without a base path, processes a document whose source
 * URI is {@code FILENAME}, and checks that a file exists at the path {@code FILENAME}.
 *
 * @throws Exception if engine creation or processing fails
 */
@Test
public void testNullBasePath() throws Exception {
  AnalysisEngine engine =
      AnalysisEngineFactory.createEngine(
          TestFileConsumer.class, TypeSystemSingleton.getTypeSystemDescriptionInstance());

  DocumentAnnotation annotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  annotation.setSourceUri(FILENAME);

  engine.process(jCas);

  File written = new File(FILENAME);
  assertTrue(written.exists());

  // Tidy up the file the consumer produced.
  written.delete();
}
}
/**
 * When {@code hello.txt.html} already exists in the output folder, processing a document with
 * source URI {@code hello.txt} is expected to produce {@code hello.txt.1.html}.
 *
 * @throws UIMAException if engine creation or processing fails
 * @throws IOException if the pre-existing file cannot be created
 */
@Test
public void testCreateExistingFile() throws UIMAException, IOException {
  AnalysisEngine engine =
      AnalysisEngineFactory.createEngine(
          Html5.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          Html5.PARAM_OUTPUT_FOLDER,
          outputFolder.getPath());

  jCas.setDocumentText("Hello World!");
  DocumentAnnotation annotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  annotation.setSourceUri("hello.txt");

  // Occupy the name the consumer would otherwise use.
  File alreadyThere = new File(outputFolder, "hello.txt.html");
  alreadyThere.createNewFile();

  engine.process(jCas);

  File numbered = new File(outputFolder, "hello.txt.1.html");
  assertTrue(numbered.exists());
}
/**
 * Configures {@code Html5} with a CSS parameter and checks that the generated HTML contains
 * exactly one {@code link} element, which references {@code test.css} as a stylesheet.
 *
 * @throws UIMAException if engine creation or processing fails
 * @throws IOException if the generated file cannot be parsed
 */
@Test
public void testCSS() throws UIMAException, IOException {
  AnalysisEngine engine =
      AnalysisEngineFactory.createEngine(
          Html5.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          Html5.PARAM_OUTPUT_FOLDER,
          outputFolder.getPath(),
          Html5.PARAM_CSS,
          "test.css");

  jCas.setDocumentText("This is a test document.");
  engine.process(jCas);

  // No source URI is set, and the output is looked up by the document annotation's hash.
  DocumentAnnotation annotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  File html = new File(outputFolder, annotation.getHash() + ".html");
  assertTrue(html.exists());

  Document parsed = Jsoup.parse(html, "UTF-8");
  Elements linkElements = parsed.select("link");
  assertEquals(1, linkElements.size());

  Element stylesheet = linkElements.get(0);
  assertEquals("stylesheet", stylesheet.attr("rel"));
  assertEquals("test.css", stylesheet.attr("href"));
}
/**
 * Points {@code Html5} at a {@code test} sub-folder of the output folder (the test itself does
 * not create it) and checks that {@code hello.txt.html} exists there after processing.
 *
 * @throws UIMAException if engine creation or processing fails
 */
@Test
public void testCreateOutputDir() throws UIMAException {
  File subFolder = new File(outputFolder, "test");

  AnalysisEngine engine =
      AnalysisEngineFactory.createEngine(
          Html5.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          Html5.PARAM_OUTPUT_FOLDER,
          subFolder.getPath());

  jCas.setDocumentText("Hello World!");
  DocumentAnnotation annotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  annotation.setSourceUri("hello.txt");

  engine.process(jCas);

  File html = new File(subFolder, "hello.txt.html");
  assertTrue(html.exists());
}
/**
 * Processes a document with source URI {@code hello.txt} through {@code Html5} and checks that
 * {@code hello.txt.html} exists in the output folder afterwards.
 *
 * @throws UIMAException if engine creation or processing fails
 */
@Test
public void testCreateFile() throws UIMAException {
  AnalysisEngine engine =
      AnalysisEngineFactory.createEngine(
          Html5.class,
          TypeSystemSingleton.getTypeSystemDescriptionInstance(),
          Html5.PARAM_OUTPUT_FOLDER,
          outputFolder.getPath());

  jCas.setDocumentText("Hello World!");
  DocumentAnnotation annotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  annotation.setSourceUri("hello.txt");

  engine.process(jCas);

  File html = new File(outputFolder, "hello.txt.html");
  assertTrue(html.exists());
}
/**
 * Processes a document that has text but no source URI through {@code TestFileConsumer} and
 * checks that the text is written to a hash-named {@code .txt} file under the base path.
 *
 * @throws Exception if engine creation, processing or reading the output fails
 */
@Test
public void testNoSource() throws Exception {
  File baseDir = Files.createTempDir();
  // The expected name is the SHA-256 digest of empty input. Presumably the consumer hashes the
  // (absent) source URI - confirm against the consumer implementation.
  File written =
      new File(
          baseDir, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855.txt");

  try {
    AnalysisEngine consumer =
        AnalysisEngineFactory.createEngine(
            TestFileConsumer.class,
            TypeSystemSingleton.getTypeSystemDescriptionInstance(),
            BASE_PATH,
            baseDir.getPath(),
            "extension",
            "txt");

    jCas.setDocumentText(TEXT);
    consumer.process(jCas);

    String s = FileUtils.file2String(written);
    assertEquals(TEXT, s);
  } finally {
    // Delete the file before its directory so neither is left behind.
    written.delete();
    baseDir.delete();
  }
}
/**
 * Processes a document whose source URI is a file in {@code sourceFolder} through {@code
 * MoveSourceFile} and checks that the file has gone from the source folder, exists in the
 * destination folder, and that the first {@code Metadata} annotation records the new location.
 *
 * @throws Exception if engine creation, file setup or processing fails
 */
@Test
public void testMove() throws Exception {
  File destinationFolder = Files.createTempDir();
  // Only the path is built here; the file should not exist until the consumer has run.
  File f2 = new File(destinationFolder, BALEEN_TXT);

  try {
    AnalysisEngine consumer =
        AnalysisEngineFactory.createEngine(
            MoveSourceFile.class,
            TypeSystemSingleton.getTypeSystemDescriptionInstance(),
            MoveSourceFile.PARAM_DESTINATION,
            destinationFolder.getPath());

    File f = new File(sourceFolder, BALEEN_TXT);
    if (!f.exists()) {
      f.createNewFile();
    }

    assertEquals(false, f2.exists());

    DocumentAnnotation da = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
    da.setSourceUri(f.getPath());

    consumer.process(jCas);

    assertEquals(false, f.exists());
    assertEquals(true, f2.exists());

    Metadata md = JCasUtil.selectByIndex(jCas, Metadata.class, 0);
    assertNotNull(md);
    assertEquals(MOVED_DOCUMENT_LOCATION, md.getKey());
    assertEquals(f2.getPath(), md.getValue());
  } finally {
    // Clean up the destination even when an assertion above fails.
    f2.delete();
    destinationFolder.delete();
  }
}