@Test public void testKeyFile() throws UIMAException, IOException { Path keyTmpDir = Files.createTempDirectory("muctest"); Files.write(keyTmpDir.resolve("key-test"), MUC.getBytes(StandardCharsets.UTF_8)); try { BaleenCollectionReader bcr = getCollectionReader(MucReader.KEY_PATH, keyTmpDir.toAbsolutePath().toString()); bcr.initialize(); fail("Expected exception not thrown"); } catch (ResourceInitializationException be) { // Do nothing, expected exception assertEquals(BaleenException.class, be.getCause().getClass()); } keyTmpDir.toFile().delete(); }
@Test public void testNoFiles() throws UIMAException, IOException { Path emptyTmpDir = Files.createTempDirectory("muctest"); try { BaleenCollectionReader bcr = getCollectionReader(MucReader.KEY_PATH, emptyTmpDir.toAbsolutePath().toString()); bcr.initialize(); fail("Expected exception not thrown"); } catch (ResourceInitializationException be) { // Do nothing, expected exception assertEquals(BaleenException.class, be.getCause().getClass()); } emptyTmpDir.toFile().delete(); }
/**
 * Reads the first document from the test folder and checks that its text begins with the
 * expected opening sentence.
 */
@Test
public void testDocumentText() throws Exception {
  String folder = tmpDir.toAbsolutePath().toString();
  BaleenCollectionReader reader = getCollectionReader(MimeReader.PARAM_FOLDER, folder);
  reader.initialize();

  // At least one document must be available before we try to read it
  assertTrue(reader.doHasNext());
  reader.getNext(jCas.getCas());

  String documentText = jCas.getDocumentText();
  assertTrue(documentText.startsWith("I'm going to eat you!"));

  reader.close();
}
}
/**
 * Reads the single document available under the test directory and checks its source, its
 * (lower-cased) text, and that no further documents follow.
 */
@Test
public void test() throws UIMAException, IOException {
  // Expected document text: the upper-case passage below, lower-cased
  String expectedText =
      ("THE ARCE BATTALION COMMAND HAS REPORTED THAT ABOUT 50 "
              + "PEASANTS OF VARIOUS AGES HAVE BEEN KIDNAPPED BY TERRORISTS OF THE "
              + "FARABUNDO MARTI NATIONAL LIBERATION FRONT IN SAN MIGUEL "
              + "DEPARTMENT. ACCORDING TO THAT GARRISON, THE MASS KIDNAPPING TOOK PLACE ON "
              + "30 DECEMBER IN SAN LUIS DE LA REINA. THE SOURCE ADDED THAT THE TERRORISTS "
              + "FORCED THE INDIVIDUALS, WHO WERE TAKEN TO AN UNKNOWN LOCATION, OUT OF "
              + "THEIR RESIDENCES, PRESUMABLY TO INCORPORATE THEM AGAINST THEIR WILL INTO "
              + "CLANDESTINE GROUPS.")
          .toLowerCase();

  String keyPath = tmpDir.toAbsolutePath().toString();
  BaleenCollectionReader reader = getCollectionReader(MucReader.KEY_PATH, keyPath);
  reader.initialize();

  assertTrue(reader.doHasNext());
  reader.getNext(jCas.getCas());

  assertEquals("DEV-MUC3-0001 (NOSC)", getSource(jCas));
  assertEquals(expectedText, jCas.getDocumentText());

  // Only one document is expected
  assertFalse(reader.doHasNext());

  reader.close();
}
"contentExtractor", "PlainTextContentExtractor"); bcr.initialize();
@Test public void testMetadata() throws Exception { BaleenCollectionReader bcr = getCollectionReader(MimeReader.PARAM_FOLDER, tmpDir.toAbsolutePath().toString()); bcr.initialize(); assertTrue(bcr.doHasNext()); bcr.getNext(jCas.getCas()); AnnotationIndex<Metadata> annotationIndex = jCas.getAnnotationIndex(Metadata.class); // returns the X-From and not the From, these can differ assertEquals(X_FROM, get(annotationIndex, "email.X-From")); assertEquals(TO, get(annotationIndex, "email.X-To")); assertEquals(MESSAGE_ID, get(annotationIndex, "email.Message-ID")); bcr.close(); }
EmailReader.PARAM_PROCESS, "content"); bcr.initialize();
folder.getPath()); bcr.initialize();
folder.getPath()); bcr.initialize();
true); bcr.initialize();
true); bcr.initialize();
/**
 * A POP3 reader pointed at a mailbox that has received no mail must report that no document is
 * available.
 *
 * <p>The reader is closed in a {@code finally} block so that a failing assertion does not leave
 * it open.
 */
@Test
public void testPopNoMessages() throws Exception {
  BaleenCollectionReader bcr =
      getCollectionReader(
          EmailReader.PARAM_PROTOCOL,
          "pop3",
          EmailReader.PARAM_WAIT,
          5,
          EmailReader.PARAM_SERVER,
          greenMail.getPop3().getBindTo(),
          EmailReader.PARAM_PORT,
          greenMail.getPop3().getPort(),
          EmailReader.PARAM_USER,
          "to@localhost.com",
          EmailReader.PARAM_PASS,
          "password");

  // Initialise outside the try: close() is only called on a successfully initialised reader,
  // as in the original flow.
  bcr.initialize();
  try {
    assertFalse(bcr.doHasNext());
  } finally {
    bcr.close();
  }
}
/**
 * An IMAP reader pointed at a mailbox that has received no mail must report that no document is
 * available.
 *
 * <p>The reader is closed in a {@code finally} block so that a failing assertion does not leave
 * it open.
 */
@Test
public void testImapNoMessages() throws Exception {
  BaleenCollectionReader bcr =
      getCollectionReader(
          EmailReader.PARAM_PROTOCOL,
          "imap",
          EmailReader.PARAM_WAIT,
          5,
          EmailReader.PARAM_SERVER,
          greenMail.getImap().getBindTo(),
          EmailReader.PARAM_PORT,
          greenMail.getImap().getPort(),
          EmailReader.PARAM_USER,
          "to@localhost.com",
          EmailReader.PARAM_PASS,
          "password");

  // Initialise outside the try: close() is only called on a successfully initialised reader,
  // as in the original flow.
  bcr.initialize();
  try {
    assertFalse(bcr.doHasNext());
  } finally {
    bcr.close();
  }
}
@Test public void test() throws IOException, UIMAException { BaleenCollectionReader bcr = getCollectionReader(ReutersReader.KEY_PATH, tmpDir.toAbsolutePath().toString()); bcr.initialize(); assertTrue(bcr.doHasNext()); bcr.getNext(jCas.getCas()); // assertEquals("DEV-MUC3-0001 (NOSC)", getSource(jCas)); assertEquals("Some example\ntext.", jCas.getDocumentText()); jCas.reset(); assertTrue(bcr.doHasNext()); bcr.getNext(jCas.getCas()); // assertEquals("DEV-MUC3-0001 (NOSC)", getSource(jCas)); assertEquals("Another example", jCas.getDocumentText()); assertFalse(bcr.doHasNext()); bcr.close(); } }
folder.getPath()); bcr.initialize();
folder.getPath()); bcr.initialize();
@Test public void testPopWait() throws Exception { BaleenCollectionReader bcr = getCollectionReader( EmailReader.PARAM_PROTOCOL, "pop3", EmailReader.PARAM_WAIT, 5, EmailReader.PARAM_SERVER, greenMail.getPop3().getBindTo(), EmailReader.PARAM_PORT, greenMail.getPop3().getPort(), EmailReader.PARAM_USER, "to@localhost.com", EmailReader.PARAM_PASS, "password", EmailReader.PARAM_PROCESS, "content"); bcr.initialize(); assertFalse(bcr.doHasNext()); GreenMailUtil.sendTextEmailTest( "to@localhost.com", "from@localhost.com", GreenMailUtil.random(), GreenMailUtil.random()); assertFalse(bcr.doHasNext()); // Should be a 5 second delay before it returns true Thread.sleep(5000); assertTrue(bcr.doHasNext()); bcr.close(); }
EmailReader.PARAM_PROCESS, "content"); bcr.initialize();
EmailReader.PARAM_PROCESS, "content"); bcr.initialize();
/**
 * Sends two messages before the POP3 reader is created, reads the first, sleeps for 20 seconds
 * (longer than the configured {@code PARAM_WAIT} of 15 — presumably seconds), and then checks
 * that the second message can still be read and that nothing remains afterwards.
 *
 * <p>The reader is closed in a {@code finally} block so that a failing assertion does not leave
 * it open.
 */
@Test
public void testPopLongWait() throws Exception {
  String subject = GreenMailUtil.random();
  String body = GreenMailUtil.random();
  String subject2 = GreenMailUtil.random();
  String body2 = GreenMailUtil.random();

  GreenMailUtil.sendTextEmailTest("to@localhost.com", "from@localhost.com", subject, body);
  GreenMailUtil.sendTextEmailTest("to@localhost.com", "from@localhost.com", subject2, body2);

  BaleenCollectionReader bcr =
      getCollectionReader(
          EmailReader.PARAM_PROTOCOL,
          "pop3",
          EmailReader.PARAM_WAIT,
          15,
          EmailReader.PARAM_SERVER,
          greenMail.getPop3().getBindTo(),
          EmailReader.PARAM_PORT,
          greenMail.getPop3().getPort(),
          EmailReader.PARAM_USER,
          "to@localhost.com",
          EmailReader.PARAM_PASS,
          "password",
          EmailReader.PARAM_PROCESS,
          "content");

  // Initialise outside the try: close() is only called on a successfully initialised reader,
  // as in the original flow.
  bcr.initialize();
  try {
    // First message
    assertTrue(bcr.doHasNext());
    bcr.getNext(jCas);
    jCas.reset();

    // Stay idle for longer than the configured wait before asking for the second message
    Thread.sleep(20000);

    // Second message
    assertTrue(bcr.doHasNext());
    bcr.getNext(jCas);
    jCas.reset();

    // Both messages consumed
    assertFalse(bcr.doHasNext());
  } finally {
    bcr.close();
  }
}