@Test public void testProcess() throws Exception { jCas.setDocumentText( new String(Files.readAllBytes(Paths.get(getClass().getResource("turing.txt").toURI())))); processJCas(KEY_STOPWORDS, erd, PARAM_STEMMING, "ENGLISH"); JCasMetadata metadata = new JCasMetadata(jCas); Set<String> keywords = metadata.findAll(KEYWORD_METADATA_KEY); assertEquals(5, keywords.size()); assertTrue(keywords.contains("machine")); assertTrue(keywords.contains("computer")); assertTrue(keywords.contains("digital computers")); assertTrue(keywords.contains("state")); // Same score so either is valid assertTrue(keywords.contains("question") || keywords.contains("digital")); assertTrue(JCasUtil.select(jCas, Buzzword.class).size() > 0); Set<String> buzzwords = new HashSet<>(); for (Buzzword bw : JCasUtil.select(jCas, Buzzword.class)) { assertEquals("keyword", bw.getTags(0)); buzzwords.add(bw.getValue()); } assertTrue(buzzwords.contains("machines")); assertTrue(buzzwords.contains("computing")); assertTrue(buzzwords.contains("questioning")); }
/**
 * Checks that exactly two Buzzword annotations are produced for the sample sentence and
 * that each one's value and covered text are "knights" and "conspired", in index order.
 *
 * @throws Exception propagated from {@code processJCas}
 */
@Test
public void test() throws Exception {
  jCas.setDocumentText("Forty seven knights conspired against the crown.");
  processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, BUZZWORD);

  int annotationCount = JCasUtil.select(jCas, Buzzword.class).size();
  assertEquals(2, annotationCount);

  // Expected annotations in index order; value and covered text must both match.
  String[] expectedWords = {"knights", "conspired"};
  for (int index = 0; index < expectedWords.length; index++) {
    Buzzword annotation = JCasUtil.selectByIndex(jCas, Buzzword.class, index);
    assertEquals(expectedWords[index], annotation.getValue());
    assertEquals(expectedWords[index], annotation.getCoveredText());
  }
}
// NOTE(review): this fragment repeats the Buzzword loop from testProcess, opens a brace it
// never closes, and uses `buzzwords` without a visible declaration - presumably a
// paste/extraction artifact; confirm and remove.
for (Buzzword bw : JCasUtil.select(jCas, Buzzword.class)) { assertEquals("keyword", bw.getTags(0)); buzzwords.add(bw.getValue());
/**
 * Checks that a multi-word phrase is annotated as a single Buzzword whose value and
 * covered text are both "entered the room".
 *
 * @throws Exception propagated from {@code processJCas}
 */
@Test
public void testMultipleWords() throws Exception {
  jCas.setDocumentText("Bill and Ben entered the room on a dark and windy night.");
  processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, BUZZWORD);

  int annotationCount = JCasUtil.select(jCas, Buzzword.class).size();
  assertEquals(1, annotationCount);

  String expectedPhrase = "entered the room";
  Buzzword phrase = JCasUtil.selectByIndex(jCas, Buzzword.class, 0);
  assertEquals(expectedPhrase, phrase.getValue());
  assertEquals(expectedPhrase, phrase.getCoveredText());
}
/**
 * Checks that the two Buzzwords found in the sample text ("ladies" and "sirs") both refer
 * to the single ReferenceTarget created during processing.
 *
 * @throws Exception propagated from {@code processJCas}
 */
@Test
public void testCoref() throws Exception {
  jCas.setDocumentText("Lords, ladies, sirs, and madames...");
  processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, BUZZWORD);

  int buzzwordCount = JCasUtil.select(jCas, Buzzword.class).size();
  assertEquals(2, buzzwordCount);
  int targetCount = JCasUtil.select(jCas, ReferenceTarget.class).size();
  assertEquals(1, targetCount);

  ReferenceTarget sharedTarget = JCasUtil.selectByIndex(jCas, ReferenceTarget.class, 0);

  // Expected annotations in index order; each must point at the shared target.
  String[] expectedWords = {"ladies", "sirs"};
  for (int index = 0; index < expectedWords.length; index++) {
    Buzzword annotation = JCasUtil.selectByIndex(jCas, Buzzword.class, index);
    assertEquals(expectedWords[index], annotation.getValue());
    assertEquals(expectedWords[index], annotation.getCoveredText());
    assertEquals(sharedTarget, annotation.getReferent());
  }
}
}
/**
 * Checks that the span "Sydney (Australia" is annotated as the only Buzzword and that it
 * carries exactly one tag, "broken_regex".
 *
 * @throws Exception propagated from {@code processJCas}
 */
@Test
public void testBuzzwordProperty() throws Exception {
  jCas.setDocumentText("Hello Sydney (Australia), this is a test");
  processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, "Buzzword");

  int annotationCount = JCasUtil.select(jCas, Buzzword.class).size();
  assertEquals(1, annotationCount);

  String expectedSpan = "Sydney (Australia";
  Buzzword annotation = JCasUtil.selectByIndex(jCas, Buzzword.class, 0);
  assertEquals(expectedSpan, annotation.getValue());
  assertEquals(expectedSpan, annotation.getCoveredText());

  StringArray tagArray = annotation.getTags();
  assertEquals(1, tagArray.size());
  assertEquals("broken_regex", tagArray.get(0));
}