/**
 * Collects candidates for every mention held by the supplied entity information.
 *
 * <p>Each mention's value is passed to {@code queryByNameContains} and the results are merged
 * into one set. A mention whose query raises a {@code ParseException} is logged and skipped, so
 * the remaining mentions are still processed.
 *
 * @param entityInformation source of the person mentions to look up
 * @return the merged candidates; empty when no query contributed a result
 */
@Override
public Collection<Candidate> getCandidates(EntityInformation<Person> entityInformation) {
  Set<Candidate> candidates = new HashSet<>();
  for (Person person : entityInformation.getMentions()) {
    try {
      candidates.addAll(queryByNameContains(person.getValue()));
    } catch (ParseException e) {
      // Hand the exception to the logger as well, so the stack trace is not lost
      LOGGER.error(e.getMessage(), e);
    }
  }
  return candidates;
}
/**
 * Gathers the candidates matching each mention in the given entity information.
 *
 * <p>The value of every mention is looked up with {@code queryByNameContains}; all results go
 * into a single set. If a lookup throws a {@code ParseException} the failure is logged and that
 * mention is skipped, leaving the other mentions unaffected.
 *
 * @param entityInformation provides the person mentions to query for
 * @return the combined candidates; empty when no lookup produced anything
 */
@Override
public Collection<Candidate> getCandidates(EntityInformation<Person> entityInformation) {
  Set<Candidate> candidates = new HashSet<>();
  for (Person person : entityInformation.getMentions()) {
    try {
      candidates.addAll(queryByNameContains(person.getValue()));
    } catch (ParseException e) {
      // Log the throwable too; the message alone discards the stack trace
      LOGGER.error(e.getMessage(), e);
    }
  }
  return candidates;
}
/**
 * Builds an OpenNLP engine configured with the PERSON type and the {@code en_ner_person.bin}
 * model resource, runs it after {@code aeLanguage} over the shared jCas, and checks that two
 * Person annotations ("John Smith", then "Thomas Brown") are present.
 */
@Test
public void test() throws Exception {
  AnalysisEngineDescription desc =
      AnalysisEngineFactory.createEngineDescription(
          OpenNLP.class,
          TYPE,
          PERSON,
          MODEL,
          getClass().getResource("en_ner_person.bin").getPath());
  AnalysisEngine ae = AnalysisEngineFactory.createEngine(desc);

  try {
    SimplePipeline.runPipeline(jCas, aeLanguage, ae);

    assertEquals(2, JCasUtil.select(jCas, Person.class).size());
    assertEquals("John Smith", JCasUtil.selectByIndex(jCas, Person.class, 0).getValue());
    assertEquals("Thomas Brown", JCasUtil.selectByIndex(jCas, Person.class, 1).getValue());
  } finally {
    // Release the engine even when the pipeline or an assertion above fails
    ae.destroy();
  }
}
/**
 * Processes a sentence with TestAnnotator and expects exactly one Person annotation whose value
 * is "JOHN SMITH".
 */
@Test
public void testNoChunks() throws Exception {
  AnalysisEngine engine = AnalysisEngineFactory.createEngine(TestAnnotator.class);

  jCas.setDocumentText("PERSON JOHN SMITH was seen entering the warehouse");
  engine.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Person.class).size());

  Person onlyPerson = JCasUtil.selectByIndex(jCas, Person.class, 0);
  assertEquals("JOHN SMITH", onlyPerson.getValue());
}
@Test public void testAddAnnotationArray() { UimaSupport support = new UimaSupport(PIPELINE, UimaSupportTest.class, history, monitor, false); Person withValue = new Person(jCas); withValue.setBegin(0); withValue.setEnd(4); withValue.setValue("David"); Person withoutValue = new Person(jCas); withoutValue.setBegin(0); withoutValue.setEnd(4); support.add(new ProtectiveMarking(jCas), withValue, withoutValue); assertEquals(1, JCasUtil.select(jCas, ProtectiveMarking.class).size()); List<Person> persons = new ArrayList<Person>(JCasUtil.select(jCas, Person.class)); assertEquals(2, persons.size()); // Check value is set / not overridden assertNotEquals(persons.get(0).getCoveredText(), persons.get(0).getValue()); assertEquals(persons.get(1).getCoveredText(), persons.get(1).getValue()); // Check Id set assertNotEquals(persons.get(0).getInternalId(), persons.get(1).getInternalId()); // Check had history of addition assertFalse( support.getDocumentHistory(jCas).getHistory(persons.get(1).getInternalId()).isEmpty()); }
/**
 * Runs NormalizeWhitespace on a Person whose value contains a newline and expects the value to
 * equal CORRECT_WHITESPACING with the normalised flag set.
 */
@Test
public void testNewLine() throws Exception {
  AnalysisEngine normaliser = AnalysisEngineFactory.createEngine(NormalizeWhitespace.class);

  jCas.setDocumentText("Peter\nSmith lives in Salisbury");
  Annotations.createPerson(jCas, 0, 11, "Peter\nSmith");

  normaliser.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Person.class).size());

  Person person = JCasUtil.selectByIndex(jCas, Person.class, 0);
  assertEquals(CORRECT_WHITESPACING, person.getValue());
  assertEquals(true, person.getIsNormalised());
}
/**
 * Runs NormalizeWhitespace on a Person whose value contains a tab and expects the value to equal
 * CORRECT_WHITESPACING with the normalised flag set.
 */
@Test
public void testTab() throws Exception {
  AnalysisEngine normaliser = AnalysisEngineFactory.createEngine(NormalizeWhitespace.class);

  jCas.setDocumentText("Peter\tSmith lives in Salisbury");
  Annotations.createPerson(jCas, 0, 11, "Peter\tSmith");

  normaliser.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Person.class).size());

  Person person = JCasUtil.selectByIndex(jCas, Person.class, 0);
  assertEquals(CORRECT_WHITESPACING, person.getValue());
  assertEquals(true, person.getIsNormalised());
}
/**
 * Runs NormalizeWhitespace on a Person created over the start of the document and expects the
 * value to equal CORRECT_WHITESPACING with the normalised flag set.
 */
@Test
public void testSpaces() throws Exception {
  AnalysisEngine normaliser = AnalysisEngineFactory.createEngine(NormalizeWhitespace.class);

  // NOTE(review): the span 0-12 is one character longer than the 11-character value literal;
  // the literals may originally have contained repeated spaces - confirm against the upstream
  // test before relying on them.
  jCas.setDocumentText("Peter Smith lives in Salisbury");
  Annotations.createPerson(jCas, 0, 12, "Peter Smith");

  normaliser.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Person.class).size());

  Person person = JCasUtil.selectByIndex(jCas, Person.class, 0);
  assertEquals(CORRECT_WHITESPACING, person.getValue());
  assertEquals(true, person.getIsNormalised());
}
/**
 * Runs NormalizeWhitespace on a Person whose value mixes newlines, a space and a tab, and
 * expects the value to equal CORRECT_WHITESPACING with the normalised flag set.
 */
@Test
public void testMixed() throws Exception {
  AnalysisEngine normaliser = AnalysisEngineFactory.createEngine(NormalizeWhitespace.class);

  jCas.setDocumentText("Peter\n \n\n\tSmith lives in Salisbury");
  Annotations.createPerson(jCas, 0, 11, "Peter\n \n\n\tSmith");

  normaliser.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Person.class).size());

  Person person = JCasUtil.selectByIndex(jCas, Person.class, 0);
  assertEquals(CORRECT_WHITESPACING, person.getValue());
  assertEquals(true, person.getIsNormalised());
}
/**
 * Runs NormalizeWhitespace on a Person whose value has been cleared to null and expects the
 * value to stay null with the normalised flag unset.
 */
@Test
public void testNoValue() throws Exception {
  AnalysisEngine normaliser = AnalysisEngineFactory.createEngine(NormalizeWhitespace.class);

  jCas.setDocumentText("Peter Smith lives in Salisbury");
  Annotations.createPerson(jCas, 0, 11, "Peter Smith");

  // Clear the value on the indexed annotation before the annotator runs
  Person beforeProcessing = JCasUtil.selectByIndex(jCas, Person.class, 0);
  beforeProcessing.setValue(null);

  normaliser.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Person.class).size());

  Person afterProcessing = JCasUtil.selectByIndex(jCas, Person.class, 0);
  assertEquals(null, afterProcessing.getValue());
  assertEquals(false, afterProcessing.getIsNormalised());
}
}
/**
 * Processes the jCas with {@code types} limited to "Person" and expects the two Person
 * annotations to become one "John Smith" while both Location annotations remain as created.
 */
@Test
public void testSingleTypes() throws Exception {
  jCas.setDocumentText("John Smith was seen at London King's Cross");

  Annotations.createPerson(jCas, 0, 4, "John");
  Annotations.createPerson(jCas, 5, 10, "Smith");
  Location london = Annotations.createLocation(jCas, 23, 29, "London", null);
  Location kingsCross = Annotations.createLocation(jCas, 30, 42, "King's Cross", null);

  processJCas("types", new String[] {"Person"});

  assertEquals(1, JCasUtil.select(jCas, Person.class).size());
  assertEquals(2, JCasUtil.select(jCas, Location.class).size());

  Person merged = JCasUtil.selectByIndex(jCas, Person.class, 0);
  assertEquals("John Smith", merged.getCoveredText());
  assertEquals("John Smith", merged.getValue());

  // Locations were not among the configured types, so the same instances are expected back
  assertEquals(london, JCasUtil.selectByIndex(jCas, Location.class, 0));
  assertEquals(kingsCross, JCasUtil.selectByIndex(jCas, Location.class, 1));
}
/**
 * Creates a Custom regex engine for the Person type with DIGIT_REGEX and case sensitivity
 * enabled, processes TEXT, and expects a single Person whose covered text and value are both
 * P123.
 */
@Test
public void testCaseSensitive() throws Exception {
  AnalysisEngine regexAE =
      AnalysisEngineFactory.createEngine(
          Custom.class,
          Custom.PARAM_TYPE,
          UK_GOV_DSTL_BALEEN_TYPES_COMMON_PERSON,
          Custom.PARAM_PATTERN,
          DIGIT_REGEX,
          Custom.PARAM_CASE_SENSITIVE,
          true);

  try {
    jCas.setDocumentText(TEXT);
    regexAE.process(jCas);

    assertEquals(1, JCasUtil.select(jCas, Person.class).size());

    Person p1 = JCasUtil.selectByIndex(jCas, Person.class, 0);
    assertNotNull(p1);
    assertEquals(P123, p1.getCoveredText());
    assertEquals(P123, p1.getValue());
  } finally {
    // Release the engine even when processing or an assertion above fails
    regexAE.destroy();
  }
}
/**
 * Processes the jCas with {@code types} set to "Person" and "Location" and expects one Person
 * ("John Smith") and one Location ("London King's Cross") afterwards.
 */
@Test
public void testMultipleTypes() throws Exception {
  jCas.setDocumentText("John Smith was seen at London King's Cross");

  Annotations.createPerson(jCas, 0, 4, "John");
  Annotations.createPerson(jCas, 5, 10, "Smith");
  Annotations.createLocation(jCas, 23, 29, "London", null);
  Annotations.createLocation(jCas, 30, 42, "King's Cross", null);

  processJCas("types", new String[] {"Person", "Location"});

  assertEquals(1, JCasUtil.select(jCas, Person.class).size());
  assertEquals(1, JCasUtil.select(jCas, Location.class).size());

  Person mergedPerson = JCasUtil.selectByIndex(jCas, Person.class, 0);
  assertEquals("John Smith", mergedPerson.getCoveredText());
  assertEquals("John Smith", mergedPerson.getValue());

  Location mergedLocation = JCasUtil.selectByIndex(jCas, Location.class, 0);
  assertEquals("London King's Cross", mergedLocation.getCoveredText());
  assertEquals("London King's Cross", mergedLocation.getValue());
}
/**
 * Same scenario as a Person pair followed directly by a Location pair with only a space between
 * them: expects one Person ("John Smith") and one Location ("London King's Cross"), i.e. the
 * two types are not combined with each other.
 */
@Test
public void testMultipleAdjacentTypes() throws Exception {
  jCas.setDocumentText("John Smith London King's Cross");

  Annotations.createPerson(jCas, 0, 4, "John");
  Annotations.createPerson(jCas, 5, 10, "Smith");
  Annotations.createLocation(jCas, 11, 17, "London", null);
  Annotations.createLocation(jCas, 18, 30, "King's Cross", null);

  processJCas("types", new String[] {"Person", "Location"});

  assertEquals(1, JCasUtil.select(jCas, Person.class).size());
  assertEquals(1, JCasUtil.select(jCas, Location.class).size());

  Person mergedPerson = JCasUtil.selectByIndex(jCas, Person.class, 0);
  assertEquals("John Smith", mergedPerson.getCoveredText());
  assertEquals("John Smith", mergedPerson.getValue());

  Location mergedLocation = JCasUtil.selectByIndex(jCas, Location.class, 0);
  assertEquals("London King's Cross", mergedLocation.getCoveredText());
  assertEquals("London King's Cross", mergedLocation.getValue());
}
@Test public void testStripPunctuation() throws Exception { AnalysisEngine cleaner = AnalysisEngineFactory.createEngine(CleanPunctuation.class); jCas.setDocumentText("Alice met Bill. \"Who goes there?\", he said."); // Test a space after Annotations.createPerson(jCas, 0, 6, "Alice "); // Test a preceding space and a trailing punctuation, with a different value Annotations.createPerson(jCas, 9, 15, "Bill."); // Test a different type, with multiple trailing punctuation Annotations.createLocation(jCas, 26, 34, "there?\",", null); // Test an entity made entirely of punctuation Annotations.createEntity(jCas, 31, 34, null); cleaner.process(jCas); assertEquals(2, JCasUtil.select(jCas, Person.class).size()); assertEquals(1, JCasUtil.select(jCas, Location.class).size()); assertEquals(3, JCasUtil.select(jCas, Entity.class).size()); Person p = JCasUtil.selectByIndex(jCas, Person.class, 0); assertEquals("Alice", p.getCoveredText()); assertEquals("Alice", p.getValue()); p = JCasUtil.selectByIndex(jCas, Person.class, 1); assertEquals("Bill", p.getCoveredText()); assertEquals("Bill", p.getValue()); Location l = JCasUtil.selectByIndex(jCas, Location.class, 0); assertEquals("there", l.getCoveredText()); assertEquals("there", l.getValue()); cleaner.destroy(); }
/**
 * Processes the jCas with {@code types} set to "Person" and "Location" where the Location parts
 * are separated by a tab, and expects one Person and one Location afterwards.
 */
@Test
public void testSeparator() throws Exception {
  // NOTE(review): every offset below sits three characters later than the matching word in this
  // literal ("Smith" is at 5-10, "London" at 23-29); the text may originally have had extra
  // spaces after "John" - confirm against the upstream test.
  jCas.setDocumentText("John Smith was seen at London\tKing's Cross");

  Annotations.createPerson(jCas, 0, 4, "John");
  Annotations.createPerson(jCas, 8, 13, "Smith");
  Annotations.createLocation(jCas, 26, 32, "London", null);
  Annotations.createLocation(jCas, 33, 45, "King's Cross", null);

  processJCas("types", new String[] {"Person", "Location"});

  assertEquals(1, JCasUtil.select(jCas, Person.class).size());
  assertEquals(1, JCasUtil.select(jCas, Location.class).size());

  Person mergedPerson = JCasUtil.selectByIndex(jCas, Person.class, 0);
  assertEquals("John Smith", mergedPerson.getCoveredText());
  assertEquals("John Smith", mergedPerson.getValue());

  Location mergedLocation = JCasUtil.selectByIndex(jCas, Location.class, 0);
  assertEquals("London\tKing's Cross", mergedLocation.getCoveredText());
  assertEquals("London\tKing's Cross", mergedLocation.getValue());
}
}
/**
 * Creates a Custom regex engine for the Person type with a pattern containing one capture group
 * and PARAM_GROUP set to "1", then expects a single Person covering "John SMITH" whose value is
 * the captured "SMITH".
 */
@Test
public void testPatternGroup() throws Exception {
  AnalysisEngine regexAE =
      AnalysisEngineFactory.createEngine(
          Custom.class,
          Custom.PARAM_TYPE,
          UK_GOV_DSTL_BALEEN_TYPES_COMMON_PERSON,
          Custom.PARAM_PATTERN,
          "\\b[A-Z][a-z]+\\s+([A-Z]+)\\b",
          Custom.PARAM_CASE_SENSITIVE,
          true,
          Custom.PARAM_GROUP,
          "1");

  try {
    jCas.setDocumentText("John SMITH was seen speaking to p456");
    regexAE.process(jCas);

    assertEquals(1, JCasUtil.select(jCas, Person.class).size());

    Person p1 = JCasUtil.selectByIndex(jCas, Person.class, 0);
    assertNotNull(p1);
    assertEquals("John SMITH", p1.getCoveredText());
    assertEquals("SMITH", p1.getValue());
  } finally {
    // Release the engine even when processing or an assertion above fails
    regexAE.destroy();
  }
}
}
/**
 * Configures the annotator for entity type "common.Person", field "athlete" and record
 * "report" (no source parameter) and expects exactly one Person, "fox", spanning 16-19.
 */
@Test
public void testAthleteIsMadePersonNoSource()
    throws AnalysisEngineProcessException, ResourceInitializationException {
  processJCas(
      TemplateFieldToEntityAnnotator.PARAM_ENTITY_TYPE, "common.Person",
      TemplateFieldToEntityAnnotator.PARAM_FIELD_NAME, "athlete",
      TemplateFieldToEntityAnnotator.PARAM_RECORD_NAME, "report");

  // selectSingle itself fails unless there is exactly one Person in the index
  Person athlete = JCasUtil.selectSingle(jCas, Person.class);

  assertEquals("fox", athlete.getValue());
  assertEquals(16, athlete.getBegin());
  assertEquals(19, athlete.getEnd());
}
/**
 * Configures the annotator for entity type "common.Person", field "athlete", record "report"
 * and source "brownSauce" and expects exactly one Person, "fox", spanning 16-19.
 */
@Test
public void testAthleteIsMadePersonSource()
    throws AnalysisEngineProcessException, ResourceInitializationException {
  processJCas(
      TemplateFieldToEntityAnnotator.PARAM_ENTITY_TYPE, "common.Person",
      TemplateFieldToEntityAnnotator.PARAM_FIELD_NAME, "athlete",
      TemplateFieldToEntityAnnotator.PARAM_RECORD_NAME, "report",
      TemplateFieldToEntityAnnotator.PARAM_SOURCE, "brownSauce");

  // selectSingle itself fails unless there is exactly one Person in the index
  Person athlete = JCasUtil.selectSingle(jCas, Person.class);

  assertEquals("fox", athlete.getValue());
  assertEquals(16, athlete.getBegin());
  assertEquals(19, athlete.getEnd());
}
public void assertPersonMatches() { final Person inPerson = JCasUtil.selectSingle(in, Person.class); final Person outPerson = JCasUtil.selectSingle(out, Person.class); assertEquals(inPerson.getGender(), outPerson.getGender()); assertEquals(inPerson.getBegin(), outPerson.getBegin()); assertEquals(inPerson.getEnd(), outPerson.getEnd()); assertEquals(inPerson.getValue(), outPerson.getValue()); assertEquals(0, inPerson.getConfidence(), outPerson.getConfidence()); // Check that person to entity is deferenced and its the same as the one we get... final ReferenceTarget inRt = JCasUtil.selectSingle(in, ReferenceTarget.class); final ReferenceTarget outRtFromJCas = JCasUtil.selectSingle(out, ReferenceTarget.class); final ReferenceTarget outRt = outPerson.getReferent(); assertNotNull(outRt); assertEquals(inRt.getBegin(), outRt.getBegin()); assertEquals(inRt.getEnd(), outRt.getEnd()); assertSame(outRt, outRtFromJCas); }