/**
 * Processes a sentence that mentions four CAS-number-like strings and expects exactly three
 * {@code Chemical} annotations, whose covered text matches the first three numbers in the order
 * they appear. The fourth number is the one the sentence itself calls "not valid".
 */
@Test
public void test() throws AnalysisEngineProcessException, ResourceInitializationException {
  jCas.setDocumentText(
      "The CAS Number for water is 7732-18-5, but carbon could be either CASRN:7440-44-0 or CAS Registry Number 7782-42-5. CAS Number 7440-44-5 is not valid.");
  processJCas();

  // Expected covered text, listed in annotation index order.
  String[] expectedNumbers = {"7732-18-5", "7440-44-0", "7782-42-5"};

  assertEquals(expectedNumbers.length, JCasUtil.select(jCas, Chemical.class).size());
  for (int index = 0; index < expectedNumbers.length; index++) {
    Chemical found = JCasUtil.selectByIndex(jCas, Chemical.class, index);
    assertEquals(expectedNumbers[index], found.getCoveredText());
  }
}
// Closes an enclosing scope that was opened above this excerpt.
}
/**
 * Runs {@code NormalizeOSGB} over a single {@code Location} entity and checks that the entity is
 * left as it was: its value is still {@code "tq299804"} and it is not flagged as normalised.
 */
@Test
public void testWrongType() throws Exception {
  AnalysisEngine normaliser = AnalysisEngineFactory.createEngine(NormalizeOSGB.class);

  String rawValue = "tq299804";
  // NOTE(review): helper is defined elsewhere; presumably it adds a Location with this value and
  // sub type to jCas - confirm against the helper.
  createAndAddLocationEntity(rawValue, "osgb");

  normaliser.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Location.class).size());

  Location untouched = JCasUtil.selectByIndex(jCas, Location.class, 0);
  assertEquals("tq299804", untouched.getValue());
  assertEquals(false, untouched.getIsNormalised());
}
/**
 * Creates two overlapping {@code Location} annotations (offsets 6-12 and 0-12) whose last
 * creation argument differs, runs {@code RemoveNestedLocations}, and expects both to remain:
 * the 0-12 annotation at index 0 and the 6-12 annotation at index 1.
 */
@Test
public void testDifferentGeoJson() throws Exception {
  AnalysisEngine nestedRemover = AnalysisEngineFactory.createEngine(RemoveNestedLocations.class);
  jCas.setDocumentText(NORTH_LONDON);

  // NOTE(review): the fifth argument is presumably the GeoJSON payload - confirm in Annotations.
  Location inner = Annotations.createLocation(jCas, 6, 12, LONDON, FAKE_GEOJSON);
  Location outer = Annotations.createLocation(jCas, 0, 12, NORTH_LONDON, "different");

  nestedRemover.process(jCas);

  assertEquals(2, JCasUtil.select(jCas, Location.class).size());

  Location first = JCasUtil.selectByIndex(jCas, Location.class, 0);
  assertEquals(outer, first);

  Location second = JCasUtil.selectByIndex(jCas, Location.class, 1);
  assertEquals(inner, second);
}
/**
 * Runs {@code NormalizeOSGB} over a coordinate whose value contains spaces
 * ({@code "TQ 299 804"}) and expects the single {@code Coordinate} to end up with the value
 * {@code CORRECT_FORMAT} and to be flagged as normalised.
 */
@Test
public void testSpaces() throws Exception {
  AnalysisEngine normaliser = AnalysisEngineFactory.createEngine(NormalizeOSGB.class);

  String spacedValue = "TQ 299 804";
  createAndAddCoordinateEntity(spacedValue, SUB_TYPE);

  normaliser.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Coordinate.class).size());

  Coordinate normalised = JCasUtil.selectByIndex(jCas, Coordinate.class, 0);
  assertEquals(CORRECT_FORMAT, normalised.getValue());
  assertEquals(true, normalised.getIsNormalised());
}
/**
 * Runs {@code NormalizeWhitespace} over a {@code Person} whose value contains a tab character
 * and expects the single remaining {@code Person} to have the value
 * {@code CORRECT_WHITESPACING} and to be flagged as normalised.
 */
@Test
public void testTab() throws Exception {
  AnalysisEngine whitespaceNormaliser =
      AnalysisEngineFactory.createEngine(NormalizeWhitespace.class);

  jCas.setDocumentText("Peter\tSmith lives in Salisbury");
  // Offsets 0-11 span the name, including the tab between the two words.
  Annotations.createPerson(jCas, 0, 11, "Peter\tSmith");

  whitespaceNormaliser.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Person.class).size());

  Person person = JCasUtil.selectByIndex(jCas, Person.class, 0);
  assertEquals(CORRECT_WHITESPACING, person.getValue());
  assertEquals(true, person.getIsNormalised());
}
/**
 * Runs {@code NormalizeWhitespace} over a {@code Person} whose value contains a newline
 * character and expects the single remaining {@code Person} to have the value
 * {@code CORRECT_WHITESPACING} and to be flagged as normalised.
 */
@Test
public void testNewLine() throws Exception {
  AnalysisEngine whitespaceNormaliser =
      AnalysisEngineFactory.createEngine(NormalizeWhitespace.class);

  jCas.setDocumentText("Peter\nSmith lives in Salisbury");
  // Offsets 0-11 span the name, including the newline between the two words.
  Annotations.createPerson(jCas, 0, 11, "Peter\nSmith");

  whitespaceNormaliser.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Person.class).size());

  Person person = JCasUtil.selectByIndex(jCas, Person.class, 0);
  assertEquals(CORRECT_WHITESPACING, person.getValue());
  assertEquals(true, person.getIsNormalised());
}
/**
 * Processes a sentence containing "1mm" and hands the first {@code Quantity} annotation to
 * {@code checkQuantityProperties} together with the expected offsets (7-10), text and values.
 */
@Test
public void testMillimetres() throws Exception {
  jCas.setDocumentText("It was 1mm thick wide.");
  processJCas();

  Quantity millimetres = JCasUtil.selectByIndex(jCas, Quantity.class, 0);
  // NOTE(review): argument meanings follow the helper's signature, which is defined elsewhere;
  // presumably begin, end, covered text, quantity, normalised quantity, unit, normalised unit.
  checkQuantityProperties(millimetres, 7, 10, "1mm", 1d, 0.001d, "mm", "m");
}
/**
 * Processes a sentence containing "60 inch" and hands the first {@code Quantity} annotation to
 * {@code checkQuantityProperties} together with the expected offsets (7-14), text and values.
 */
@Test
public void testInches() throws Exception {
  jCas.setDocumentText("It was 60 inch deep.");
  processJCas();

  Quantity inches = JCasUtil.selectByIndex(jCas, Quantity.class, 0);
  // NOTE(review): argument meanings follow the helper's signature, which is defined elsewhere;
  // presumably begin, end, covered text, quantity, normalised quantity, unit, normalised unit.
  checkQuantityProperties(inches, 7, 14, "60 inch", 60d, 1.524, "in", "m");
}
@Test public void testMiddle() throws Exception { // Test cases where the brackets appear in the middle (should ignore and not split) jCas.setDocumentText("His name was Andrew (Drew) Smith"); Person p = new Person(jCas, 13, 32); p.addToIndexes(); processJCas(); assertEquals(1, JCasUtil.select(jCas, Person.class).size()); assertEquals(p, JCasUtil.selectByIndex(jCas, Person.class, 0)); }
/**
 * Creates two overlapping {@code Location} annotations where the 6-12 one is created with a
 * {@code null} last argument and the 0-12 one with {@code "different"}, runs
 * {@code RemoveNestedLocations}, and expects only the 0-12 annotation to remain.
 */
@Test
public void testOneGeoJsonOuter() throws Exception {
  AnalysisEngine nestedRemover = AnalysisEngineFactory.createEngine(RemoveNestedLocations.class);
  jCas.setDocumentText(NORTH_LONDON);

  // NOTE(review): the fifth argument is presumably the GeoJSON payload - confirm in Annotations.
  Annotations.createLocation(jCas, 6, 12, LONDON, null);
  Location outer = Annotations.createLocation(jCas, 0, 12, NORTH_LONDON, "different");

  nestedRemover.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Location.class).size());

  Location remaining = JCasUtil.selectByIndex(jCas, Location.class, 0);
  assertEquals(outer, remaining);
}
/**
 * Processes a sentence ending in an IPv6 address written with leading zeroes dropped and
 * expects one {@code CommsIdentifier} covering exactly that address (without the trailing full
 * stop) with sub type {@code "ipv6address"}.
 */
@Test
public void testDropLeadingZeroes() throws Exception {
  jCas.setDocumentText(
      "Here's an IPv6 address with leading zeroes dropped: fe80:0:0:0:204:61ff:fe9d:f156.");
  processJCas();

  assertEquals(1, JCasUtil.select(jCas, CommsIdentifier.class).size());

  CommsIdentifier address = JCasUtil.selectByIndex(jCas, CommsIdentifier.class, 0);
  assertEquals("fe80:0:0:0:204:61ff:fe9d:f156", address.getCoveredText());
  assertEquals("ipv6address", address.getSubType());
}
/**
 * Processes a sentence containing a parenthesised IPv6 address in collapsed ({@code ::}) form
 * and expects one {@code CommsIdentifier} covering the address without the surrounding
 * brackets, with sub type {@code "ipv6address"}.
 */
@Test
public void testCollapseLeadingZeroes() throws Exception {
  jCas.setDocumentText(
      "Here's an IPv6 address with collapsed leading zeroes: (fe80::204:61ff:fe9d:f156)");
  processJCas();

  assertEquals(1, JCasUtil.select(jCas, CommsIdentifier.class).size());

  CommsIdentifier address = JCasUtil.selectByIndex(jCas, CommsIdentifier.class, 0);
  assertEquals("fe80::204:61ff:fe9d:f156", address.getCoveredText());
  assertEquals("ipv6address", address.getSubType());
}
/**
 * Runs {@code CleanPunctuation} over an entity spanning text that ends with one closing bracket
 * too many, and expects the single resulting {@code Entity} to have both covered text and value
 * equal to the text without that final bracket.
 */
@Test
public void testBracketsClose() throws Exception {
  AnalysisEngine punctuationCleaner = AnalysisEngineFactory.createEngine(CleanPunctuation.class);

  String documentText = "(Oh dear), we've pulled out too many (closing brackets!))";
  jCas.setDocumentText(documentText);
  // The entity spans the whole document; its initial value is the TEXT_BRACKETS constant.
  Annotations.createEntity(jCas, 0, documentText.length(), TEXT_BRACKETS);

  punctuationCleaner.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Entity.class).size());

  String expected = "(Oh dear), we've pulled out too many (closing brackets!)";
  Entity cleaned = JCasUtil.selectByIndex(jCas, Entity.class, 0);
  assertEquals(expected, cleaned.getCoveredText());
  assertEquals(expected, cleaned.getValue());
}
/**
 * Sets the document annotation's source URI to {@code file}, processes the document, and
 * expects a single {@code Metadata} annotation with key {@code "source"} and value {@code file}.
 */
@Test
public void testPath() throws Exception {
  DocumentAnnotation documentAnnotation = (DocumentAnnotation) jCas.getDocumentAnnotationFs();
  documentAnnotation.setSourceUri(file);

  processJCas();

  assertEquals(1, JCasUtil.select(jCas, Metadata.class).size());

  Metadata sourceEntry = JCasUtil.selectByIndex(jCas, Metadata.class, 0);
  assertEquals("source", sourceEntry.getKey());
  assertEquals(file, sourceEntry.getValue());
}
/**
 * Runs {@code CleanPunctuation} over an entity spanning the whole {@code TEXT_BRACKETS}
 * document and expects the single resulting {@code Entity} to have both covered text and value
 * equal to {@code "(Oh dear), we've (pulled out) too many opening brackets"}.
 */
@Test
public void testBracketsOpen() throws Exception {
  AnalysisEngine punctuationCleaner = AnalysisEngineFactory.createEngine(CleanPunctuation.class);

  jCas.setDocumentText(TEXT_BRACKETS);
  int documentEnd = jCas.getDocumentText().length();
  Annotations.createEntity(jCas, 0, documentEnd, TEXT_BRACKETS);

  punctuationCleaner.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Entity.class).size());

  String expected = "(Oh dear), we've (pulled out) too many opening brackets";
  Entity cleaned = JCasUtil.selectByIndex(jCas, Entity.class, 0);
  assertEquals(expected, cleaned.getCoveredText());
  assertEquals(expected, cleaned.getValue());
}
/**
 * Processes "Today is Wednesday" and expects a single {@code Temporal} covering "Today" whose
 * start timestamp is the start of the {@code relativeTo} day and whose stop timestamp is the
 * start of the following day, both as UTC epoch seconds.
 */
@Test
public void testToday() throws Exception {
  jCas.setDocumentText("Today is Wednesday");
  // NOTE(review): helper defined elsewhere; presumably it anchors the document date to
  // relativeTo - confirm.
  setDocumentDate();
  processJCas();

  assertEquals(1, JCasUtil.select(jCas, Temporal.class).size());

  Temporal today = JCasUtil.selectByIndex(jCas, Temporal.class, 0);
  assertEquals("Today", today.getCoveredText());

  long startOfDay = relativeTo.atStartOfDay().toEpochSecond(ZoneOffset.UTC);
  assertEquals(startOfDay, today.getTimestampStart());

  long startOfNextDay = relativeTo.plusDays(1).atStartOfDay().toEpochSecond(ZoneOffset.UTC);
  assertEquals(startOfNextDay, today.getTimestampStop());
}
/**
 * Processes a sentence containing the 13-digit number {@code 1507725753567} with
 * {@code EpochTime.PARAM_MILLIS} set to {@code true} and {@code EpochTime.PARAM_EARLIEST} set
 * to {@code "1500000000"}. Expects a single {@code Temporal} covering that number, starting at
 * {@code 1507725753} and stopping at {@code 1507725754}.
 */
@Test
public void testMilli() throws Exception {
  jCas.setDocumentText("47 people were seen at 1507725753567.");
  processJCas(EpochTime.PARAM_EARLIEST, "1500000000", EpochTime.PARAM_MILLIS, true);

  assertEquals(1, JCasUtil.select(jCas, Temporal.class).size());

  Temporal instant = JCasUtil.selectByIndex(jCas, Temporal.class, 0);
  assertEquals("1507725753567", instant.getCoveredText());
  assertEquals(1507725753, instant.getTimestampStart());
  assertEquals(1507725754, instant.getTimestampStop());
}
/**
 * Processes a sentence mentioning London with the Mongo resource, collection and type
 * parameters supplied, and expects a single {@code Location} whose value and covered text are
 * both "London" and whose GeoJSON equals {@code LONDON_GEOJSON.toJson()}.
 */
@Test
public void testProperty() throws Exception {
  jCas.setDocumentText("Guy Fawkes was caught in London");
  processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, LOCATION);

  assertEquals(1, JCasUtil.select(jCas, Location.class).size());

  Location london = JCasUtil.selectByIndex(jCas, Location.class, 0);
  assertEquals("London", london.getValue());
  assertEquals("London", london.getCoveredText());
  assertEquals(LONDON_GEOJSON.toJson(), london.getGeoJson());
}
/**
 * Runs the language engine over a sentence, indexes a {@code Coordinate} at offsets 27-39, then
 * processes the document. Expects two {@code Location} annotations, and the first one to have a
 * non-null referent that equals the referent of the {@code Coordinate}.
 */
@Test
public void testNP() throws Exception {
  jCas.setDocumentText("The former school house at GR 1234 5678.");
  languageAE.process(jCas);

  Coordinate gridReference = new Coordinate(jCas, 27, 39);
  gridReference.addToIndexes();

  processJCas();

  // NOTE(review): a count of two presumably includes the Coordinate itself, which would require
  // Coordinate to be a subtype of Location - confirm against the type system.
  assertEquals(2, JCasUtil.select(jCas, Location.class).size());

  Location firstLocation = JCasUtil.selectByIndex(jCas, Location.class, 0);
  assertNotNull(firstLocation.getReferent());
  assertEquals(firstLocation.getReferent(), gridReference.getReferent());
}
/**
 * Processes a sentence with the type parameter set to {@code "Buzzword"} and expects a single
 * {@code Buzzword} whose value and covered text are both {@code "Sydney (Australia"} (note the
 * unbalanced bracket) and which carries exactly one tag, {@code "broken_regex"}.
 */
@Test
public void testBuzzwordProperty() throws Exception {
  jCas.setDocumentText("Hello Sydney (Australia), this is a test");
  processJCas(MONGO, erd, COLLECTION, MONGO_COLL, TYPE, "Buzzword");

  assertEquals(1, JCasUtil.select(jCas, Buzzword.class).size());

  Buzzword buzzword = JCasUtil.selectByIndex(jCas, Buzzword.class, 0);
  assertEquals("Sydney (Australia", buzzword.getValue());
  assertEquals("Sydney (Australia", buzzword.getCoveredText());

  StringArray buzzwordTags = buzzword.getTags();
  assertEquals(1, buzzwordTags.size());
  assertEquals("broken_regex", buzzwordTags.get(0));
}