/**
 * Looks up the first entity stored under the given span whose value equals {@code value}.
 *
 * @param entityIndex entities keyed by offset
 * @param begin first argument of the {@code Offset} used as lookup key
 * @param end second argument of the {@code Offset} used as lookup key
 * @param value the value to match; it is the receiver of {@code equals}, so it must not be null
 *     when any entity is stored under the key
 * @return the first matching entity in the index's iteration order, or empty if none matches
 */
private Optional<Entity> findEntity(
    Multimap<Offset, Entity> entityIndex, int begin, int end, String value) {
  final Offset key = new Offset(begin, end);
  for (Entity candidate : entityIndex.get(key)) {
    if (value.equals(candidate.getValue())) {
      return Optional.of(candidate);
    }
  }
  return Optional.empty();
}
/**
 * Looks up the first entity stored under the given span whose value equals {@code value}.
 *
 * @param entityIndex entities keyed by offset
 * @param begin first argument of the {@code Offset} used as lookup key
 * @param end second argument of the {@code Offset} used as lookup key
 * @param value the value to match; it is the receiver of {@code equals}, so it must not be null
 *     when any entity is stored under the key
 * @return the first matching entity in the index's iteration order, or empty if none matches
 */
private Optional<Entity> findEntity(
    Multimap<Offset, Entity> entityIndex, int begin, int end, String value) {
  final Offset key = new Offset(begin, end);
  for (Entity candidate : entityIndex.get(key)) {
    if (value.equals(candidate.getValue())) {
      return Optional.of(candidate);
    }
  }
  return Optional.empty();
}
/**
 * Builds the opening {@code <span>} tag for an entity.
 *
 * <p>The tag carries the entity's simple class name as a CSS class (alongside "baleen"), its
 * external id as the element id, its value, and the internal id of its referent. A missing value
 * or referent is rendered as an empty attribute.
 *
 * @param e the entity to render
 * @return the opening span tag
 */
private String generateSpanStart(Entity e) {
  String value = "";
  if (e.getValue() != null) {
    // Double quotes would terminate the attribute, so swap them for single quotes.
    // NOTE(review): no other HTML escaping is applied to the value.
    value = e.getValue().replace('"', '\'');
  }

  String referent = "";
  if (e.getReferent() != null) {
    referent = Long.toString(e.getReferent().getInternalId());
  }

  return String.format(
      "<span class=\"baleen %s\" id=\"%s\" value=\"%s\" data-referent=\"%s\">",
      e.getClass().getSimpleName(), e.getExternalId(), value, referent);
}
/**
 * Builds a case-insensitive filter matching the configured search field against any
 * space-separated term of any mention value.
 *
 * <p>Empty terms (from an empty mention value, or from leading or consecutive spaces) are
 * skipped, because an empty regular expression matches every document. Each remaining term is
 * quoted so that regex metacharacters in mention text (for example "C++" or "(UK") are matched
 * literally instead of being interpreted as, or failing to compile as, a pattern.
 *
 * @param entityInformation supplies the mentions whose values are searched for
 * @return an OR of one regex filter per term, or empty if there is no usable term
 */
private Optional<Bson> buildQuery(EntityInformation<T> entityInformation) {
  List<String> searchValues = new ArrayList<>();
  entityInformation
      .getMentions()
      .stream()
      .map(m -> m.getValue())
      .filter(v -> v != null)
      .flatMap(v -> Arrays.stream(v.split(" ")))
      // An empty term would become an empty regex, which matches everything
      .filter(term -> !term.isEmpty())
      .forEach(searchValues::add);

  if (searchValues.isEmpty()) {
    return Optional.empty();
  }

  List<Bson> bsonList = new ArrayList<>();
  for (String partialSearchTerm : searchValues) {
    // Quote the term (\Q...\E) so it is matched as literal text
    String literalTerm = java.util.regex.Pattern.quote(partialSearchTerm);
    bsonList.add(regex(argumentsMap.get(PARAM_SEARCH_FIELD), literalTerm, "i"));
  }
  return Optional.of(or(bsonList));
}
/**
 * Builds a case-insensitive filter matching the configured search field against any
 * space-separated term of any mention value.
 *
 * <p>Empty terms (from an empty mention value, or from leading or consecutive spaces) are
 * skipped, because an empty regular expression matches every document. Each remaining term is
 * quoted so that regex metacharacters in mention text (for example "C++" or "(UK") are matched
 * literally instead of being interpreted as, or failing to compile as, a pattern.
 *
 * @param entityInformation supplies the mentions whose values are searched for
 * @return an OR of one regex filter per term, or empty if there is no usable term
 */
private Optional<Bson> buildQuery(EntityInformation<T> entityInformation) {
  List<String> searchValues = new ArrayList<>();
  entityInformation
      .getMentions()
      .stream()
      .map(m -> m.getValue())
      .filter(v -> v != null)
      .flatMap(v -> Arrays.stream(v.split(" ")))
      // An empty term would become an empty regex, which matches everything
      .filter(term -> !term.isEmpty())
      .forEach(searchValues::add);

  if (searchValues.isEmpty()) {
    return Optional.empty();
  }

  List<Bson> bsonList = new ArrayList<>();
  for (String partialSearchTerm : searchValues) {
    // Quote the term (\Q...\E) so it is matched as literal text
    String literalTerm = java.util.regex.Pattern.quote(partialSearchTerm);
    bsonList.add(regex(argumentsMap.get(PARAM_SEARCH_FIELD), literalTerm, "i"));
  }
  return Optional.of(or(bsonList));
}
/**
 * Renders an entity by passing its value, type name and span ("begin -> end") to
 * {@code writeLine} under the labels "Value", "Type" and "Span".
 *
 * @param t the entity to render
 * @return the accumulated text
 */
@Override
protected String print(Entity t) {
  final StringBuilder out = new StringBuilder();

  writeLine(out, "Value", t.getValue());
  writeLine(out, "Type", t.getTypeName());

  final String span = String.format("%d -> %d", t.getBegin(), t.getEnd());
  writeLine(out, "Span", span);

  return out.toString();
}
}
/** * Copy entity. * * @param jCas the j cas * @param begin the begin * @param end the end * @param entity the entity * @return the entity */ public static Entity copyEntity(JCas jCas, int begin, int end, Entity entity) { // TODO: This could be better, but would suggest if better is need try { final Entity instance = entity.getClass().getConstructor(JCas.class).newInstance(jCas); instance.setBegin(begin); instance.setEnd(end); instance.setReferent(entity.getReferent()); instance.setValue(entity.getValue()); return instance; } catch (final Exception e) { LOGGER.warn("Unable to copy entity", e); return null; } }
/**
 * Seeds a single Person over the first "John" and expects processing to leave two Person
 * annotations, both valued "John".
 */
@Test
public void test() throws AnalysisEngineProcessException, ResourceInitializationException {
  jCas.setDocumentText("John went to London. I saw John there.");

  Person seed = new Person(jCas);
  seed.setBegin(0);
  seed.setEnd(4);
  seed.setValue("John");
  seed.addToIndexes();

  processJCas();

  List<Entity> people = new ArrayList<>(JCasUtil.select(jCas, Person.class));
  assertEquals(2, people.size());
  for (Entity person : people) {
    assertEquals("John", person.getValue());
  }
}
/** * Add an annotation to the JCas index, notifying UimaMonitor of the fact we have done so * * @param annot Annotation(s) to add */ public void add(Collection<? extends Annotation> annotations) { for (Annotation annot : annotations) { annot.addToIndexes(); monitor.entityAdded(annot.getType().getName()); if (annot instanceof Entity) { Entity entity = (Entity) annot; // Add in a value if it doesn't have one if (Strings.isNullOrEmpty(entity.getValue())) { entity.setValue(annot.getCoveredText()); } addToHistory(annot.getCAS(), HistoryEvents.createAdded((Recordable) annot, referrer)); } } }
/** * Add an annotation to the JCas index, notifying UimaMonitor of the fact we have done so * * @param annot Annotation(s) to add */ public void add(Collection<? extends Annotation> annotations) { for (Annotation annot : annotations) { annot.addToIndexes(); monitor.entityAdded(annot.getType().getName()); if (annot instanceof Entity) { Entity entity = (Entity) annot; // Add in a value if it doesn't have one if (Strings.isNullOrEmpty(entity.getValue())) { entity.setValue(annot.getCoveredText()); } addToHistory(annot.getCAS(), HistoryEvents.createAdded((Recordable) annot, referrer)); } } }
/**
 * Text with one surplus closing bracket: after CleanPunctuation runs, the single entity's covered
 * text and value are expected to be the text without the trailing extra bracket.
 */
@Test
public void testBracketsClose() throws Exception {
  final String text = "(Oh dear), we've pulled out too many (closing brackets!))";
  final String expected = "(Oh dear), we've pulled out too many (closing brackets!)";

  AnalysisEngine engine = AnalysisEngineFactory.createEngine(CleanPunctuation.class);

  jCas.setDocumentText(text);
  Annotations.createEntity(jCas, 0, text.length(), TEXT_BRACKETS);

  engine.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Entity.class).size());

  Entity cleaned = JCasUtil.selectByIndex(jCas, Entity.class, 0);
  assertEquals(expected, cleaned.getCoveredText());
  assertEquals(expected, cleaned.getValue());
}
/**
 * An entity spanning all of {@code TEXT} is expected to come through CleanPunctuation with its
 * covered text and value unchanged.
 */
@Test
public void testBracketsNone() throws Exception {
  AnalysisEngine engine = AnalysisEngineFactory.createEngine(CleanPunctuation.class);

  jCas.setDocumentText(TEXT);
  Annotations.createEntity(jCas, 0, TEXT.length(), TEXT);

  engine.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Entity.class).size());

  Entity untouched = JCasUtil.selectByIndex(jCas, Entity.class, 0);
  assertEquals(TEXT, untouched.getCoveredText());
  assertEquals(TEXT, untouched.getValue());
}
}
/**
 * An entity spanning all of {@code TEXT_BRACKETS} is expected to come through CleanPunctuation
 * with the covered text and value asserted below.
 */
@Test
public void testBracketsOpen() throws Exception {
  final String expected = "(Oh dear), we've (pulled out) too many opening brackets";

  AnalysisEngine engine = AnalysisEngineFactory.createEngine(CleanPunctuation.class);

  jCas.setDocumentText(TEXT_BRACKETS);
  Annotations.createEntity(jCas, 0, jCas.getDocumentText().length(), TEXT_BRACKETS);

  engine.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Entity.class).size());

  Entity cleaned = JCasUtil.selectByIndex(jCas, Entity.class, 0);
  assertEquals(expected, cleaned.getCoveredText());
  assertEquals(expected, cleaned.getValue());
}
@Test public void testWithText() throws AnalysisEngineProcessException, ResourceInitializationException { jCas.setDocumentText("John went to London. I saw John there. He's a great guy John."); new Text(jCas, 0, 21).addToIndexes(); // Omit the middle John new Text(jCas, 40, jCas.getDocumentText().length()).addToIndexes(); Person p = new Person(jCas); p.setBegin(0); p.setEnd(4); p.setValue("John"); p.addToIndexes(); processJCas(); List<Entity> select = new ArrayList<>(JCasUtil.select(jCas, Person.class)); assertEquals(2, select.size()); assertEquals("John", select.get(0).getValue()); assertEquals("John", select.get(1).getValue()); assertTrue(select.get(1).getBegin() > 40); }
/**
 * Copies an entity spanning 0-5 to the span 10-20 and checks that the index then holds the
 * original first, followed by a copy with the same value and the new offsets.
 */
@Test
public void testCopyEntity() {
  final Entity e = new Entity(jCas);
  e.setBegin(0);
  // Previously a second setBegin(5) call, which left the end offset unset
  e.setEnd(5);
  e.setValue("value");
  e.addToIndexes();

  final Entity copyEntity = ComparableEntitySpanUtils.copyEntity(jCas, 10, 20, e);
  copyEntity.addToIndexes();

  final List<Entity> select = new ArrayList<>(JCasUtil.select(jCas, Entity.class));
  assertEquals(2, select.size());
  assertSame(e, select.get(0));
  assertEquals("value", select.get(1).getValue());
  assertEquals(10, select.get(1).getBegin());
  assertEquals(20, select.get(1).getEnd());
}
/**
 * Runs the superclass validation, then asserts that the annotation's value equals
 * {@code getValue()} and that its confidence is at least {@code minConfidence}.
 *
 * @param t the annotation to validate
 */
@Override
public void validate(T t) {
  super.validate(t);

  assertEquals(getValue(), t.getValue());

  final boolean confidentEnough = minConfidence <= t.getConfidence();
  assertTrue(confidentEnough);
}
}
/**
 * Reads the first document with entity loading and random dates enabled, then runs the
 * {@code contains} helper against the entity index for the expected values and offsets.
 */
@Test
public void testEntities() throws Exception {
  BaleenCollectionReader reader =
      getCollectionReader(
          Re3dReader.PARAM_FOLDER,
          tmpDir.toAbsolutePath().toString(),
          Re3dReader.PARAM_ENTITIES,
          true,
          Re3dReader.PARAM_RANDOM_DATES,
          true);

  assertTrue(reader.doHasNext());
  reader.getNext(jCas.getCas());

  AnnotationIndex<Entity> entities = jCas.getAnnotationIndex(Entity.class);

  // First expected entity
  contains(entities, e -> e.getValue().equals("Location"));
  contains(entities, e -> e.getBegin() == 100);
  contains(entities, e -> e.getEnd() == 118);

  // Second expected entity
  contains(entities, e -> e.getValue().equals("Organisation Ltd"));
  contains(entities, e -> e.getBegin() == 12);
  contains(entities, e -> e.getEnd() == 18);

  reader.close();
}
/**
 * Configures the Custom annotator with a pattern and case sensitivity but no type parameter, and
 * expects a single {@code Entity} whose covered text and value both equal {@code P123}.
 */
@Test
public void testMissingType() throws Exception {
  AnalysisEngine engine =
      AnalysisEngineFactory.createEngine(
          Custom.class,
          Custom.PARAM_PATTERN,
          DIGIT_REGEX,
          Custom.PARAM_CASE_SENSITIVE,
          true);

  jCas.setDocumentText(TEXT);
  engine.process(jCas);

  assertEquals(1, JCasUtil.select(jCas, Entity.class).size());

  Entity match = JCasUtil.selectByIndex(jCas, Entity.class, 0);
  assertNotNull(match);
  assertEquals(P123, match.getCoveredText());
  assertEquals(P123, match.getValue());

  engine.destroy();
}