/**
 * Collects, per document offset, the markup string that should be inserted at that offset for
 * every {@link Entity} in the CAS.
 *
 * <p>For each entity the start offset receives either a fresh value from {@code
 * generateSpanStart} or, when something is already registered at that offset, the result of
 * {@code joinSpans} applied to the existing value split after each {@code '>'}. The end offset
 * always gets a {@code "</span>"} prepended to whatever is already registered there.
 *
 * @param jCas the CAS whose entities are processed
 * @return insert strings keyed by offset, in ascending offset order
 */
private Map<Integer, String> getEntityInsertPositions(JCas jCas) {
  Map<Integer, String> insertPositions = new TreeMap<>();
  Map<Integer, List<Entity>> entityStartPositions = new HashMap<>();

  for (Entity e : JCasUtil.select(jCas, Entity.class)) {
    int begin = e.getBegin();
    // Entities seen so far that start at the same offset (e itself is added further down).
    List<Entity> sameStart = entityStartPositions.computeIfAbsent(begin, k -> new ArrayList<>());

    if (!insertPositions.containsKey(begin)) {
      insertPositions.put(begin, generateSpanStart(e));
    } else {
      // Count the earlier same-start entities that end after this one.
      int end = e.getEnd();
      long eCount = 0;
      for (Entity other : sameStart) {
        if (other.getEnd() > end) {
          eCount++;
        }
      }
      // Split after every '>' so each existing piece is passed to joinSpans separately.
      String[] spans = insertPositions.get(begin).split("(?<=>)");
      insertPositions.put(begin, joinSpans(eCount, e, spans));
    }

    sameStart.add(e);

    // Closers are prepended, so the most recently processed entity closes first.
    String closing = "</span>" + insertPositions.getOrDefault(e.getEnd(), "");
    insertPositions.put(e.getEnd(), closing);
  }

  return insertPositions;
}
/**
 * Creates a {@link CorefMention} for the given entity.
 *
 * <p>The head index is the position of the first element of {@code entities} that holds an
 * entity equal to {@code entity}, or 0 when none does. Begin and end are expressed relative to
 * {@code getBegin()} of this object.
 *
 * @param entity the entity to describe
 * @param chainId the coreference chain id passed through to the mention
 * @return the new mention
 */
protected CorefMention corefMention(Entity entity, int chainId) {
  int headIndex = 0;
  for (int idx = 0; idx < entities.size(); idx++) {
    boolean matches = entities.get(idx).filter(candidate -> entity.equals(candidate)).isPresent();
    if (!matches) {
      continue;
    }
    headIndex = idx;
    break;
  }

  return new CorefMention(
      sentenceIndex,
      headIndex,
      entity.getBegin() - getBegin(),
      entity.getEnd() - getBegin(),
      chainId);
}
/**
 * Builds the {@link CorefMention} that represents {@code entity} within the given chain.
 *
 * <p>Scans {@code entities} for the first present element equal to {@code entity} and uses its
 * index as the head index (0 if there is no match). Offsets are shifted by {@code getBegin()}
 * so they are relative to this object's start.
 *
 * @param entity the entity to convert
 * @param chainId identifier of the coreference chain
 * @return a mention carrying the sentence index, head index, relative span and chain id
 */
protected CorefMention corefMention(Entity entity, int chainId) {
  int headIndex = 0;
  boolean found = false;
  int i = 0;
  while (!found && i < entities.size()) {
    Optional<Entity> slot = entities.get(i);
    if (slot.isPresent() && entity.equals(slot.get())) {
      headIndex = i;
      found = true;
    } else {
      i++;
    }
  }

  int relativeBegin = entity.getBegin() - getBegin();
  int relativeEnd = entity.getEnd() - getBegin();
  return new CorefMention(sentenceIndex, headIndex, relativeBegin, relativeEnd, chainId);
}
/**
 * Renders an entity as text: one {@code writeLine} entry each for its value, its type name and
 * its span formatted as {@code "begin -> end"}.
 *
 * @param t the entity to render
 * @return the accumulated text
 */
@Override
protected String print(Entity t) {
  final StringBuilder out = new StringBuilder();

  writeLine(out, "Value", t.getValue());
  writeLine(out, "Type", t.getTypeName());

  final String span = String.format("%d -> %d", t.getBegin(), t.getEnd());
  writeLine(out, "Span", span);

  return out.toString();
}
}
// Set the mention's Baleen id and mirror the entity's begin/end offsets onto it.
mention.setBaleenId(baleenExternalId); mention.setBegin(e.getBegin()); mention.setEnd(e.getEnd());
/**
 * Checks that {@code ComparableEntitySpanUtils.copyEntity} yields a second entity carrying the
 * original's value and the requested 10-20 span, while the original stays first in the index.
 */
@Test
public void testCopyEntity() {
  final Entity e = new Entity(jCas);
  e.setBegin(0);
  // Was a second setBegin(5), which left the end unset and moved the begin to 5.
  e.setEnd(5);
  e.setValue("value");
  e.addToIndexes();

  final Entity copyEntity = ComparableEntitySpanUtils.copyEntity(jCas, 10, 20, e);
  copyEntity.addToIndexes();

  final List<Entity> select = new ArrayList<>(JCasUtil.select(jCas, Entity.class));
  assertEquals(2, select.size());
  assertSame(e, select.get(0));
  assertEquals("value", select.get(1).getValue());
  assertEquals(10, select.get(1).getBegin());
  assertEquals(20, select.get(1).getEnd());
}
// Continue the append chain with the entity's subtype, value and begin/end offsets.
.append(FIELD_SUBTYPE, entity.getSubType()) .append(FIELD_VALUE, entity.getValue()) .append(FIELD_BEGIN, entity.getBegin()) .append(FIELD_END, entity.getEnd());
@Test public void testWithText() throws AnalysisEngineProcessException, ResourceInitializationException { jCas.setDocumentText("John went to London. I saw John there. He's a great guy John."); new Text(jCas, 0, 21).addToIndexes(); // Omit the middle John new Text(jCas, 40, jCas.getDocumentText().length()).addToIndexes(); Person p = new Person(jCas); p.setBegin(0); p.setEnd(4); p.setValue("John"); p.addToIndexes(); processJCas(); List<Entity> select = new ArrayList<>(JCasUtil.select(jCas, Person.class)); assertEquals(2, select.size()); assertEquals("John", select.get(0).getValue()); assertEquals("John", select.get(1).getValue()); assertTrue(select.get(1).getBegin() > 40); }
/**
 * Reads the first document with entity loading enabled and checks the entity index for the
 * expected values and offsets.
 *
 * <p>NOTE(review): each {@code contains} call takes a single predicate, so value, begin and end
 * are checked independently rather than on the same entity; presumably {@code contains} asserts
 * that at least one element matches. Confirm against the helper.
 */
@Test
public void testEntities() throws Exception {
  BaleenCollectionReader bcr =
      getCollectionReader(
          Re3dReader.PARAM_FOLDER,
          tmpDir.toAbsolutePath().toString(),
          Re3dReader.PARAM_ENTITIES,
          true,
          Re3dReader.PARAM_RANDOM_DATES,
          true);

  // Close the reader even when an assertion fails, so a failing test does not leak it.
  try {
    assertTrue(bcr.doHasNext());
    bcr.getNext(jCas.getCas());

    AnnotationIndex<Entity> annotationIndex = jCas.getAnnotationIndex(Entity.class);

    contains(annotationIndex, e -> e.getValue().equals("Location"));
    contains(annotationIndex, e -> e.getBegin() == 100);
    contains(annotationIndex, e -> e.getEnd() == 118);

    contains(annotationIndex, e -> e.getValue().equals("Organisation Ltd"));
    contains(annotationIndex, e -> e.getBegin() == 12);
    contains(annotationIndex, e -> e.getEnd() == 18);
  } finally {
    bcr.close();
  }
}
/**
 * Reads the first document with entities and relationships enabled and checks the relation
 * index for the expected span, value, type, source and target.
 *
 * <p>The source and target are looked up as any entity beginning at offsets 12 and 9
 * respectively; a missing entity fails the test with a descriptive message.
 */
@Test
public void testRelations() throws Exception {
  BaleenCollectionReader bcr =
      getCollectionReader(
          Re3dReader.PARAM_FOLDER,
          tmpDir.toAbsolutePath().toString(),
          Re3dReader.PARAM_ENTITIES,
          true,
          Re3dReader.PARAM_RELATIONSHIPS,
          true,
          Re3dReader.PARAM_RANDOM_DATES,
          true);

  // Close the reader even when an assertion fails, so a failing test does not leak it.
  try {
    assertTrue(bcr.doHasNext());
    bcr.getNext(jCas.getCas());

    AnnotationIndex<Entity> entityIndex = jCas.getAnnotationIndex(Entity.class);
    // Fail with a clear message instead of a bare NoSuchElementException from Optional.get().
    Entity source =
        Streams.stream(entityIndex)
            .filter(e -> e.getBegin() == 12)
            .findAny()
            .orElseThrow(() -> new AssertionError("No entity beginning at offset 12"));
    Entity target =
        Streams.stream(entityIndex)
            .filter(e -> e.getBegin() == 9)
            .findAny()
            .orElseThrow(() -> new AssertionError("No entity beginning at offset 9"));

    AnnotationIndex<Relation> relationIndex = jCas.getAnnotationIndex(Relation.class);
    contains(relationIndex, r -> r.getBegin() == 11);
    contains(relationIndex, r -> r.getEnd() == 12);
    contains(relationIndex, r -> r.getValue().equals(""));
    contains(relationIndex, r -> r.getRelationshipType().equals("HasAttrOf"));
    contains(relationIndex, r -> r.getSource().equals(source));
    contains(relationIndex, r -> r.getTarget().equals(target));
  } finally {
    bcr.close();
  }
}
// Record the entity's begin and end offsets as properties on vReference.
addProperty(vReference, PROPERTY_BEGIN, e.getBegin()); addProperty(vReference, PROPERTY_END, e.getEnd());
// Expected value first, actual second, so a failure reports the two the right way round.
assertEquals(0, entity.getBegin());
assertEquals(5, entity.getEnd());
assertEquals(0, paragraph.getBegin());
// The annotation is expected at 3-7 shifted by 12 (presumably the enclosing block's document offset; TODO confirm).
assertEquals(12 + 3, annotation.getBegin()); assertEquals(12 + 7, annotation.getEnd());
/**
 * Exercises a {@code TextBlock} built from the JCas alone: it must report itself as covering
 * the whole document, create and reposition annotations at unshifted offsets, and map offsets
 * one-to-one between block and document.
 */
@Test
public void testWholeDocument() {
  final TextBlock wholeDoc = new TextBlock(jCas);

  // Block boundaries and text equal those of the document.
  assertTrue(wholeDoc.isWholeDocument());
  assertEquals(0, wholeDoc.getBegin());
  assertEquals(DOC_TEXT, wholeDoc.getCoveredText());
  assertEquals(DOC_TEXT, wholeDoc.getDocumentText());
  assertEquals(DOC_TEXT.length(), wholeDoc.getEnd());
  assertSame(jCas, wholeDoc.getJCas());
  assertNull(wholeDoc.getText());

  // A new annotation keeps the offsets it was given.
  final Entity created = wholeDoc.newAnnotation(Entity.class, 3, 7);
  assertNotNull(created);
  assertEquals(3, created.getBegin());
  assertEquals(7, created.getEnd());

  // Repositioning an existing annotation also leaves the offsets unshifted.
  final Person person = new Person(jCas);
  wholeDoc.setBeginAndEnd(person, 2, 10);
  assertEquals(2, person.getBegin());
  assertEquals(10, person.getEnd());

  // Offset conversion is the identity in both directions.
  assertEquals(1, wholeDoc.toDocumentOffset(1));
  assertEquals(1, wholeDoc.toBlockOffset(1));

  assertEquals(3, wholeDoc.select(Buzzword.class).size());
}
}