/**
 * Creates a new {@link BratNameSampleStream} without a name type restriction:
 * {@code null} is handed to the underlying {@link BratDocumentParser} in place
 * of a name type set.
 *
 * @param sentDetector a {@link SentenceDetector} instance
 * @param tokenizer a {@link Tokenizer} instance
 * @param samples a {@link BratDocument} {@link ObjectStream}
 */
public BratNameSampleStream(SentenceDetector sentDetector, Tokenizer tokenizer,
    ObjectStream<BratDocument> samples) {
  super(samples);
  parser = new BratDocumentParser(sentDetector, tokenizer, null);
}
/**
 * Returns the parent's string form followed by the id this attribute is
 * attached to and, when a value is present, the value itself; the parts are
 * separated by single spaces.
 */
@Override
public String toString() {
  String text = super.toString() + " " + attachedTo;
  if (value != null) {
    // The value is optional; only append it when one was set.
    text = text + " " + value;
  }
  return text;
}
}
/**
 * Builds a {@link BratNameSampleStream} over the brat test resources whose
 * file name contains the given fragment.
 *
 * @param nameContainsFilter substring a file name must contain to be accepted
 * @param nameTypes name types forwarded to the {@link BratNameSampleStream} constructor
 * @return a name sample stream using a {@link NewlineSentenceDetector} and the
 *         {@link WhitespaceTokenizer}
 * @throws IOException if the resource directory cannot be resolved to a local
 *         file, or if creating the streams fails
 */
private BratNameSampleStream createNameSampleWith(String nameContainsFilter,
    Set<String> nameTypes) throws IOException {
  Map<String, String> typeToClassMap = new HashMap<>();
  BratAnnotationStreamTest.addEntityTypes(typeToClassMap);
  AnnotationConfiguration config = new AnnotationConfiguration(typeToClassMap);

  // URL#getFile() returns the still percent-encoded path (a space stays "%20"),
  // which points to a non-existent directory when the checkout path contains
  // such characters. Converting through a URI decodes the path correctly.
  File dir;
  try {
    dir = new File(this.getClass().getResource("/opennlp/tools/formats/brat/").toURI());
  } catch (java.net.URISyntaxException | IllegalArgumentException e) {
    // Reported as IOException so that a resolution problem cannot be mistaken
    // for an IllegalArgumentException raised by the stream under test.
    throw new IOException("Unable to resolve the brat test resource directory", e);
  }

  FileFilter fileFilter = pathname -> pathname.getName().contains(nameContainsFilter);
  ObjectStream<BratDocument> bratDocumentStream =
      new BratDocumentStream(config, dir, false, fileFilter);

  return new BratNameSampleStream(new NewlineSentenceDetector(),
      WhitespaceTokenizer.INSTANCE, bratDocumentStream, nameTypes);
}
/**
 * Returns the parent's string form, the array of spans and the covered text,
 * separated by single spaces.
 */
@Override
public String toString() {
  String head = super.toString();
  String spanList = Arrays.toString(spans);
  return head + " " + spanList + " " + getCoveredText();
}
}
@Test public void testParsingEntities() throws Exception { Map<String, String> typeToClassMap = new HashMap<>(); addEntityTypes(typeToClassMap); AnnotationConfiguration annConfig = new AnnotationConfiguration(typeToClassMap); ObjectStream<BratAnnotation> annStream = creatBratAnnotationStream(annConfig, "/opennlp/tools/formats/brat/voa-with-entities.ann"); // TODO: Test if we get the entities ... we expect! BratAnnotation ann; while ((ann = annStream.read()) != null) { System.out.println(ann); } }
/**
 * Builds a {@link RelationAnnotation} from a tokenized annotation line.
 *
 * @param tokens spans of the tokens within {@code line}
 * @param line the annotation line the spans refer to
 * @return a relation annotation made of the id and type tokens plus the two
 *         argument tokens, each passed through {@code parseArg}
 */
@Override
BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
  String id = tokens[BratAnnotationParser.ID_OFFSET].getCoveredText(line).toString();
  String type = tokens[BratAnnotationParser.TYPE_OFFSET].getCoveredText(line).toString();

  return new RelationAnnotation(id, type,
      parseArg(tokens[ARG1_OFFSET].getCoveredText(line).toString()),
      parseArg(tokens[ARG2_OFFSET].getCoveredText(line).toString()));
}
}
/**
 * Asserts that the annotation is a {@link SpanAnnotation} with the expected
 * covered text and the expected note.
 *
 * @param annotation the annotation to verify
 * @param expectedCoveredText text the span annotation must cover
 * @param expectedNote note the span annotation must carry
 */
private void checkNote(BratAnnotation annotation, String expectedCoveredText,
    String expectedNote) {
  Assert.assertTrue(annotation instanceof SpanAnnotation);

  SpanAnnotation annotatedSpan = (SpanAnnotation) annotation;
  Assert.assertEquals(expectedCoveredText, annotatedSpan.getCoveredText());
  Assert.assertEquals(expectedNote, annotatedSpan.getNote());
}
/**
 * Converts one {@link BratDocument} into name samples by delegating to the
 * configured parser.
 *
 * @param sample the document to convert
 * @return the name samples produced by the parser for this document
 * @throws IOException declared by the overridden method
 */
@Override
protected List<NameSample> read(BratDocument sample) throws IOException {
  List<NameSample> nameSamples = parser.parse(sample);
  return nameSamples;
}
}
/**
 * Reads all samples from the documents selected by the "overlapping" file name
 * filter, with no name type filter, and expects a {@link RuntimeException}
 * along the way.
 */
@Test(expected = RuntimeException.class)
public void readOverlapFail() throws IOException {
  BratNameSampleStream stream = createNameSampleWith("overlapping", null);

  // Drain the stream; the expected exception is raised while reading.
  for (NameSample sample = stream.read(); sample != null; sample = stream.read()) {
    // nothing to do per sample
  }
}
/**
 * Opens the given classpath resource and wraps it in a
 * {@link BratAnnotationStream} identified as "testing".
 *
 * @param conf the annotation configuration to parse with
 * @param file classpath location of the annotation file
 * @return an annotation stream over the resource
 */
private ObjectStream<BratAnnotation> creatBratAnnotationStream(
    AnnotationConfiguration conf, String file) {
  InputStream resourceIn = BratAnnotationStreamTest.class.getResourceAsStream(file);
  return new BratAnnotationStream(conf, "testing", resourceIn);
}
/**
 * Parses an annotation configuration from a file. The file is opened through
 * a buffered stream that is closed once parsing finishes.
 *
 * @param annConfigFile the configuration file to read
 * @return the configuration produced by the {@link InputStream} based parse method
 * @throws IOException if the file cannot be opened or read
 */
public static AnnotationConfiguration parse(File annConfigFile) throws IOException {
  try (InputStream configIn = new BufferedInputStream(new FileInputStream(annConfigFile))) {
    AnnotationConfiguration configuration = parse(configIn);
    return configuration;
  }
}
}
/**
 * Registers a new {@link BratNameSampleStreamFactory} with the
 * {@link StreamFactoryRegistry} for {@link NameSample} streams under the
 * format name "brat".
 */
public static void registerFactory() {
  BratNameSampleStreamFactory factory = new BratNameSampleStreamFactory();
  StreamFactoryRegistry.registerFactory(NameSample.class, "brat", factory);
}
}
/**
 * Creates an annotator note with the fixed type "#AnnotationNote".
 *
 * @param id the id of this annotation
 * @param attachedId the id stored as the annotation this note is attached to
 * @param note the note text, stored via {@code setNote}
 */
protected AnnotatorNoteAnnotation(String id, String attachedId, String note) {
  super(id, "#AnnotationNote");
  this.attachedId = attachedId;
  setNote(note);
}
/**
 * Tells whether the annotation is a {@link SpanAnnotation} of an accepted type.
 * Every type is accepted when {@code nameTypes} is {@code null}; otherwise the
 * annotation's type must be contained in {@code nameTypes}.
 *
 * @param ann the annotation to check
 * @return {@code true} if the annotation is an accepted span annotation
 */
private boolean isSpanAnnotation(BratAnnotation ann) {
  return ann instanceof SpanAnnotation
      && (nameTypes == null || nameTypes.contains(ann.getType()));
}
}
/**
 * Builds an {@link AnnotatorNoteAnnotation} from a tokenized annotation line.
 * The note text runs from the start of the token at {@code START_VALUE_OFFSET}
 * to the end of the last token.
 *
 * @param tokens spans of the tokens within {@code line}
 * @param line the annotation line the spans refer to
 * @return the annotator note made of the id token, the attach-to token and the note text
 */
@Override
BratAnnotation parse(Span[] tokens, CharSequence line) throws IOException {
  // One span covering everything from the first value token to the line's last token.
  Span noteSpan = new Span(
      tokens[START_VALUE_OFFSET].getStart(),
      tokens[tokens.length - 1].getEnd());

  String id = tokens[ID_OFFSET].getCoveredText(line).toString();
  String attachedId = tokens[ATTACH_TO_OFFSET].getCoveredText(line).toString();
  String note = noteSpan.getCoveredText(line).toString();

  return new AnnotatorNoteAnnotation(id, attachedId, note);
}
}
/**
 * Expects an {@link IllegalArgumentException} when the name sample stream is
 * created with an empty set of name types.
 */
@Test(expected = IllegalArgumentException.class)
public void emptySample() throws IOException {
  Set<String> noNameTypes = Collections.emptySet();
  createNameSampleWith("overlapping", noNameTypes);
}
/**
 * Creates a new {@link BratNameSampleStream} whose underlying
 * {@link BratDocumentParser} is configured with the given name types.
 *
 * @param sentDetector a {@link SentenceDetector} instance
 * @param tokenizer a {@link Tokenizer} instance
 * @param samples a {@link BratDocument} {@link ObjectStream}
 * @param nameTypes the name types to use or null if all name types
 */
public BratNameSampleStream(SentenceDetector sentDetector, Tokenizer tokenizer,
    ObjectStream<BratDocument> samples, Set<String> nameTypes) {
  super(samples);
  parser = new BratDocumentParser(sentDetector, tokenizer, nameTypes);
}
/**
 * Reads all samples from the documents selected by the "-entities." file name
 * filter, with no name type filter, and expects eight samples.
 */
@Test
public void readNoOverlap() throws IOException {
  BratNameSampleStream stream = createNameSampleWith("-entities.", null);

  int sampleCount = 0;
  for (NameSample sample = stream.read(); sample != null; sample = stream.read()) {
    sampleCount++;
  }

  Assert.assertEquals(8, sampleCount);
}
/**
 * Reads all samples from the documents selected by the "overlapping" file name
 * filter, restricted to the "Person" name type, and expects eight samples.
 */
@Test
public void readOverlapFilter() throws IOException {
  BratNameSampleStream stream =
      createNameSampleWith("overlapping", Collections.singleton("Person"));

  int sampleCount = 0;
  for (NameSample sample = stream.read(); sample != null; sample = stream.read()) {
    sampleCount++;
  }

  Assert.assertEquals(8, sampleCount);
}
}