// NOTE(review): this line is a garbled fragment — it concatenates pieces of at least three
// separate statements (an error print for a dropped entity, a samples.add(new NameSample(...))
// call, and the tail of a sentence-segmentation warning) and is not compilable as-is.
// It appears to be extraction residue from another file (looks like OpenNLP's
// BratNameSampleStream) — TODO confirm against the original source and restore or remove.
sample.getId()); System.err.println("Dropped entity " + entity.getId() + " (" + entitySpan.getCoveredText(sample.getText()) + ") " + " in document " + sample.getId() + ", it is not matching tokenization!"); samples.add(new NameSample(sample.getId(), Span.spansToStrings(tokens, sentenceText), names.toArray(new Span[names.size()]), null, samples.size() == 0)); sample.getId() + ", is not matching sentence segmentation!");
/**
 * Parses the brat sample document "voa-with-entities" (text + .ann files) and
 * verifies the document id, the beginning and end of the parsed text, the total
 * annotation count, and the notes attached to annotations T2 and T3.
 *
 * @throws IOException if either classpath resource cannot be read
 */
@Test
public void testDocumentWithEntitiesParsing() throws IOException {
  // Build the annotation configuration from the shared entity-type mapping.
  Map<String, String> entityTypes = new HashMap<>();
  BratAnnotationStreamTest.addEntityTypes(entityTypes);
  AnnotationConfiguration annConfig = new AnnotationConfiguration(entityTypes);

  // Load the paired text/annotation resources from the classpath.
  InputStream textStream = BratDocumentTest.class.getResourceAsStream(
      "/opennlp/tools/formats/brat/voa-with-entities.txt");
  InputStream annotationStream = BratDocumentTest.class.getResourceAsStream(
      "/opennlp/tools/formats/brat/voa-with-entities.ann");

  BratDocument document =
      BratDocument.parseDocument(annConfig, "voa-with-entities", textStream, annotationStream);

  Assert.assertEquals("voa-with-entities", document.getId());
  Assert.assertTrue(document.getText().startsWith(" U . S . President "));
  Assert.assertTrue(document.getText().endsWith("multinational process . \n"));
  Assert.assertEquals(18, document.getAnnotations().size());

  // Spot-check the notes attached to two specific annotations.
  BratAnnotation ann = document.getAnnotation("T2");
  checkNote(ann, "Barack Obama", "President Obama was the 44th U.S. president");

  ann = document.getAnnotation("T3");
  checkNote(ann,"South Korea","The capital of South Korea is Seoul");
}
// NOTE(review): duplicate of the garbled fragment on L1 — concatenated pieces of multiple
// statements (dropped-entity warning, NameSample construction, sentence-segmentation warning
// tail) that do not compile on their own. Presumably extraction residue from OpenNLP's
// BratNameSampleStream — TODO confirm and restore the original statements or delete.
sample.getId()); System.err.println("Dropped entity " + entity.getId() + " (" + entitySpan.getCoveredText(sample.getText()) + ") " + " in document " + sample.getId() + ", it is not matching tokenization!"); samples.add(new NameSample(sample.getId(), Span.spansToStrings(tokens, sentenceText), names.toArray(new Span[names.size()]), null, samples.size() == 0)); sample.getId() + ", is not matching sentence segmentation!");
// NOTE(review): third copy of the same garbled fragment as L1/L3 — not compilable as written.
// The repeated, identical content suggests a faulty file join or extraction step rather than
// intentional code — TODO verify against the upstream source (likely OpenNLP's
// BratNameSampleStream) before keeping any of these lines.
sample.getId()); System.err.println("Dropped entity " + entity.getId() + " (" + entitySpan.getCoveredText(sample.getText()) + ") " + " in document " + sample.getId() + ", it is not matching tokenization!"); samples.add(new NameSample(sample.getId(), Span.spansToStrings(tokens, sentenceText), names.toArray(new Span[names.size()]), null, samples.size() == 0)); sample.getId() + ", is not matching sentence segmentation!");