/**
 * Verifies parsing of a brat document that carries entity annotations:
 * checks the document id, the start/end of the decoded text, the total
 * annotation count, and the notes attached to two entities.
 *
 * @throws IOException if the test resources cannot be read
 */
@Test
public void testDocumentWithEntitiesParsing() throws IOException {
  Map<String, String> typeToClassMap = new HashMap<>();
  BratAnnotationStreamTest.addEntityTypes(typeToClassMap);
  AnnotationConfiguration config = new AnnotationConfiguration(typeToClassMap);

  // try-with-resources: the original never closed either stream,
  // leaking them on every run (and especially on assertion failure).
  try (InputStream txtIn = BratDocumentTest.class.getResourceAsStream(
          "/opennlp/tools/formats/brat/voa-with-entities.txt");
       InputStream annIn = BratDocumentTest.class.getResourceAsStream(
          "/opennlp/tools/formats/brat/voa-with-entities.ann")) {

    BratDocument doc = BratDocument.parseDocument(config, "voa-with-entities", txtIn, annIn);

    Assert.assertEquals("voa-with-entities", doc.getId());
    Assert.assertTrue(doc.getText().startsWith(" U . S . President "));
    Assert.assertTrue(doc.getText().endsWith("multinational process . \n"));

    Assert.assertEquals(18, doc.getAnnotations().size());

    BratAnnotation annotation = doc.getAnnotation("T2");
    checkNote(annotation, "Barack Obama", "President Obama was the 44th U.S. president");

    annotation = doc.getAnnotation("T3");
    checkNote(annotation, "South Korea", "The capital of South Korea is Seoul");
  }
}
/**
 * Parses a brat document from a text stream and an annotation stream.
 *
 * @param config the annotation configuration used to interpret the .ann entries
 * @param id the document id
 * @param txtIn stream over the document text, decoded as UTF-8 (not closed here)
 * @param annIn stream over the brat .ann annotations (closed by this method)
 * @return the parsed {@link BratDocument}
 * @throws IOException if reading from either stream fails
 */
public static BratDocument parseDocument(AnnotationConfiguration config, String id,
    InputStream txtIn, InputStream annIn) throws IOException {

  Reader txtReader = new InputStreamReader(txtIn, StandardCharsets.UTF_8);

  StringBuilder text = new StringBuilder();
  char[] cbuf = new char[1024];
  int len;
  // Reader.read may legally return 0 without reaching end-of-stream,
  // so only a negative return value terminates the loop (original used > 0).
  while ((len = txtReader.read(cbuf)) >= 0) {
    text.append(cbuf, 0, len);
  }

  Collection<BratAnnotation> annotations = new ArrayList<>();
  // try-with-resources closes the stream even if read() throws;
  // the original only closed it on the success path.
  try (ObjectStream<BratAnnotation> annStream =
      new BratAnnotationStream(config, id, annIn)) {
    BratAnnotation ann;
    while ((ann = annStream.read()) != null) {
      annotations.add(ann);
    }
  }

  return new BratDocument(config, id, text.toString(), annotations);
}
}
// NOTE(review): this line is a garbled fragment — disconnected statements from the
// middle of a larger method (presumably a brat-to-NameSample conversion loop; the
// enclosing definition is not visible here). It is not compilable on its own and is
// preserved byte-for-byte — TODO: restore from the complete source file.
Map<Integer, Span> coveredIndexes = new HashMap<>(); for (BratAnnotation ann : sample.getAnnotations()) { if (isSpanAnnotation(ann)) { entityIdSet.add(ann.getId()); for (Span sentence : sentDetector.sentPosDetect(sample.getText())) { Span conflictingName = coveredIndexes.get(sentence.getStart()); sample.getId()); sample.getText()).toString(); for (BratAnnotation ann : sample.getAnnotations()) { entityIdSet.remove(ann.getId()); entitySpan = entitySpan.trim(sample.getText()); } else { System.err.println("Dropped entity " + entity.getId() + " (" + entitySpan.getCoveredText(sample.getText()) + ") " + " in document " + sample.getId() + ", it is not matching tokenization!"); samples.add(new NameSample(sample.getId(), Span.spansToStrings(tokens, sentenceText), names.toArray(new Span[names.size()]), null, samples.size() == 0)); sample.getId() + ", is not matching sentence segmentation!");
/**
 * Advances to the next document id and parses the corresponding
 * {@code <id>.txt} / {@code <id>.ann} file pair.
 *
 * @return the parsed document, or {@code null} once all ids are consumed
 * @throws IOException if either file cannot be opened or read
 */
public BratDocument read() throws IOException {
  // Guard clause instead of the original's nested-if-with-result-variable shape.
  if (!documentIdIterator.hasNext()) {
    return null;
  }
  String docId = documentIdIterator.next();
  try (InputStream textStream = new BufferedInputStream(new FileInputStream(docId + ".txt"));
       InputStream annotationStream = new BufferedInputStream(new FileInputStream(docId + ".ann"))) {
    return BratDocument.parseDocument(config, docId, textStream, annotationStream);
  }
}
// NOTE(review): this line starts mid-statement and is a fragment of a test method
// (apparently asserting span offsets for the opennlp-1193 regression document);
// the enclosing definition is not visible here. Preserved byte-for-byte —
// TODO: restore from the complete source file.
"/opennlp/tools/formats/brat/opennlp-1193.ann"); BratDocument doc = BratDocument.parseDocument(config, "opennlp-1193", txtIn, annIn); SpanAnnotation t1 = (SpanAnnotation) doc.getAnnotation("T1"); Assert.assertEquals(t1.getSpans()[0].getStart(), 0); Assert.assertEquals(t1.getSpans()[0].getEnd(), 7); Assert.assertEquals(t1.getSpans()[2].getEnd(), 24); SpanAnnotation t2 = (SpanAnnotation) doc.getAnnotation("T2"); Assert.assertEquals(t2.getSpans()[0].getStart(), 26); Assert.assertEquals(t2.getSpans()[0].getEnd(), 33);
// NOTE(review): garbled fragment — disconnected statements from the middle of a
// larger method (a variant of the fragment elsewhere in this file, using
// `instanceof SpanAnnotation` instead of a helper check). Not compilable on its
// own; preserved byte-for-byte — TODO: restore from the complete source file.
Map<Integer, Span> coveredIndexes = new HashMap<>(); for (BratAnnotation ann : sample.getAnnotations()) { if (ann instanceof SpanAnnotation) { entityIdSet.add(ann.getId()); for (Span sentence : sentDetector.sentPosDetect(sample.getText())) { Span conflictingName = coveredIndexes.get(sentence.getStart()); sample.getId()); sample.getText()).toString(); for (BratAnnotation ann : sample.getAnnotations()) { entityIdSet.remove(ann.getId()); entitySpan = entitySpan.trim(sample.getText()); + entitySpan.getCoveredText(sample.getText()) + ") " + " in document " + sample.getId() + ", it is not matching tokenization!"); samples.add(new NameSample(sample.getId(), Span.spansToStrings(tokens, sentenceText), names.toArray(new Span[names.size()]), null, samples.size() == 0)); sample.getId() + ", is not matching sentence segmentation!");
/**
 * Reads the next document: takes the next id from the iterator and parses
 * the matching {@code .txt} and {@code .ann} files.
 *
 * @return the next {@link BratDocument}, or {@code null} when exhausted
 * @throws IOException if either backing file cannot be opened or read
 */
public BratDocument read() throws IOException {
  // Early exit when there is nothing left, avoiding the original's
  // mutable result variable.
  if (!documentIdIterator.hasNext()) {
    return null;
  }
  String nextId = documentIdIterator.next();
  try (InputStream txtStream = new BufferedInputStream(new FileInputStream(nextId + ".txt"));
       InputStream annStream = new BufferedInputStream(new FileInputStream(nextId + ".ann"))) {
    return BratDocument.parseDocument(config, nextId, txtStream, annStream);
  }
}
// NOTE(review): garbled fragment — disconnected statements from the middle of a
// larger method (duplicate of another fragment in this file). Not compilable on
// its own; preserved byte-for-byte — TODO: restore from the complete source file.
Map<Integer, Span> coveredIndexes = new HashMap<>(); for (BratAnnotation ann : sample.getAnnotations()) { if (isSpanAnnotation(ann)) { entityIdSet.add(ann.getId()); for (Span sentence : sentDetector.sentPosDetect(sample.getText())) { Span conflictingName = coveredIndexes.get(sentence.getStart()); sample.getId()); sample.getText()).toString(); for (BratAnnotation ann : sample.getAnnotations()) { entityIdSet.remove(ann.getId()); entitySpan = entitySpan.trim(sample.getText()); } else { System.err.println("Dropped entity " + entity.getId() + " (" + entitySpan.getCoveredText(sample.getText()) + ") " + " in document " + sample.getId() + ", it is not matching tokenization!"); samples.add(new NameSample(sample.getId(), Span.spansToStrings(tokens, sentenceText), names.toArray(new Span[names.size()]), null, samples.size() == 0)); sample.getId() + ", is not matching sentence segmentation!");
/**
 * Produces the next {@link BratDocument} by pairing {@code <id>.txt} with
 * {@code <id>.ann} for the next available document id.
 *
 * @return the parsed document, or {@code null} after the last id
 * @throws IOException on any failure opening or reading the files
 */
public BratDocument read() throws IOException {
  // Flip the condition and bail out early rather than threading a
  // nullable local through the method.
  if (!documentIdIterator.hasNext()) {
    return null;
  }
  String documentId = documentIdIterator.next();
  try (InputStream textIn = new BufferedInputStream(new FileInputStream(documentId + ".txt"));
       InputStream annotationsIn = new BufferedInputStream(new FileInputStream(documentId + ".ann"))) {
    return BratDocument.parseDocument(config, documentId, textIn, annotationsIn);
  }
}
/**
 * Parses a brat document from a text stream and an annotation stream.
 *
 * @param config the annotation configuration used to interpret the .ann entries
 * @param id the document id
 * @param txtIn stream over the document text, decoded as UTF-8 (not closed here)
 * @param annIn stream over the brat .ann annotations (closed by this method)
 * @return the parsed {@link BratDocument}
 * @throws IOException if reading from either stream fails
 */
public static BratDocument parseDocument(AnnotationConfiguration config, String id,
    InputStream txtIn, InputStream annIn) throws IOException {

  Reader txtReader = new InputStreamReader(txtIn, StandardCharsets.UTF_8);

  StringBuilder text = new StringBuilder();
  char[] cbuf = new char[1024];
  int len;
  // Reader.read may legally return 0 without reaching end-of-stream,
  // so only a negative return value terminates the loop (original used > 0).
  while ((len = txtReader.read(cbuf)) >= 0) {
    text.append(cbuf, 0, len);
  }

  Collection<BratAnnotation> annotations = new ArrayList<>();
  // try-with-resources closes the stream even if read() throws;
  // the original only closed it on the success path.
  try (ObjectStream<BratAnnotation> annStream =
      new BratAnnotationStream(config, id, annIn)) {
    BratAnnotation ann;
    while ((ann = annStream.read()) != null) {
      annotations.add(ann);
    }
  }

  return new BratDocument(config, id, text.toString(), annotations);
}
}
// NOTE(review): this line starts mid-statement and is a fragment of a test method
// whose enclosing definition is not visible here. Preserved byte-for-byte —
// TODO: restore from the complete source file.
"/opennlp/tools/formats/brat/opennlp-1193.ann"); BratDocument doc = BratDocument.parseDocument(config, "opennlp-1193", txtIn, annIn);
/**
 * Parses a brat document from a text stream and an annotation stream.
 *
 * @param config the annotation configuration used to interpret the .ann entries
 * @param id the document id
 * @param txtIn stream over the document text, decoded as UTF-8 (not closed here)
 * @param annIn stream over the brat .ann annotations (closed by this method)
 * @return the parsed {@link BratDocument}
 * @throws IOException if reading from either stream fails
 */
public static BratDocument parseDocument(AnnotationConfiguration config, String id,
    InputStream txtIn, InputStream annIn) throws IOException {

  Reader txtReader = new InputStreamReader(txtIn, StandardCharsets.UTF_8);

  StringBuilder text = new StringBuilder();
  char[] cbuf = new char[1024];
  int len;
  // Reader.read may legally return 0 without reaching end-of-stream,
  // so only a negative return value terminates the loop (original used > 0).
  while ((len = txtReader.read(cbuf)) >= 0) {
    text.append(cbuf, 0, len);
  }

  Collection<BratAnnotation> annotations = new ArrayList<>();
  // try-with-resources closes the stream even if read() throws;
  // the original only closed it on the success path.
  try (ObjectStream<BratAnnotation> annStream =
      new BratAnnotationStream(config, id, annIn)) {
    BratAnnotation ann;
    while ((ann = annStream.read()) != null) {
      annotations.add(ann);
    }
  }

  return new BratDocument(config, id, text.toString(), annotations);
}
}