opennlp.tools.namefind.NameSample java code examples

/**
 * Encoding a sample that carries no name spans must yield the OTHER
 * outcome for each of its four tokens.
 */
@Test
public void testEncodeNoNames() {
 String[] tokens = "Once upon a time.".split(" ");
 NameSample sampleWithoutNames = new NameSample(tokens, new Span[] {}, true);

 String[] actual = codec.encode(sampleWithoutNames.getNames(),
   sampleWithoutNames.getSentence().length);

 String[] expected = {OTHER, OTHER, OTHER, OTHER};
 Assert.assertArrayEquals("Only 'Other' is expected.", expected, actual);
}

/**
 * Two samples are equal when their sentence, names, additional context and
 * clear-adaptive-data flag all match.
 *
 * @param obj the object to compare with; may be any type
 * @return {@code true} if {@code obj} is this instance or a {@code NameSample}
 *     with matching content, {@code false} otherwise
 */
@Override
public boolean equals(Object obj) {
 if (obj == this) {
  return true;
 }
 if (!(obj instanceof NameSample)) {
  return false;
 }

 NameSample other = (NameSample) obj;

 // The checks run in the same order as a short-circuiting && chain would.
 if (!Arrays.equals(getSentence(), other.getSentence())) {
  return false;
 }
 if (!Arrays.equals(getNames(), other.getNames())) {
  return false;
 }
 // NOTE(review): if getAdditionalContext() returns a nested array, Arrays.equals
 // compares the inner arrays by reference only - confirm this is intended.
 if (!Arrays.equals(getAdditionalContext(), other.getAdditionalContext())) {
  return false;
 }
 return isClearAdaptiveDataSet() == other.isClearAdaptiveDataSet();
}

 namesBySentence[i] = sample.getNames();
 Span[] tokens = new Span[sample.getSentence().length];
 for (int ti = 0; ti < sample.getSentence().length; ti++) {
  int tokenBegin = text.length();
  text.append(sample.getSentence()[ti]);
  text.append(" ");
  tokens[ti] = new Span(tokenBegin, text.length());
document.add(NameSample.parse(line, false));

/**
 * Reports a misclassified sample by forwarding the reference id, the names of
 * both samples, the samples themselves and the reference sentence to
 * {@code printError}.
 *
 * @param reference the sample holding the expected names
 * @param prediction the sample holding the predicted names
 */
@Override
public void missclassified(NameSample reference, NameSample prediction) {
 Span[] expectedNames = reference.getNames();
 Span[] predictedNames = prediction.getNames();
 String[] referenceTokens = reference.getSentence();

 printError(reference.getId(), expectedNames, predictedNames, reference,
   prediction, referenceTokens);
}

/**
 * Encodes the names of both samples into tag sequences and adds them to the
 * statistics.
 *
 * @param reference the sample holding the expected names
 * @param prediction the sample holding the predicted names
 */
private void statsAdd(NameSample reference, NameSample prediction) {
 int referenceLength = reference.getSentence().length;
 int predictionLength = prediction.getSentence().length;

 String[] refTags = sequenceCodec.encode(reference.getNames(), referenceLength);
 String[] predTags = sequenceCodec.encode(prediction.getNames(), predictionLength);

 // We don't want token frequencies to be computed, so instead of the real
 // tokens a freshly allocated (unfilled) array of the same length is passed.
 String[] blankTokens = new String[referenceLength];
 getStats().add(blankTokens, refTags, predTags);
}

 /**
  * Reads the next sample from the underlying stream and returns a copy that
  * keeps only the names whose type is contained in {@code types}.
  *
  * @return the filtered sample, or {@code null} when the underlying stream
  *     returned {@code null}
  * @throws IOException if reading from the underlying stream fails
  */
 public NameSample read() throws IOException {
  NameSample sample = samples.read();
  if (sample == null) {
   return null;
  }

  List<Span> keptNames = new ArrayList<>();
  for (Span candidate : sample.getNames()) {
   if (types.contains(candidate.getType())) {
    keptNames.add(candidate);
   }
  }
  Span[] keptNamesArray = keptNames.toArray(new Span[keptNames.size()]);

  // Id, sentence and clear flag are copied over; the fourth constructor
  // argument is deliberately passed as null.
  return new NameSample(sample.getId(), sample.getSentence(), keptNamesArray, null,
    sample.isClearAdaptiveDataSet());
 }
}

/**
 * Checks that a name which is the last token of a sentence is still
 * output correctly.
 */
@Test
public void testNameAtEnd() {
 String[] tokens = {"My", "name", "is", "Anna"};
 Span lastTokenName = new Span(3, 4);

 NameSample sample = new NameSample(tokens, new Span[] {lastTokenName}, false);

 Assert.assertEquals("My name is <START> Anna <END>", sample.toString());
}

 /**
  * Handles a closing element. A closing name element completes the most
  * recently opened name span; a closing content element turns the collected
  * tokens and names into a stored sample and resets the collection state.
  *
  * @param name the name of the element that was closed
  */
 @Override
 public void endElement(String name) {

  if (NAME_ELEMENT_NAMES.contains(name)) {
   // The pending span only knows its start; its end is the current token count.
   Span opened = incompleteNames.pop();
   names.add(new Span(opened.getStart(), text.size(), opened.getType()));
  }

  if (!MucElementNames.CONTENT_ELEMENTS.contains(name)) {
   return;
  }

  String[] tokens = text.toArray(new String[text.size()]);
  Span[] nameSpans = names.toArray(new Span[names.size()]);
  storedSamples.add(new NameSample(tokens, nameSpans, isClearAdaptiveData));

  // Once a sample has been stored the flag is always off afterwards.
  isClearAdaptiveData = false;

  text.clear();
  names.clear();
  isInsideContentElement = false;
 }
}

/**
 * The first sample read from the German data must have five tokens, no
 * names and the clear-adaptive-data flag set.
 */
@Test
public void testParsingGermanSample() throws IOException {
 ObjectStream<NameSample> germanSamples = openData(LANGUAGE.DE, GERMAN_SAMPLE);

 NameSample firstSample = germanSamples.read();

 Assert.assertNotNull(firstSample);
 Assert.assertEquals(5, firstSample.getSentence().length);
 Assert.assertEquals(0, firstSample.getNames().length);
 Assert.assertTrue(firstSample.isClearAdaptiveDataSet());
}

  if (catchingName) {
   throw new IOException("Found unexpected annotation" +
     " while handling a name sequence: " + errorTokenWithContext(parts, pi));
  if (nameTypeFromSample != null) {
   if (nameTypeFromSample.length() == 0) {
    throw new IOException("Missing a name type: " + errorTokenWithContext(parts, pi));
   throw new IOException("Found unexpected annotation: " + errorTokenWithContext(parts, pi));
Span[] names = nameList.toArray(new Span[nameList.size()]);
return new NameSample(sentence, names, isClearAdaptiveData);

/**
 * Serializes a gold sample with Java object serialization, reads it back and
 * checks that sentence, names and additional context are unchanged.
 *
 * @throws IOException if writing to or reading from the in-memory streams fails
 */
@Test
public void testNameSampleSerDe() throws IOException {
 NameSample nameSample = createGoldSample();

 ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
 // try-with-resources makes sure the object stream is closed even on failure
 try (ObjectOutput out = new ObjectOutputStream(byteArrayOutputStream)) {
  out.writeObject(nameSample);
  out.flush();
 }
 byte[] bytes = byteArrayOutputStream.toByteArray();

 NameSample deSerializedNameSample;
 try (ObjectInput objectInput = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
  deSerializedNameSample = (NameSample) objectInput.readObject();
 } catch (ClassNotFoundException e) {
  // Fail with the real cause attached instead of silently continuing and
  // tripping over a null sample in the assertions below.
  throw new AssertionError("Class of the serialized sample could not be found", e);
 }

 Assert.assertNotNull(deSerializedNameSample);
 Assert.assertArrayEquals(nameSample.getSentence(), deSerializedNameSample.getSentence());
 Assert.assertArrayEquals(nameSample.getNames(), deSerializedNameSample.getNames());
 Assert.assertArrayEquals(nameSample.getAdditionalContext(),
   deSerializedNameSample.getAdditionalContext());
}

/**
 * The sample at index 7 must start with three "person" names located at
 * tokens 0, 3 and 5.
 */
@Test
public void testMissingRightContraction() throws IOException {
 Span[] detectedNames = samples.get(7).getNames();

 Assert.assertEquals(new Span(0, 1, "person"), detectedNames[0]);
 Assert.assertEquals(new Span(3, 4, "person"), detectedNames[1]);
 Assert.assertEquals(new Span(5, 6, "person"), detectedNames[2]);
}

 /**
  * Returns the sentence of the given sample.
  *
  * @param sample the sample to take the sentence from
  * @return the value of {@code sample.getSentence()}
  */
 @Override
 protected String[] toSentence(NameSample sample) {
  String[] sentenceTokens = sample.getSentence();
  return sentenceTokens;
 }
}

/**
 * Tests that an additional space between two tokens is treated like a
 * single space: the parsed sentence must still have eight tokens.
 *
 * @throws Exception if parsing the line fails
 */
@Test
public void testParseWithAdditionalSpace() throws Exception {
 // Note the two consecutive spaces after the first "?".
 String lineWithDoubleSpace =
   "<START> M . K . <END> <START> Schwitters <END> ?  <START> Heartfield <END> ?";

 NameSample parsed = NameSample.parse(lineWithDoubleSpace, false);

 int tokenCount = parsed.getSentence().length;
 Assert.assertEquals(8, tokenCount);
}

/**
 * Checks that name types containing special characters are accepted and
 * returned unchanged by the parsed spans.
 *
 * @throws Exception if parsing the annotated text fails
 */
@Test
public void testTypeWithSpecialChars() throws Exception {
 String annotatedText = "<START:type-1> U . S . <END> "
   + "President <START:type_2> Barack Obama <END> is considering sending "
   + "additional American forces to <START:type_3-/;.,&%$> Afghanistan <END> .";
 String[] expectedTypes = {"type-1", "type_2", "type_3-/;.,&%$"};

 Span[] parsedNames = NameSample.parse(annotatedText, false).getNames();

 Assert.assertEquals(3, parsedNames.length);
 for (int i = 0; i < expectedTypes.length; i++) {
  Assert.assertEquals(expectedTypes[i], parsedNames[i].getType());
 }
}

/**
 * The Dutch sample data must yield exactly two samples: the first without
 * names and with the clear-adaptive-data flag set, the second with the flag
 * unset.
 */
@Test
public void testParsingDutchSample() throws IOException {
 ObjectStream<NameSample> dutchSamples = openData(LANGUAGE.NLD, "conll2002-nl.sample");

 NameSample first = dutchSamples.read();
 Assert.assertEquals(0, first.getNames().length);
 Assert.assertTrue(first.isClearAdaptiveDataSet());

 NameSample second = dutchSamples.read();
 Assert.assertFalse(second.isClearAdaptiveDataSet());

 // After the second sample the stream must be exhausted.
 Assert.assertNull(dutchSamples.read());
}

 /**
  * Reads the next non-blank line from the underlying stream and parses it
  * into a sample.
  *
  * @return the parsed sample, or {@code null} when no further line is available
  * @throws IOException if reading or parsing fails
  */
 public NameSample read() throws IOException {
  boolean sawBlankLine = false;
  String line = samples.read();

  // A blank line marks the beginning of a new article, for which the
  // adaptive data in the feature generators must be cleared.
  while (line != null && line.trim().isEmpty()) {
   sawBlankLine = true;
   line = samples.read();
  }

  if (line == null) {
   return null;
  }
  return NameSample.parse(line, sawBlankLine);
 }
}

/**
 * Collects consecutive samples into one document. The document starts with
 * the sample held back by the previous call (or a freshly read one) and ends
 * right before the next sample whose clear-adaptive-data flag is set; that
 * sample is held back in {@code beginSample} for the following call.
 *
 * @return the next document, or {@code null} when the underlying stream has
 *     no more samples
 * @throws IOException if reading from the underlying stream fails
 */
public DocumentSample read() throws IOException {
 if (beginSample == null) {
  // Nothing was held back; the first sample is assumed to carry the clear flag.
  beginSample = samples.read();
 }
 if (beginSample == null) {
  // Underlying stream is exhausted.
  return null;
 }

 List<NameSample> collected = new ArrayList<>();
 collected.add(beginSample);

 NameSample next = samples.read();
 while (next != null && !next.isClearAdaptiveDataSet()) {
  collected.add(next);
  next = samples.read();
 }

 // next is either the first sample of the following document or null when
 // the stream ran dry; in the latter case the next call must return null.
 beginSample = next;

 return new DocumentSample(collected.toArray(new NameSample[collected.size()]));
}

/**
 * Checks that a NameSample with name types produces the expected string
 * representation, and that parsing that same text gives back an equal sample.
 *
 * @throws Exception if parsing the annotated text fails
 */
@Test
public void testWithTypesToString() throws Exception {
 String annotatedText = "<START:Location> U . S . <END> President "
   + "<START:Person> Barack Obama <END> is considering sending additional "
   + "American forces to <START:Location> Afghanistan <END> .";

 // sample -> text
 String rendered = createSimpleNameSample(true).toString();
 Assert.assertEquals(annotatedText, rendered);

 // text -> sample
 NameSample parsedSample = NameSample.parse(annotatedText, false);
 Assert.assertEquals(createSimpleNameSample(true), parsedSample);
}

/**
 * Checks that a NameSample without name types produces the expected string
 * representation, using plain START tags.
 */
@Test
public void testNoTypesToString() {
 String expectedText = "<START> U . S . <END> President <START> Barack Obama <END> "
   + "is considering sending additional American forces to "
   + "<START> Afghanistan <END> .";

 String rendered = createSimpleNameSample(false).toString();

 Assert.assertEquals(expectedText, rendered);
}

Javadoc

Class for holding names for a single unit of text.

Most used methods

<init>
Initializes the current instance.
getNames
getSentence
toString
getAdditionalContext
isClearAdaptiveDataSet
parse
errorTokenWithContext
getId
equals

Popular in Java

Updating database using SQL prepared statement
getExternalFilesDir (Context)
findViewById (Activity)
getSupportFragmentManager (FragmentActivity)
Time (java.sql)
Java representation of an SQL TIME value. Provides utilities to format and parse the time's represen
DecimalFormat (java.text)
A concrete subclass of NumberFormat that formats decimal numbers. It has a variety of features desig
Callable (java.util.concurrent)
A task that returns a result and may throw an exception. Implementors define a single method with no
Table (com.google.common.collect)
A collection that associates an ordered pair of keys, called a row key and a column key, with a sing
Annotation (javassist.bytecode.annotation)
The annotation structure. An instance of this class is returned by getAnnotations() in AnnotationsAttr
JList (javax.swing)
From CI to AI: The AI layer in your organization

How to use NameSample in opennlp.tools.namefind

Best Java code snippets using opennlp.tools.namefind.NameSample (Showing top 20 results out of 315)

How to use
NameSample
in
opennlp.tools.namefind