/**
 * Encodes a four-token sentence that carries no name spans and checks that
 * every token receives the OTHER outcome.
 */
@Test
public void testEncodeNoNames() {
  String[] tokens = "Once upon a time.".split(" ");
  NameSample nameSample = new NameSample(tokens, new Span[] {}, true);

  // One OTHER outcome per token of the sentence above.
  String[] expected = {OTHER, OTHER, OTHER, OTHER};

  int sentenceLength = nameSample.getSentence().length;
  String[] actual = codec.encode(nameSample.getNames(), sentenceLength);

  Assert.assertArrayEquals("Only 'Other' is expected.", expected, actual);
}
/**
 * Compares this sample with another object. Two samples are equal when their
 * sentences, names and additional context arrays are element-wise equal and
 * their clear-adaptive-data flags match.
 *
 * @param obj the object to compare against
 * @return {@code true} if {@code obj} is a {@code NameSample} with equal content
 */
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if (!(obj instanceof NameSample)) {
    return false;
  }

  NameSample other = (NameSample) obj;

  // Checked in the same order as a short-circuiting conjunction would.
  if (!Arrays.equals(getSentence(), other.getSentence())) {
    return false;
  }
  if (!Arrays.equals(getNames(), other.getNames())) {
    return false;
  }
  if (!Arrays.equals(getAdditionalContext(), other.getAdditionalContext())) {
    return false;
  }
  return isClearAdaptiveDataSet() == other.isClearAdaptiveDataSet();
}
namesBySentence[i] = sample.getNames(); Span[] tokens = new Span[sample.getSentence().length]; for (int ti = 0; ti < sample.getSentence().length; ti++) { int tokenBegin = text.length(); text.append(sample.getSentence()[ti]); text.append(" "); tokens[ti] = new Span(tokenBegin, text.length()); document.add(NameSample.parse(line, false));
/**
 * Reports a misclassified sample by forwarding the reference id, both sets of
 * name spans, both samples and the reference sentence to {@code printError}.
 *
 * @param reference the gold sample
 * @param prediction the sample produced by the system under evaluation
 */
@Override
public void missclassified(NameSample reference, NameSample prediction) {
  Span[] expectedNames = reference.getNames();
  Span[] predictedNames = prediction.getNames();
  String[] tokens = reference.getSentence();

  printError(reference.getId(), expectedNames, predictedNames, reference, prediction, tokens);
}
private void statsAdd(NameSample reference, NameSample prediction) { String[] refTags = sequenceCodec.encode(reference.getNames(), reference.getSentence().length); String[] predTags = sequenceCodec.encode(prediction.getNames(), prediction.getSentence().length); // we don' want it to compute token frequency, so we pass an array of empty strings instead // of tokens getStats().add(new String[reference.getSentence().length], refTags, predTags); }
/**
 * Reads the next sample from the wrapped stream and returns a copy that keeps
 * only the name spans whose type is contained in {@code types}. The copy is
 * created with {@code null} passed as its fourth constructor argument.
 *
 * @return the filtered sample, or {@code null} when the wrapped stream returns {@code null}
 * @throws IOException if reading from the wrapped stream fails
 */
public NameSample read() throws IOException {
  NameSample sample = samples.read();
  if (sample == null) {
    return null;
  }

  List<Span> kept = new ArrayList<>();
  for (Span candidate : sample.getNames()) {
    if (!types.contains(candidate.getType())) {
      continue;
    }
    kept.add(candidate);
  }

  Span[] keptNames = kept.toArray(new Span[kept.size()]);
  return new NameSample(sample.getId(), sample.getSentence(), keptNames, null,
      sample.isClearAdaptiveDataSet());
}
}
/**
 * Verifies that a name covering the final token of a sentence is still
 * rendered with its closing marker by {@code toString()}.
 */
@Test
public void testNameAtEnd() {
  String[] sentence = {"My", "name", "is", "Anna"};
  Span lastToken = new Span(3, 4);

  NameSample sample = new NameSample(sentence, new Span[] {lastToken}, false);
  String rendered = sample.toString();

  Assert.assertEquals("My name is <START> Anna <END>", rendered);
}
/**
 * Handles a closing element.
 * <p>
 * For a name element, the pending span is popped from {@code incompleteNames}
 * and re-created with the current token count as its end before being added
 * to {@code names}. For a content element, the collected tokens and names are
 * stored as a new sample, the clear flag is reset and the buffers are emptied.
 *
 * @param name the name of the element that is being closed
 */
@Override
public void endElement(String name) {
  if (NAME_ELEMENT_NAMES.contains(name)) {
    Span opened = incompleteNames.pop();
    // The span end is only known now: it is the number of tokens seen so far.
    Span closed = new Span(opened.getStart(), text.size(), opened.getType());
    names.add(closed);
  }

  if (MucElementNames.CONTENT_ELEMENTS.contains(name)) {
    String[] tokens = text.toArray(new String[text.size()]);
    Span[] spans = names.toArray(new Span[names.size()]);
    storedSamples.add(new NameSample(tokens, spans, isClearAdaptiveData));

    // Only the first stored sample after the flag was raised carries it.
    isClearAdaptiveData = false;

    text.clear();
    names.clear();
    isInsideContentElement = false;
  }
}
}
/**
 * Reads the first sample of the German test data and checks its token count,
 * that it has no names, and that its clear-adaptive-data flag is set.
 *
 * @throws IOException if the sample data cannot be read
 */
@Test
public void testParsingGermanSample() throws IOException {
  ObjectStream<NameSample> sampleStream = openData(LANGUAGE.DE, GERMAN_SAMPLE);

  NameSample firstSample = sampleStream.read();
  Assert.assertNotNull(firstSample);

  int tokenCount = firstSample.getSentence().length;
  int nameCount = firstSample.getNames().length;

  Assert.assertEquals(5, tokenCount);
  Assert.assertEquals(0, nameCount);
  Assert.assertTrue(firstSample.isClearAdaptiveDataSet());
}
if (catchingName) { throw new IOException("Found unexpected annotation" + " while handling a name sequence: " + errorTokenWithContext(parts, pi)); if (nameTypeFromSample != null) { if (nameTypeFromSample.length() == 0) { throw new IOException("Missing a name type: " + errorTokenWithContext(parts, pi)); throw new IOException("Found unexpected annotation: " + errorTokenWithContext(parts, pi)); Span[] names = nameList.toArray(new Span[nameList.size()]); return new NameSample(sentence, names, isClearAdaptiveData);
@Test public void testNameSampleSerDe() throws IOException { NameSample nameSample = createGoldSample(); ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); ObjectOutput out = new ObjectOutputStream(byteArrayOutputStream); out.writeObject(nameSample); out.flush(); byte[] bytes = byteArrayOutputStream.toByteArray(); ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(bytes); ObjectInput objectInput = new ObjectInputStream(byteArrayInputStream); NameSample deSerializedNameSample = null; try { deSerializedNameSample = (NameSample) objectInput.readObject(); } catch (ClassNotFoundException e) { // do nothing } Assert.assertNotNull(deSerializedNameSample); Assert.assertArrayEquals(nameSample.getSentence(), deSerializedNameSample.getSentence()); Assert.assertArrayEquals(nameSample.getNames(), deSerializedNameSample.getNames()); Assert.assertArrayEquals(nameSample.getAdditionalContext(), deSerializedNameSample.getAdditionalContext()); }
/**
 * Checks the first three name spans of the sample at index 7: each must be a
 * single-token "person" span at the expected position.
 *
 * @throws IOException declared by the test signature
 */
@Test
public void testMissingRightContraction() throws IOException {
  Span[] names = samples.get(7).getNames();

  Span firstExpected = new Span(0, 1, "person");
  Span secondExpected = new Span(3, 4, "person");
  Span thirdExpected = new Span(5, 6, "person");

  Assert.assertEquals(firstExpected, names[0]);
  Assert.assertEquals(secondExpected, names[1]);
  Assert.assertEquals(thirdExpected, names[2]);
}
/**
 * Extracts the token array from a name sample.
 *
 * @param sample the sample to read the sentence from
 * @return the value of {@code sample.getSentence()}
 */
@Override
protected String[] toSentence(NameSample sample) {
  String[] tokens = sample.getSentence();
  return tokens;
}
}
/**
 * Parses an annotated line containing three names and checks that the
 * resulting sentence has eight tokens.
 * <p>
 * NOTE(review): the test name suggests the input should contain a doubled
 * space between two tokens; confirm the literal below still does.
 *
 * @throws Exception if parsing the line fails
 */
@Test
public void testParseWithAdditionalSpace() throws Exception {
  String line = "<START> M . K . <END> <START> Schwitters <END> ? <START> Heartfield <END> ?";

  NameSample parsed = NameSample.parse(line, false);
  int tokenCount = parsed.getSentence().length;

  Assert.assertEquals(8, tokenCount);
}
/**
 * Verifies that name types containing special characters are parsed and
 * returned unchanged.
 *
 * @throws Exception if parsing the annotated line fails
 */
@Test
public void testTypeWithSpecialChars() throws Exception {
  String annotated = "<START:type-1> U . S . <END> "
      + "President <START:type_2> Barack Obama <END> is considering sending "
      + "additional American forces to <START:type_3-/;.,&%$> Afghanistan <END> .";

  NameSample parsedSample = NameSample.parse(annotated, false);
  Span[] names = parsedSample.getNames();

  Assert.assertEquals(3, names.length);
  Assert.assertEquals("type-1", names[0].getType());
  Assert.assertEquals("type_2", names[1].getType());
  Assert.assertEquals("type_3-/;.,&%$", names[2].getType());
}
/**
 * Reads the Dutch sample data: the first sample must have no names and carry
 * the clear-adaptive-data flag, the second must not carry the flag, and the
 * stream must then be exhausted.
 *
 * @throws IOException if the sample data cannot be read
 */
@Test
public void testParsingDutchSample() throws IOException {
  ObjectStream<NameSample> sampleStream = openData(LANGUAGE.NLD, "conll2002-nl.sample");

  NameSample firstSample = sampleStream.read();
  Assert.assertEquals(0, firstSample.getNames().length);
  Assert.assertTrue(firstSample.isClearAdaptiveDataSet());

  NameSample secondSample = sampleStream.read();
  Assert.assertFalse(secondSample.isClearAdaptiveDataSet());

  NameSample afterEnd = sampleStream.read();
  Assert.assertNull(afterEnd);
}
public NameSample read() throws IOException { String token = samples.read(); boolean isClearAdaptiveData = false; // An empty line indicates the begin of a new article // for which the adaptive data in the feature generators // must be cleared while (token != null && token.trim().length() == 0) { isClearAdaptiveData = true; token = samples.read(); } if (token != null) { return NameSample.parse(token, isClearAdaptiveData); } else { return null; } } }
public DocumentSample read() throws IOException { List<NameSample> document = new ArrayList<>(); if (beginSample == null) { // Assume that the clear flag is set beginSample = samples.read(); } // Underlying stream is exhausted! if (beginSample == null) { return null; } document.add(beginSample); NameSample sample; while ((sample = samples.read()) != null) { if (sample.isClearAdaptiveDataSet()) { beginSample = sample; break; } document.add(sample); } // Underlying stream is exhausted, // next call must return null if (sample == null) { beginSample = null; } return new DocumentSample(document.toArray(new NameSample[document.size()])); }
/**
 * Builds a typed name sample, verifies its string representation, and checks
 * that parsing that representation yields an equal sample.
 *
 * @throws Exception if parsing the annotated line fails
 */
@Test
public void testWithTypesToString() throws Exception {
  String expectedText = "<START:Location> U . S . <END> President <START:Person>"
      + " Barack Obama <END> "
      + "is considering sending additional American forces to <START:Location> Afghanistan <END> .";
  String nameSampleStr = createSimpleNameSample(true).toString();
  Assert.assertEquals(expectedText, nameSampleStr);

  String annotated = "<START:Location> U . S . <END> "
      + "President <START:Person> Barack Obama <END> is considering sending "
      + "additional American forces to <START:Location> Afghanistan <END> .";
  NameSample parsedSample = NameSample.parse(annotated, false);

  Assert.assertEquals(createSimpleNameSample(true), parsedSample);
}
/**
 * Builds an untyped name sample and verifies its string representation.
 */
@Test
public void testNoTypesToString() {
  String expectedText = "<START> U . S . <END> President <START> Barack Obama <END>"
      + " is considering "
      + "sending additional American forces to <START> Afghanistan <END> .";

  NameSample untyped = createSimpleNameSample(false);
  String nameSampleStr = untyped.toString();

  Assert.assertEquals(expectedText, nameSampleStr);
}