/**
 * Reads the next sample from the wrapped stream and returns a copy that keeps
 * only the name spans whose type is contained in {@code types}.
 *
 * @return the filtered sample, or {@code null} when the wrapped stream
 *         returns {@code null}
 * @throws IOException if reading from the wrapped stream fails
 */
public NameSample read() throws IOException {
  NameSample next = samples.read();
  if (next == null) {
    return null;
  }

  List<Span> kept = new ArrayList<>();
  for (Span candidate : next.getNames()) {
    if (!types.contains(candidate.getType())) {
      continue;
    }
    kept.add(candidate);
  }

  Span[] keptSpans = kept.toArray(new Span[0]);
  // NOTE(review): the fourth constructor argument is passed as null; confirm
  // that nothing carried by the source sample is meant to be forwarded here.
  return new NameSample(next.getId(), next.getSentence(), keptSpans, null,
      next.isClearAdaptiveDataSet());
}
}
/**
 * Builds a sample over the shared {@code sentence} that carries two
 * "Location" spans, (0, 4) and (14, 15), and does not set the
 * clear-adaptive-data flag.
 *
 * @return the newly created sample
 */
private static NameSample createSimpleNameSampleB() {
  return new NameSample(
      sentence,
      new Span[] {
          new Span(0, 4, "Location"),
          new Span(14, 15, "Location")
      },
      false);
}
/**
 * Builds a sample over the shared {@code sentence} that carries three spans:
 * "Location" (0, 4), "Person" (5, 7) and "Location" (14, 15). The
 * clear-adaptive-data flag is not set.
 *
 * @return the newly created sample
 */
private static NameSample createSimpleNameSampleA() {
  return new NameSample(
      sentence,
      new Span[] {
          new Span(0, 4, "Location"),
          new Span(5, 7, "Person"),
          new Span(14, 15, "Location")
      },
      false);
}
/**
 * Encodes a four-token sample that carries no name spans and expects every
 * token to be tagged {@code OTHER}.
 */
@Test
public void testEncodeNoNames() {
  String[] tokens = "Once upon a time.".split(" ");
  NameSample sample = new NameSample(tokens, new Span[] {}, true);

  String[] expected = {OTHER, OTHER, OTHER, OTHER};
  int tokenCount = sample.getSentence().length;
  String[] actual = codec.encode(sample.getNames(), tokenCount);

  Assert.assertArrayEquals("Only 'Other' is expected.", expected, actual);
}
/**
 * With an empty span array, the codec is expected to produce {@code OTHER}
 * for each of the four tokens.
 */
@Test
public void testEncodeNoNames() {
  NameSample emptySample =
      new NameSample("Once upon a time.".split(" "), new Span[] {}, true);

  Span[] noNames = emptySample.getNames();
  String[] encoded = codec.encode(noNames, emptySample.getSentence().length);

  String[] allOther = {OTHER, OTHER, OTHER, OTHER};
  Assert.assertArrayEquals("Only 'Other' is expected.", allOther, encoded);
}
/**
 * Verifies that constructing a {@code NameSample} fails with a
 * {@link RuntimeException} when name spans overlap: here the "Person" span
 * (3, 5) and the "Time" span (2, 4) share token 3.
 */
@Test(expected = RuntimeException.class)
public void testOverlappingNameSpans() throws Exception {
  String[] tokens = {"A", "Place", "a", "time", "A", "Person", "."};

  Span place = new Span(0, 2, "Place");
  Span person = new Span(3, 5, "Person");
  Span time = new Span(2, 4, "Time");

  // The constructor call itself is the behaviour under test.
  new NameSample(tokens, new Span[] {place, person, time}, false);
}
/**
 * Encodes a single three-token span (tokens 3 to 5) and expects the tags
 * {@code A_START}, {@code A_CONTINUE}, {@code A_LAST} for it, with
 * {@code OTHER} for every remaining token.
 */
@Test
public void testEncodeTripleTokenSpan() {
  String[] tokens =
      "Secretary - General Anders Fogh Rasmussen is from Denmark.".split(" ");
  NameSample sample =
      new NameSample(tokens, new Span[] {new Span(3, 6, A_TYPE)}, true);

  String[] expected = {
      OTHER, OTHER, OTHER, A_START, A_CONTINUE, A_LAST, OTHER, OTHER, OTHER
  };
  String[] actual = codec.encode(sample.getNames(), sample.getSentence().length);

  String message = "'Anders' should be 'start' only, 'Fogh' is 'inside', "
      + "'Rasmussen' is 'last' and the rest should be 'other'.";
  Assert.assertArrayEquals(message, expected, actual);
}
/**
 * Encodes a one-token span at index 2 and expects it to be tagged
 * {@code A_START}, with {@code OTHER} for the other three tokens.
 */
@Test
public void testEncodeSingleTokenSpan() {
  String[] tokens = "I called Julie again.".split(" ");
  Span julie = new Span(2, 3, A_TYPE);
  NameSample sample = new NameSample(tokens, new Span[] {julie}, true);

  String[] encoded = codec.encode(sample.getNames(), sample.getSentence().length);

  String[] expected = {OTHER, OTHER, A_START, OTHER};
  Assert.assertArrayEquals(
      "'Julie' should be 'start' only, the rest should be 'other'.",
      expected,
      encoded);
}
/**
 * Encodes a two-token span (tokens 2 and 3) and expects {@code A_START}
 * followed by {@code A_CONTINUE}, with {@code OTHER} everywhere else.
 */
@Test
public void testEncodeDoubleTokenSpan() {
  String[] tokens = "I saw Stefanie Schmidt today.".split(" ");
  Span fullName = new Span(2, 4, A_TYPE);
  NameSample sample = new NameSample(tokens, new Span[] {fullName}, true);

  String[] encoded = codec.encode(sample.getNames(), sample.getSentence().length);

  String[] expected = {OTHER, OTHER, A_START, A_CONTINUE, OTHER};
  String message = "'Stefanie' should be 'start' only, 'Schmidt' is "
      + "'continue' and the rest should be 'other'.";
  Assert.assertArrayEquals(message, expected, encoded);
}
/**
 * Encodes a two-token span whose type is {@code null} and expects the tags to
 * be prefixed with "default-" in place of a type name.
 */
@Test
public void testEncodeDoubleTokenSpanNoType() {
  final String defaultStart = "default" + "-" + BioCodec.START;
  final String defaultContinue = "default" + "-" + BioCodec.CONTINUE;

  String[] tokens = "I saw Stefanie Schmidt today.".split(" ");
  Span untyped = new Span(2, 4, null);
  NameSample sample = new NameSample(tokens, new Span[] {untyped}, true);

  String[] encoded = codec.encode(sample.getNames(), sample.getSentence().length);

  String[] expected = {OTHER, OTHER, defaultStart, defaultContinue, OTHER};
  String message = "'Stefanie' should be 'start' only, 'Schmidt' is "
      + "'continue' and the rest should be 'other'.";
  Assert.assertArrayEquals(message, expected, encoded);
}
/**
 * Encodes a one-token span at index 2 and expects it to be tagged
 * {@code A_UNIT}, with {@code OTHER} for the other three tokens.
 */
@Test
public void testEncodeSingleUnitTokenSpan() {
  String[] tokens = "I called Julie again.".split(" ");
  Span julie = new Span(2, 3, A_TYPE);
  NameSample sample = new NameSample(tokens, new Span[] {julie}, true);

  String[] actual = codec.encode(sample.getNames(), sample.getSentence().length);

  String[] expected = {OTHER, OTHER, A_UNIT, OTHER};
  Assert.assertArrayEquals(
      "'Julie' should be 'unit' only, the rest should be 'other'.",
      expected,
      actual);
}
/**
 * Encodes a two-token span (tokens 2 and 3) and expects {@code A_START}
 * followed by {@code A_LAST}, with {@code OTHER} everywhere else.
 */
@Test
public void testEncodeDoubleTokenSpan() {
  String[] tokens = "I saw Stefanie Schmidt today.".split(" ");
  Span fullName = new Span(2, 4, A_TYPE);
  NameSample sample = new NameSample(tokens, new Span[] {fullName}, true);

  String[] actual = codec.encode(sample.getNames(), sample.getSentence().length);

  String[] expected = {OTHER, OTHER, A_START, A_LAST, OTHER};
  String message = "'Stefanie' should be 'start' only, 'Schmidt' is 'last' "
      + "and the rest should be 'other'.";
  Assert.assertArrayEquals(message, expected, actual);
}
/**
 * Encodes two adjacent one-token spans (indices 1 and 2) and expects each of
 * them to be tagged {@code A_UNIT} independently.
 */
@Test
public void testEncodeAdjacentUnitSpans() {
  String[] tokens = "word PersonA PersonB word".split(" ");
  Span[] adjacent = {new Span(1, 2, A_TYPE), new Span(2, 3, A_TYPE)};
  NameSample sample = new NameSample(tokens, adjacent, true);

  String[] actual = codec.encode(sample.getNames(), sample.getSentence().length);

  String[] expected = {OTHER, A_UNIT, A_UNIT, OTHER};
  Assert.assertArrayEquals("Both PersonA and PersonB are 'unit' tags", expected, actual);
}
/**
 * Encodes two adjacent one-token spans (indices 1 and 2) and expects each of
 * them to be tagged {@code A_START}.
 */
@Test
public void testEncodeAdjacentSingleSpans() {
  String[] tokens = "something PersonA PersonB Something".split(" ");
  Span first = new Span(1, 2, A_TYPE);
  Span second = new Span(2, 3, A_TYPE);
  NameSample sample = new NameSample(tokens, new Span[] {first, second}, true);

  String[] encoded = codec.encode(sample.getNames(), sample.getSentence().length);

  String[] expected = {OTHER, A_START, A_START, OTHER};
  Assert.assertArrayEquals(expected, encoded);
}
/**
 * Encodes a two-token span immediately followed by a one-token span and
 * expects a fresh {@code A_START} at the beginning of the second span.
 */
@Test
public void testEncodeAdjacentSpans() {
  String[] tokens = "something PersonA PersonA PersonB Something".split(" ");
  Span twoTokens = new Span(1, 3, A_TYPE);
  Span oneToken = new Span(3, 4, A_TYPE);
  NameSample sample = new NameSample(tokens, new Span[] {twoTokens, oneToken}, true);

  String[] encoded = codec.encode(sample.getNames(), sample.getSentence().length);

  String[] expected = {OTHER, A_START, A_CONTINUE, A_START, OTHER};
  Assert.assertArrayEquals(expected, encoded);
}
/**
 * Checks that a name covering the last token of a sentence is still output
 * correctly by {@code toString()}.
 */
@Test
public void testNameAtEnd() {
  String[] tokens = {"My", "name", "is", "Anna"};
  Span lastToken = new Span(3, 4);

  NameSample sample = new NameSample(tokens, new Span[] {lastToken}, false);
  String rendered = sample.toString();

  Assert.assertEquals("My name is <START> Anna <END>", rendered);
}
/**
 * Checks the {@code toString()} serialization of three typed spans that
 * directly follow one another and are supplied in sentence order.
 */
@Test
public void testSequentialSpans() {
  String[] tokens = {"A", "Place", "a", "time", "A", "Person", "."};
  Span[] spans = {
      new Span(0, 2, "Place"),
      new Span(2, 4, "Time"),
      new Span(4, 6, "Person")
  };

  String rendered = new NameSample(tokens, spans, false).toString();

  Assert.assertEquals(
      "<START:Place> A Place <END> <START:Time> a time <END> <START:Person> A Person <END> .",
      rendered);
}
/**
 * Checks the {@code toString()} serialization when the spans are supplied out
 * of sentence order ("Person" before "Time"); the expected string lists them
 * in sentence order.
 */
@Test
public void testUnsortedSequentialSpans() {
  String[] tokens = {"A", "Place", "a", "time", "A", "Person", "."};
  Span[] unsorted = {
      new Span(0, 2, "Place"),
      new Span(4, 6, "Person"),
      new Span(2, 4, "Time")
  };

  String rendered = new NameSample(tokens, unsorted, false).toString();

  Assert.assertEquals(
      "<START:Place> A Place <END> <START:Time> a time <END> <START:Person> A Person <END> .",
      rendered);
}
/**
 * Indexes the events generated for a sentence that contains a newline token
 * and asserts that the indexer reports 5 contexts.
 *
 * @throws IOException if reading from or closing the event stream fails
 */
@Test
public void testIndexWithNewline() throws IOException {
  String[] sentence = "He belongs to Apache \n Software Foundation .".split(" ");

  NameContextGenerator contextGenerator = new DefaultNameContextGenerator(
      (AdaptiveFeatureGenerator[]) null);

  NameSample nameSample = new NameSample(sentence,
      new Span[] { new Span(3, 7) }, false);

  DataIndexer indexer = new TwoPassDataIndexer();
  indexer.init(new TrainingParameters(Collections.emptyMap()), null);

  // Close the event stream once indexing is done so it is not leaked.
  try (ObjectStream<Event> eventStream = new NameFinderEventStream(
      ObjectStreamUtils.createObjectStream(nameSample), "org", contextGenerator, null)) {
    indexer.index(eventStream);
  }

  Assert.assertEquals(5, indexer.getContexts().length);
}
}
/**
 * Checks the outcomes generated for {@code SENTENCE} with one "person" span
 * over the first two tokens: a start outcome, a continue outcome, ten
 * {@code OTHER} outcomes, and then the end of the stream.
 */
@Test
public void testOutcomesForSingleTypeSentence() throws IOException {
  Span person = new Span(0, 2, "person");
  NameSample sample = new NameSample(SENTENCE, new Span[] {person}, false);

  String expectedStart = "person-" + NameFinderME.START;
  String expectedContinue = "person-" + NameFinderME.CONTINUE;

  try (ObjectStream<Event> events = new NameFinderEventStream(
      ObjectStreamUtils.createObjectStream(sample))) {

    Assert.assertEquals(expectedStart, events.read().getOutcome());
    Assert.assertEquals(expectedContinue, events.read().getOutcome());

    int remainingOthers = 10;
    while (remainingOthers > 0) {
      Assert.assertEquals(NameFinderME.OTHER, events.read().getOutcome());
      remainingOthers--;
    }

    // After the expected events the stream must be exhausted.
    Assert.assertNull(events.read());
  }
}