/**
 * Computes the hash code over the same fields compared in {@code equals}:
 * sentence, names, additional context (all hashed by array content) and the
 * clear-adaptive-data flag.
 */
@Override
public int hashCode() {
  final int sentenceHash = Arrays.hashCode(getSentence());
  final int namesHash = Arrays.hashCode(getNames());
  final int contextHash = Arrays.hashCode(getAdditionalContext());
  return Objects.hash(sentenceHash, namesHash, contextHash, isClearAdaptiveDataSet());
}
/**
 * Two {@code NameSample}s are equal when their sentence tokens, names,
 * additional context (compared element-wise) and clear-adaptive-data flags
 * all match.
 */
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if (!(obj instanceof NameSample)) {
    return false;
  }
  NameSample other = (NameSample) obj;
  return Arrays.equals(getSentence(), other.getSentence())
      && Arrays.equals(getNames(), other.getNames())
      && Arrays.equals(getAdditionalContext(), other.getAdditionalContext())
      && isClearAdaptiveDataSet() == other.isClearAdaptiveDataSet();
}
public DocumentSample read() throws IOException { List<NameSample> document = new ArrayList<>(); if (beginSample == null) { // Assume that the clear flag is set beginSample = samples.read(); } // Underlying stream is exhausted! if (beginSample == null) { return null; } document.add(beginSample); NameSample sample; while ((sample = samples.read()) != null) { if (sample.isClearAdaptiveDataSet()) { beginSample = sample; break; } document.add(sample); } // Underlying stream is exhausted, // next call must return null if (sample == null) { beginSample = null; } return new DocumentSample(document.toArray(new NameSample[document.size()])); }
// NOTE(review): presumably the extra newline serializes the clear-adaptive-data
// flag as an empty line (document boundary) — confirm against the parser.
if (isClearAdaptiveDataSet()) result.append("\n");
/**
 * Reads the next {@link NameSample} and retains only the names whose type
 * is contained in the configured {@code types} set.
 *
 * @return the filtered {@link NameSample}, or {@code null} once the
 *         underlying stream is exhausted.
 *
 * @throws IOException if reading from the underlying stream fails.
 */
public NameSample read() throws IOException {
  NameSample sample = samples.read();
  if (sample == null) {
    // Underlying stream is exhausted.
    return null;
  }

  List<Span> filteredNames = new ArrayList<>();
  for (Span name : sample.getNames()) {
    if (types.contains(name.getType())) {
      filteredNames.add(name);
    }
  }

  // NOTE(review): the input sample's additional context is dropped here
  // (null is passed through) — confirm this is intentional.
  return new NameSample(sample.getId(), sample.getSentence(),
      filteredNames.toArray(new Span[0]), null, sample.isClearAdaptiveDataSet());
}
}
@Override protected Iterator<Event> createEvents(NameSample sample) { if (sample.isClearAdaptiveDataSet()) { contextGenerator.clearAdaptiveData(); } Span[] names = sample.getNames(); if (!Objects.isNull(this.defaultType)) { overrideType(names); } String[] outcomes = codec.encode(names, sample.getSentence().length); // String outcomes[] = generateOutcomes(sample.getNames(), type, sample.getSentence().length); additionalContextFeatureGenerator.setCurrentContext(sample.getAdditionalContext()); String[] tokens = new String[sample.getSentence().length]; for (int i = 0; i < sample.getSentence().length; i++) { tokens[i] = sample.getSentence()[i]; } return generateEvents(tokens, outcomes, contextGenerator).iterator(); }
/** * Evaluates the given reference {@link NameSample} object. * * This is done by finding the names with the * {@link TokenNameFinder} in the sentence from the reference * {@link NameSample}. The found names are then used to * calculate and update the scores. * * @param reference the reference {@link NameSample}. * * @return the predicted {@link NameSample}. */ @Override protected NameSample processSample(NameSample reference) { if (reference.isClearAdaptiveDataSet()) { nameFinder.clearAdaptiveData(); } Span[] predictedNames = nameFinder.find(reference.getSentence()); Span[] references = reference.getNames(); // OPENNLP-396 When evaluating with a file in the old format // the type of the span is null, but must be set to default to match // the output of the name finder. for (int i = 0; i < references.length; i++) { if (references[i].getType() == null) { references[i] = new Span(references[i].getStart(), references[i].getEnd(), "default"); } } fmeasure.updateScores(references, predictedNames); return new NameSample(reference.getSentence(), predictedNames, reference.isClearAdaptiveDataSet()); }
/**
 * Verifies that the clear-adaptive-data flag is set only on the sample that
 * follows the empty line in the training data, and that the stream then
 * reports exhaustion.
 */
@Test
public void testClearAdaptiveData() throws IOException {
  String trainingData = "a\n" + "b\n" + "c\n" + "\n" + "d\n";

  ObjectStream<String> untokenizedLineStream = new PlainTextByLineStream(
      new MockInputStreamFactory(trainingData), StandardCharsets.UTF_8);

  ObjectStream<NameSample> trainingStream = new NameSampleDataStream(untokenizedLineStream);
  try {
    assertFalse(trainingStream.read().isClearAdaptiveDataSet());
    assertFalse(trainingStream.read().isClearAdaptiveDataSet());
    assertFalse(trainingStream.read().isClearAdaptiveDataSet());
    assertTrue(trainingStream.read().isClearAdaptiveDataSet());
    assertNull(trainingStream.read());
  } finally {
    // Close even when an assertion fails, so the stream never leaks.
    trainingStream.close();
  }
}
/**
 * Parses the Dutch CoNLL-2002 sample and checks the clear-adaptive-data
 * flags of the first two samples.
 */
@Test
public void testParsingDutchSample() throws IOException {
  ObjectStream<NameSample> sampleStream = openData(LANGUAGE.NLD, "conll2002-nl.sample");
  try {
    NameSample personName = sampleStream.read();
    // Fail cleanly (not with an NPE) if the stream is unexpectedly empty.
    Assert.assertNotNull(personName);
    Assert.assertEquals(0, personName.getNames().length);
    Assert.assertTrue(personName.isClearAdaptiveDataSet());

    personName = sampleStream.read();
    Assert.assertNotNull(personName);
    Assert.assertFalse(personName.isClearAdaptiveDataSet());

    Assert.assertNull(sampleStream.read());
  } finally {
    // Release the underlying resource regardless of assertion outcome.
    sampleStream.close();
  }
}
/**
 * Parses the German sample and checks the first sample's sentence length,
 * name count and clear-adaptive-data flag.
 */
@Test
public void testParsingGermanSample() throws IOException {
  ObjectStream<NameSample> sampleStream = openData(LANGUAGE.DE, GERMAN_SAMPLE);
  try {
    NameSample personName = sampleStream.read();
    Assert.assertNotNull(personName);
    Assert.assertEquals(5, personName.getSentence().length);
    Assert.assertEquals(0, personName.getNames().length);
    // assertTrue is clearer than assertEquals(true, ...)
    Assert.assertTrue(personName.isClearAdaptiveDataSet());
  } finally {
    // Release the underlying resource regardless of assertion outcome.
    sampleStream.close();
  }
}
/**
 * Parses the Italian Evalita sample and checks the first sample's sentence,
 * its single name span, and stream exhaustion.
 */
@Test
public void testParsingItalianSample() throws IOException {
  ObjectStream<NameSample> sampleStream = openData(LANGUAGE.IT, "evalita-ner-it.sample");
  try {
    NameSample personName = sampleStream.read();
    Assert.assertNotNull(personName);
    Assert.assertEquals(11, personName.getSentence().length);
    Assert.assertEquals(1, personName.getNames().length);
    // assertTrue is clearer than assertEquals(true, ...); the original
    // asserted this twice — once is enough.
    Assert.assertTrue(personName.isClearAdaptiveDataSet());

    Span nameSpan = personName.getNames()[0];
    Assert.assertEquals(8, nameSpan.getStart());
    Assert.assertEquals(10, nameSpan.getEnd());

    Assert.assertEquals(0, sampleStream.read().getNames().length);
    Assert.assertNull(sampleStream.read());
  } finally {
    // Release the underlying resource regardless of assertion outcome.
    sampleStream.close();
  }
}
/**
 * Parses the Spanish CoNLL-2002 sample and checks the first sample's
 * sentence, its single name span, and stream exhaustion.
 */
@Test
public void testParsingSpanishSample() throws IOException {
  ObjectStream<NameSample> sampleStream = openData(LANGUAGE.SPA, "conll2002-es.sample");
  try {
    NameSample personName = sampleStream.read();
    Assert.assertNotNull(personName);
    Assert.assertEquals(5, personName.getSentence().length);
    Assert.assertEquals(1, personName.getNames().length);
    // assertTrue is clearer than assertEquals(true, ...); the original
    // asserted this twice — once is enough.
    Assert.assertTrue(personName.isClearAdaptiveDataSet());

    Span nameSpan = personName.getNames()[0];
    Assert.assertEquals(0, nameSpan.getStart());
    Assert.assertEquals(4, nameSpan.getEnd());

    Assert.assertEquals(0, sampleStream.read().getNames().length);
    Assert.assertNull(sampleStream.read());
  } finally {
    // Release the underlying resource regardless of assertion outcome.
    sampleStream.close();
  }
}
/**
 * Hashes the same fields compared in {@code equals}; arrays are hashed by
 * content via {@link Arrays#hashCode}.
 */
@Override
public int hashCode() {
  return Objects.hash(
      Arrays.hashCode(getSentence()),
      Arrays.hashCode(getNames()),
      Arrays.hashCode(getAdditionalContext()),
      isClearAdaptiveDataSet());
}
/**
 * Combines content-based hashes of the sentence, name and additional-context
 * arrays with the clear-adaptive-data flag — mirroring {@code equals}.
 */
@Override
public int hashCode() {
  final int sentence = Arrays.hashCode(getSentence());
  final int names = Arrays.hashCode(getNames());
  final int context = Arrays.hashCode(getAdditionalContext());
  final boolean clearFlag = isClearAdaptiveDataSet();
  return Objects.hash(sentence, names, context, clearFlag);
}
/**
 * Parses the English sample and checks both samples' sentences, names and
 * clear-adaptive-data flags, plus stream exhaustion.
 */
@Test
public void testParsingEnglishSample() throws IOException {
  ObjectStream<NameSample> sampleStream = openData(LANGUAGE.EN, ENGLISH_SAMPLE);
  try {
    NameSample personName = sampleStream.read();
    Assert.assertNotNull(personName);
    Assert.assertEquals(9, personName.getSentence().length);
    Assert.assertEquals(0, personName.getNames().length);
    // assertTrue/assertFalse are clearer than assertEquals(true/false, ...)
    Assert.assertTrue(personName.isClearAdaptiveDataSet());

    personName = sampleStream.read();
    Assert.assertNotNull(personName);
    Assert.assertEquals(2, personName.getSentence().length);
    Assert.assertEquals(1, personName.getNames().length);
    Assert.assertFalse(personName.isClearAdaptiveDataSet());

    Span nameSpan = personName.getNames()[0];
    Assert.assertEquals(0, nameSpan.getStart());
    Assert.assertEquals(2, nameSpan.getEnd());

    Assert.assertNull(sampleStream.read());
  } finally {
    // Release the underlying resource regardless of assertion outcome.
    sampleStream.close();
  }
}
/**
 * Compares this sample to {@code obj} field by field: sentence, names and
 * additional context element-wise, plus the clear-adaptive-data flag.
 */
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if (!(obj instanceof NameSample)) {
    return false;
  }
  NameSample that = (NameSample) obj;
  return Arrays.equals(getSentence(), that.getSentence())
      && Arrays.equals(getNames(), that.getNames())
      && Arrays.equals(getAdditionalContext(), that.getAdditionalContext())
      && isClearAdaptiveDataSet() == that.isClearAdaptiveDataSet();
}
/**
 * Equality holds when every field compared here matches: the three arrays
 * (by content) and the clear-adaptive-data flag.
 */
@Override
public boolean equals(Object obj) {
  if (this == obj) {
    return true;
  }
  if (obj instanceof NameSample) {
    NameSample other = (NameSample) obj;
    boolean sameSentence = Arrays.equals(getSentence(), other.getSentence());
    boolean sameNames = Arrays.equals(getNames(), other.getNames());
    boolean sameContext = Arrays.equals(getAdditionalContext(), other.getAdditionalContext());
    boolean sameClearFlag = isClearAdaptiveDataSet() == other.isClearAdaptiveDataSet();
    return sameSentence && sameNames && sameContext && sameClearFlag;
  }
  return false;
}
/**
 * Reads the next {@link NameSample} and retains only the names whose type
 * is contained in the configured {@code types} set.
 *
 * @return the filtered {@link NameSample}, or {@code null} once the
 *         underlying stream is exhausted.
 *
 * @throws IOException if reading from the underlying stream fails.
 */
public NameSample read() throws IOException {
  NameSample sample = samples.read();
  if (sample == null) {
    // Underlying stream is exhausted.
    return null;
  }

  List<Span> filteredNames = new ArrayList<>();
  for (Span name : sample.getNames()) {
    if (types.contains(name.getType())) {
      filteredNames.add(name);
    }
  }

  // NOTE(review): the input sample's additional context is dropped here
  // (null is passed through) — confirm this is intentional.
  return new NameSample(sample.getId(), sample.getSentence(),
      filteredNames.toArray(new Span[0]), null, sample.isClearAdaptiveDataSet());
}
}
/**
 * Reads the next {@link NameSample} and retains only the names whose type
 * is contained in the configured {@code types} set.
 *
 * @return the filtered {@link NameSample}, or {@code null} once the
 *         underlying stream is exhausted.
 *
 * @throws IOException if reading from the underlying stream fails.
 */
public NameSample read() throws IOException {
  NameSample sample = samples.read();
  if (sample == null) {
    // Underlying stream is exhausted.
    return null;
  }

  List<Span> filteredNames = new ArrayList<>();
  for (Span name : sample.getNames()) {
    if (types.contains(name.getType())) {
      filteredNames.add(name);
    }
  }

  // NOTE(review): the input sample's additional context is dropped here
  // (null is passed through) — confirm this is intentional.
  return new NameSample(sample.getId(), sample.getSentence(),
      filteredNames.toArray(new Span[0]), null, sample.isClearAdaptiveDataSet());
}
}
@Override protected Iterator<Event> createEvents(NameSample sample) { if (sample.isClearAdaptiveDataSet()) { contextGenerator.clearAdaptiveData(); } Span[] names = sample.getNames(); if (!Objects.isNull(this.defaultType)) { overrideType(names); } String[] outcomes = codec.encode(names, sample.getSentence().length); // String outcomes[] = generateOutcomes(sample.getNames(), type, sample.getSentence().length); additionalContextFeatureGenerator.setCurrentContext(sample.getAdditionalContext()); String[] tokens = new String[sample.getSentence().length]; for (int i = 0; i < sample.getSentence().length; i++) { tokens[i] = sample.getSentence()[i]; } return generateEvents(tokens, outcomes, contextGenerator).iterator(); }