/**
 * Cross validates a name finder over the stream returned by
 * {@code createNameSampleStream()} and asserts the resulting F-measure.
 *
 * @param params training parameters handed to the cross validator
 * @param type name type used to filter the samples, or {@code null} to evaluate
 *     the samples unfiltered
 * @param expectedScore F-measure the evaluation has to produce; compared with a
 *     tolerance of 0.001
 * @throws IOException propagated from the sample stream or the evaluation
 */
private void crossEval(TrainingParameters params, String type, double expectedScore)
    throws IOException {
  try (ObjectStream<NameSample> samples = createNameSampleStream()) {

    TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator(
        "eng", null, params, new TokenNameFinderFactory());

    // Only wrap the stream in a type filter when a specific name type was requested.
    ObjectStream<NameSample> evalSamples = (type == null)
        ? samples
        : new NameSampleTypeFilter(new String[] {type}, samples);

    // Second argument is the number of folds.
    cv.evaluate(evalSamples, 5);

    double actualScore = cv.getFMeasure().getFMeasure();
    Assert.assertEquals(expectedScore, actualScore, 0.001d);
  }
}
/**
 * Runs a 2-fold cross validation on the
 * {@code AnnotatedSentencesInsufficient.txt} resource and expects it to fail
 * with {@link InsufficientTrainingDataException}.
 *
 * @throws Exception the expected {@link InsufficientTrainingDataException}, or
 *     any unexpected failure while reading the resource
 */
@Test(expected = InsufficientTrainingDataException.class)
public void testWithInsufficientData() throws Exception {

  InputStreamFactory in = new ResourceAsStreamFactory(getClass(),
      "/opennlp/tools/namefind/AnnotatedSentencesInsufficient.txt");

  TrainingParameters mlParams = new TrainingParameters();
  mlParams.put(TrainingParameters.ITERATIONS_PARAM, 70);
  mlParams.put(TrainingParameters.CUTOFF_PARAM, 1);
  mlParams.put(TrainingParameters.ALGORITHM_PARAM, ModelType.MAXENT.toString());

  // try-with-resources: evaluate() is expected to throw, so the stream must be
  // closed on the exceptional path instead of being leaked.
  try (ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
      new PlainTextByLineStream(in, StandardCharsets.ISO_8859_1))) {

    // No factory is passed; the cast picks the monitor varargs overload.
    TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("eng",
        TYPE, mlParams, null, (TokenNameFinderEvaluationMonitor) null);

    cv.evaluate(sampleStream, 2);
  }
}
TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("eng", null, params, featureGen, resources); cv.evaluate(filteredSamples, 5); Assert.assertEquals(0.8070226153653437d, cv.getFMeasure().getFMeasure(), 0.001d);
validator = new TokenNameFinderCrossValidator(params.getLang(), params.getType(), mlParams, nameFinderFactory, listeners.toArray(new TokenNameFinderEvaluationMonitor[listeners.size()])); validator.evaluate(sampleStream, params.getFolds()); } catch (IOException e) { throw createTerminationIOException(e); System.out.println(validator.getFMeasure()); } else { System.out.println(detailedFListener.toString());
/**
 * Test that reproduces jira OPENNLP-463: cross validation is run with
 * {@code null} passed in place of the factory and of the evaluation monitor,
 * and must still yield an F-measure.
 *
 * @throws Exception on any failure while reading the resource or evaluating
 */
@Test
public void testWithNullResources() throws Exception {

  InputStreamFactory in = new ResourceAsStreamFactory(getClass(),
      "/opennlp/tools/namefind/AnnotatedSentences.txt");

  TrainingParameters mlParams = new TrainingParameters();
  mlParams.put(TrainingParameters.ITERATIONS_PARAM, 70);
  mlParams.put(TrainingParameters.CUTOFF_PARAM, 1);
  mlParams.put(TrainingParameters.ALGORITHM_PARAM, ModelType.MAXENT.toString());

  // try-with-resources so the sample stream is closed whether the evaluation
  // succeeds, throws, or the assertion fails.
  try (ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
      new PlainTextByLineStream(in, StandardCharsets.ISO_8859_1))) {

    // The cast picks the monitor varargs overload for the trailing null.
    TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("eng",
        TYPE, mlParams, null, (TokenNameFinderEvaluationMonitor) null);

    cv.evaluate(sampleStream, 2);

    Assert.assertNotNull(cv.getFMeasure());
  }
}
/**
 * Test that tries to reproduce jira OPENNLP-466: cross validation is run with a
 * {@link NameEvaluationErrorListener} writing to an in-memory buffer, and the
 * test checks that the buffer received output and that an F-measure is
 * available.
 *
 * @throws Exception on any failure while reading the resource or evaluating
 */
@Test
public void testWithNameEvaluationErrorListener() throws Exception {

  InputStreamFactory in = new ResourceAsStreamFactory(getClass(),
      "/opennlp/tools/namefind/AnnotatedSentences.txt");

  TrainingParameters mlParams = new TrainingParameters();
  mlParams.put(TrainingParameters.ITERATIONS_PARAM, 70);
  mlParams.put(TrainingParameters.CUTOFF_PARAM, 1);
  mlParams.put(TrainingParameters.ALGORITHM_PARAM, ModelType.MAXENT.toString());

  // The listener reports into this buffer; its size is asserted below.
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  NameEvaluationErrorListener listener = new NameEvaluationErrorListener(out);

  Map<String, Object> resources = Collections.emptyMap();

  // try-with-resources so the sample stream is closed whether the evaluation
  // succeeds, throws, or an assertion fails.
  try (ObjectStream<NameSample> sampleStream = new NameSampleDataStream(
      new PlainTextByLineStream(in, StandardCharsets.ISO_8859_1))) {

    TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("eng",
        TYPE, mlParams, null, resources, listener);

    cv.evaluate(sampleStream, 2);

    Assert.assertTrue(out.size() > 0);
    Assert.assertNotNull(cv.getFMeasure());
  }
}
/**
 * Cross validates a name finder on {@code trainSamples} using {@code folds}
 * folds and prints the outcome to standard output.
 *
 * <p>The sample stream is closed once the evaluation has finished. An
 * {@link IOException} during evaluation is reported on standard error and the
 * JVM is terminated with exit status 1.
 *
 * @param params the training parameters passed to the cross validator
 * @throws IllegalStateException if {@code nameClassifierFactory} is {@code null}
 */
public final void crossValidate(final TrainingParameters params) {
  if (nameClassifierFactory == null) {
    throw new IllegalStateException(
        "Classes derived from AbstractNameFinderTrainer must create and fill the AdaptiveFeatureGenerator features!");
  }

  TokenNameFinderCrossValidator validator = null;
  try {
    final TokenNameFinderEvaluationMonitor[] monitors =
        listeners.toArray(new TokenNameFinderEvaluationMonitor[listeners.size()]);
    validator = new TokenNameFinderCrossValidator(
        lang, null, params, nameClassifierFactory, monitors);
    validator.evaluate(trainSamples, folds);
  } catch (IOException evaluationError) {
    System.err.println("IO error while loading training set!");
    evaluationError.printStackTrace();
    System.exit(1);
  } finally {
    try {
      trainSamples.close();
    } catch (IOException closeError) {
      // A failed close is only reported; it does not abort the run.
      System.err.println("IO error with the train samples!");
    }
  }

  // The detailed listener's report is printed when one is configured;
  // otherwise the validator's overall F-measure is printed.
  if (detailedFListener != null) {
    System.out.println(detailedFListener.toString());
  } else {
    System.out.println(validator.getFMeasure());
  }
}
validator = new TokenNameFinderCrossValidator(params.getLang(), params.getType(), mlParams, nameFinderFactory, listeners.toArray(new TokenNameFinderEvaluationMonitor[listeners.size()])); validator.evaluate(sampleStream, params.getFolds()); } catch (IOException e) { throw createTerminationIOException(e); System.out.println(validator.getFMeasure()); } else { System.out.println(detailedFListener.toString());
validator = new TokenNameFinderCrossValidator(params.getLang(), params.getType(), mlParams, nameFinderFactory, listeners.toArray(new TokenNameFinderEvaluationMonitor[listeners.size()])); validator.evaluate(sampleStream, params.getFolds()); } catch (IOException e) { throw createTerminationIOException(e); System.out.println(validator.getFMeasure()); } else { System.out.println(detailedFListener.toString());