/**
 * Emits the full evaluation report to the configured output:
 * general statistics first, then the per-tag error ranking,
 * and finally the confusion table.
 */
public void writeReport() {
  printGeneralStatistics();
  printTagsErrorRank();
  printGeneralConfusionTable();
}
}
/**
 * Callback for samples where the prediction matches the reference;
 * the pair is still folded into the running statistics.
 */
public void correctlyClassified(NameSample reference, NameSample prediction) {
  statsAdd(reference, prediction);
}
/**
 * Returns the one-line usage message for this multi-model tool,
 * e.g. {@code Usage: opennlp ToolName model1 model2 ... modelN < sentences}.
 */
public String getHelp() {
  StringBuilder usage = new StringBuilder("Usage: ");
  usage.append(CLI.CMD).append(' ').append(getName());
  usage.append(" model1 model2 ... modelN < sentences");
  return usage.toString();
}
// NOTE(review): truncated interior of an evaluator-setup method — several 'if'/'try'
// bodies opened here are never closed in this view, so the enclosing method continues
// outside this chunk. Visible flow: load training parameters (substituting defaults
// when absent), open the feature-generator descriptor bytes and resources, optionally
// restrict the sample stream to the requested name types, then attach the optional
// misclassification, detailed-F, and fine-grained report listeners. The report
// OutputStream is opened here; presumably it is closed further down — TODO confirm.
mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true); if (mlParams == null) { mlParams = new TrainingParameters(); TokenNameFinderTrainerTool.openFeatureGeneratorBytes(params.getFeaturegen()); resources = TokenNameFinderTrainerTool.loadResources(params.getResources(), params.getFeaturegen()); if (params.getNameTypes() != null) { String[] nameTypes = params.getNameTypes().split(","); sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream); if (params.getMisclassified()) { listeners.add(new NameEvaluationErrorListener()); if (params.getDetailedF()) { detailedFListener = new TokenNameFinderDetailedFMeasureListener(); listeners.add(detailedFListener); String sequenceCodecImplName = params.getSequenceCodec(); File reportFile = params.getReportOutputFile(); OutputStream reportOutputStream = null; try { reportOutputStream = new FileOutputStream(reportFile); reportListener = new TokenNameFinderFineGrainedReportListener(sequenceCodec, reportOutputStream); listeners.add(reportListener);
// NOTE(review): truncated and apparently out-of-order interior of a trainer method —
// a 'catch (InvalidFormatException e)' appears before any matching 'try', and the
// 'throw createTerminationIOException(e)' trails the train() call, so statements from
// different try/catch regions of the original method seem interleaved here. Visible
// intent: load training parameters (defaulting when absent), read the feature-generator
// descriptor and resources, optionally filter samples by name type, build the
// TokenNameFinderFactory, wrap the stream in NameSampleCountersStream for counting,
// and train a NameFinderME model. Do not edit logic from this view.
mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true); if (mlParams == null) { mlParams = new TrainingParameters(); File modelOutFile = params.getModel(); byte[] featureGeneratorBytes = openFeatureGeneratorBytes(params.getFeaturegen()); resources = loadResources(params.getResources(), params.getFeaturegen()); if (params.getNameTypes() != null) { String[] nameTypes = params.getNameTypes().split(","); sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream); String sequenceCodecImplName = params.getSequenceCodec(); nameFinderFactory = TokenNameFinderFactory.create(params.getFactory(), featureGeneratorBytes, resources, sequenceCodec); } catch (InvalidFormatException e) { NameSampleCountersStream counters = new NameSampleCountersStream(sampleStream); sampleStream = counters; try { model = opennlp.tools.namefind.NameFinderME.train( params.getLang(), params.getType(), sampleStream, mlParams, nameFinderFactory); throw createTerminationIOException(e);
// NOTE(review): truncated interior of an evaluator run method — braces do not balance
// in this view (the trailing 'else' belongs to a conditional opened outside the chunk).
// Visible flow: load the TokenNameFinderModel, attach the optional misclassification /
// detailed-F / fine-grained report listeners (opening the report FileOutputStream),
// optionally filter the sample stream by name type, then emit either the plain
// F-measure or the detailed listener's output. Whether reportOutputStream is closed
// happens outside this view — TODO confirm.
TokenNameFinderModel model = new TokenNameFinderModelLoader().load(params.getModel()); if (params.getMisclassified()) { listeners.add(new NameEvaluationErrorListener()); if (params.getDetailedF()) { detailedFListener = new TokenNameFinderDetailedFMeasureListener(); listeners.add(detailedFListener); File reportFile = params.getReportOutputFile(); OutputStream reportOutputStream = null; try { reportOutputStream = new FileOutputStream(reportFile); reportListener = new TokenNameFinderFineGrainedReportListener(model.getSequenceCodec(), reportOutputStream); listeners.add(reportListener); if (params.getNameTypes() != null) { String[] nameTypes = params.getNameTypes().split(","); sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream); reportListener.writeReport(); System.out.println(evaluator.getFMeasure()); } else { System.out.println(detailedFListener.toString());
// NOTE(review): truncated — the line ends mid-expression (the exception-message
// concatenation and the closing braces are missing), so this method continues outside
// this view. Also note the apparent inconsistency: 'testData' is built from the census
// data path but the stream reads 'sampleDataIn', a name not defined in this view —
// verify against the full file whether testData should feed the stream.
// Visible flow: parse CLI parameters, open the Census-90 name stream with the
// configured encoding, build a Dictionary from it, and translate IO failures into
// TerminateToolException.
public void run(String[] args) { Parameters params = validateAndParseParams(args, Parameters.class); File testData = new File(params.getCensusData()); File dictOutFile = new File(params.getDict()); try ( ObjectStream<StringList> sampleStream = new NameFinderCensus90NameStream( sampleDataIn, Charset.forName(params.getEncoding()))) { System.out.println("Creating Dictionary..."); mDictionary = createDictionary(sampleStream); } catch (IOException e) { throw new TerminateToolException(-1, "IO error while reading training data or indexing data: "
// NOTE(review): fragment of a larger method (the 'else' pairs with a condition outside
// this view, presumably an empty-args usage check, and the loop over 'args'/'i' is not
// visible). Visible behavior: print the usage text, or load each argument as a
// TokenNameFinderModel and wrap it in a NameFinderME.
System.out.println(getHelp()); } else { TokenNameFinderModel model = new TokenNameFinderModelLoader().load(new File(args[i])); nameFinders[i] = new NameFinderME(model);
/**
 * Prints a human-readable summary of the training data to stdout:
 * the sentence count, the token count, and one line per entity type
 * with its occurrence count (e.g. {@code #person entities: 42}).
 */
public void printSummary() {
  System.out.println("Training data summary:");
  System.out.println("#Sentences: " + getSentenceCount());
  System.out.println("#Tokens: " + getTokenCount());
  // Fix: the original accumulated the per-type counts into a local
  // 'totalNames' that was never read — dead code, removed.
  for (Map.Entry<String, Integer> counter : getNameCounters().entrySet()) {
    System.out.println("#" + counter.getKey() + " entities: " + counter.getValue());
  }
}
}
/**
 * Evaluating a dictionary name finder against its own dictionary data
 * must yield a perfect F-measure and recall.
 */
@Test
public void testEvaluator() throws IOException, URISyntaxException {
  DictionaryNameFinder nameFinder = new DictionaryNameFinder(createDictionary());
  TokenNameFinderEvaluator evaluator =
      new TokenNameFinderEvaluator(nameFinder, new NameEvaluationErrorListener());
  ObjectStream<NameSample> sample = createSample();
  try {
    evaluator.evaluate(sample);
  } finally {
    // Fix: the original closed the stream only on success, leaking it
    // whenever evaluate() threw.
    sample.close();
  }
  FMeasure fmeasure = evaluator.getFMeasure();
  // Fix: assertEquals with a delta reports the actual value on failure,
  // unlike the original assertTrue(x == 1) exact double comparison.
  Assert.assertEquals(1.0, fmeasure.getFMeasure(), 0.0);
  Assert.assertEquals(1.0, fmeasure.getRecallScore(), 0.0);
}
/**
 * Running the tool with no arguments must print exactly the help text.
 */
@Test()
public void usage() {
  String[] args = new String[]{};
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  PrintStream ps = new PrintStream(baos);
  // Fix: remember and restore the real stdout — the original left
  // System.out redirected, polluting every test that ran afterwards.
  PrintStream originalOut = System.out;
  System.setOut(ps);
  try {
    TokenNameFinderTool tool = new TokenNameFinderTool();
    tool.run(args);
    final String content = new String(baos.toByteArray(), StandardCharsets.UTF_8);
    Assert.assertEquals(tool.getHelp(), content.trim());
  } finally {
    System.setOut(originalOut);
  }
}
/**
 * Pointing the tool at a file that is not a valid model must abort
 * with a {@code TerminateToolException}.
 */
@Test(expected = TerminateToolException.class)
public void invalidModel() {
  TokenNameFinderTool tool = new TokenNameFinderTool();
  tool.run(new String[]{"invalidmodel.bin"});
}
/**
 * Returns the usage text auto-generated from this tool's
 * {@code Parameters} interface.
 */
public String getHelp() {
  return getBasicHelp(Parameters.class);
}
/**
 * Reads the feature-generator descriptor into a byte array.
 *
 * @param featureGenDescriptorFile path to the descriptor, may be {@code null}
 * @return the descriptor bytes, or {@code null} when no path was given
 */
static byte[] openFeatureGeneratorBytes(String featureGenDescriptorFile) {
  return featureGenDescriptorFile == null
      ? null
      : openFeatureGeneratorBytes(new File(featureGenDescriptorFile));
}
/**
 * Reports a sample whose predicted name spans differ from the
 * reference spans by printing a detailed error entry.
 */
@Override
public void missclassified(NameSample reference, NameSample prediction) {
  printError(reference.getId(), reference.getNames(), prediction.getNames(),
      reference, prediction, reference.getSentence());
}
private void statsAdd(NameSample reference, NameSample prediction) { String[] refTags = sequenceCodec.encode(reference.getNames(), reference.getSentence().length); String[] predTags = sequenceCodec.encode(prediction.getNames(), prediction.getSentence().length); // we don' want it to compute token frequency, so we pass an array of empty strings instead // of tokens getStats().add(new String[reference.getSentence().length], refTags, predTags); }
/**
 * With a deliberately wrong prediction the F-measure drops to 0.8 and
 * the error listener must write something to its stream.
 */
@Test
public void testNegative() {
  OutputStream stream = new ByteArrayOutputStream();
  TokenNameFinderEvaluationMonitor listener = new NameEvaluationErrorListener(stream);
  Span[] pred = createSimpleNameSampleB().getNames();
  TokenNameFinderEvaluator eval =
      new TokenNameFinderEvaluator(new DummyNameFinder(pred), listener);
  eval.evaluateSample(createSimpleNameSampleA());
  Assert.assertEquals(0.8, eval.getFMeasure().getFMeasure(), 0.0);
  // Fix: the original used assertNotSame(0, length), which compares the
  // identity of autoboxed Integers and only behaved correctly because of
  // the small-Integer cache. Assert the intent directly instead.
  Assert.assertTrue(stream.toString().length() > 0);
}
/**
 * Callback for samples where prediction and reference disagree;
 * the pair still contributes to the running statistics.
 */
public void missclassified(NameSample reference, NameSample prediction) {
  statsAdd(reference, prediction);
}
/**
 * Trains a throwaway model, pipes two sentences through the tool via a
 * redirected stdin/stdout, and checks that the person name is tagged.
 */
@Test
public void run() throws IOException {
  File model1 = trainModel();
  String[] args = new String[]{model1.getAbsolutePath()};
  final String in = "It is Stefanie Schmidt.\n\nNothing in this sentence.";
  InputStream stream = new ByteArrayInputStream(in.getBytes(StandardCharsets.UTF_8));
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  // Fix: remember the real streams — the original redirected System.in and
  // System.out and never restored them, breaking every test run afterwards.
  InputStream originalIn = System.in;
  PrintStream originalOut = System.out;
  try {
    System.setIn(stream);
    System.setOut(new PrintStream(baos));
    TokenNameFinderTool tool = new TokenNameFinderTool();
    tool.run(args);
  } finally {
    System.setIn(originalIn);
    System.setOut(originalOut);
  }
  final String content = new String(baos.toByteArray(), StandardCharsets.UTF_8);
  Assert.assertTrue(content.contains("It is <START:person> Stefanie Schmidt. <END>"));
  // Fix: the original silently ignored the delete() result.
  Assert.assertTrue(model1.delete());
}
/**
 * With identical reference and prediction the F-measure is a perfect 1.0
 * and the error listener writes nothing.
 */
@Test
public void testPositive() {
  OutputStream sink = new ByteArrayOutputStream();
  TokenNameFinderEvaluationMonitor listener = new NameEvaluationErrorListener(sink);
  Span[] predictions = createSimpleNameSampleA().getNames();
  TokenNameFinderEvaluator evaluator =
      new TokenNameFinderEvaluator(new DummyNameFinder(predictions), listener);
  evaluator.evaluateSample(createSimpleNameSampleA());
  Assert.assertEquals(1.0, evaluator.getFMeasure().getFMeasure(), 0.0);
  Assert.assertEquals(0, sink.toString().length());
}