// Rebind sampleStream to a NameSampleTypeFilter that wraps the previous stream and is
// configured with nameTypes. Presumably only names of the listed types are kept in the
// samples read afterwards -- confirm against NameSampleTypeFilter.
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
/**
 * Filters the fixture stream on two name types (person and organization) and checks
 * that the first sample read back contains spans of exactly those two types:
 * two person spans and one organization span.
 *
 * @throws IOException if reading from the filtered stream fails
 */
@Test
public void testMultiFilter() throws IOException {
  final String[] wantedTypes = {person, organization};
  filter = new NameSampleTypeFilter(wantedTypes, sampleStream(text));

  final NameSample sample = filter.read();

  // Bucket the surviving spans by their type so each type can be counted separately.
  final Map<String, List<Span>> spansByType =
      Arrays.stream(sample.getNames())
          .collect(Collectors.groupingBy(Span::getType));

  Assert.assertEquals(2, spansByType.size());
  Assert.assertEquals(2, spansByType.get(person).size());
  Assert.assertEquals(1, spansByType.get(organization).size());
}
// Rebind sampleStream to a NameSampleTypeFilter that wraps the previous stream and is
// configured with nameTypes. Presumably only names of the listed types are kept in the
// samples read afterwards -- confirm against NameSampleTypeFilter.
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
/**
 * Builds the filter with an empty list of name types and checks that the first
 * sample read back carries no name spans at all.
 *
 * @throws IOException if reading from the filtered stream fails
 */
@Test
public void testNoFilter() throws IOException {
  final String[] noTypes = new String[0];
  filter = new NameSampleTypeFilter(noTypes, sampleStream(text));

  final NameSample sample = filter.read();
  final int keptNames = sample.getNames().length;

  Assert.assertEquals(0, keptNames);
}
// Rebind sampleStream to a NameSampleTypeFilter that wraps the previous stream and is
// configured with nameTypes. Presumably only names of the listed types are kept in the
// samples read afterwards -- confirm against NameSampleTypeFilter.
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
/**
 * Filters the fixture stream on the organization type only and checks that the first
 * sample read back holds exactly one span, and that this span is of the organization type.
 *
 * @throws IOException if reading from the filtered stream fails
 */
@Test
public void testSingleFilter() throws IOException {
  final String[] onlyOrganization = {organization};
  filter = new NameSampleTypeFilter(onlyOrganization, sampleStream(text));

  final NameSample sample = filter.read();

  Assert.assertEquals(1, sample.getNames().length);

  // Only inspect the span once its existence has been asserted above.
  final String survivingType = sample.getNames()[0].getType();
  Assert.assertEquals(organization, survivingType);
}
/**
 * Runs a 5-fold cross validation over the name sample stream and asserts the
 * resulting F-measure against an expected value (tolerance 0.001).
 *
 * @param params the training parameters handed to the cross validator
 * @param type the single name type to restrict the samples to, or {@code null}
 *     to evaluate on the unfiltered samples
 * @param expectedScore the F-measure the cross validation is expected to reach
 * @throws IOException if creating, reading or closing the sample stream fails
 */
private void crossEval(TrainingParameters params, String type, double expectedScore)
    throws IOException {
  try (ObjectStream<NameSample> samples = createNameSampleStream()) {
    TokenNameFinderCrossValidator validator = new TokenNameFinderCrossValidator("eng", null,
        params, new TokenNameFinderFactory());

    // Default to the raw samples; wrap them only when a type restriction was requested.
    ObjectStream<NameSample> evaluationInput = samples;
    if (type != null) {
      evaluationInput = new NameSampleTypeFilter(new String[] {type}, samples);
    }

    validator.evaluate(evaluationInput, 5);

    Assert.assertEquals(expectedScore, validator.getFMeasure().getFMeasure(), 0.001d);
  }
}
/**
 * Construct an evaluator. It takes from the properties a model,
 * a testset and the format of the testset. Every other parameter
 * set in the training, e.g., beamsize, decoding, etc., is serialized
 * in the model.
 *
 * <p>The properties read are {@code language}, {@code clearFeatures},
 * {@code model}, {@code testset}, {@code corpusFormat} and {@code types}.
 * When {@code types} is present and differs from
 * {@code Flags.DEFAULT_NE_TYPES}, it is split on commas and the test
 * samples are wrapped in a {@link NameSampleTypeFilter} for those types.
 * The model is loaded from disk only if none is cached yet for the
 * language.
 *
 * @param props the properties parameter
 * @throws IOException if the test set or the model file cannot be read
 */
public Evaluate(final Properties props) throws IOException {
  String lang = props.getProperty("language");
  String clearFeatures = props.getProperty("clearFeatures");
  String model = props.getProperty("model");
  String testSet = props.getProperty("testset");
  String corpusFormat = props.getProperty("corpusFormat");
  String netypes = props.getProperty("types");

  testSamples = AbstractTrainer.getNameStream(testSet, clearFeatures, corpusFormat);

  // Compare string content, not references: a value read from a properties file is
  // never the same object as the constant. A missing "types" property is treated as
  // "no type filtering" instead of failing with a NullPointerException on split().
  if (netypes != null && !netypes.equals(Flags.DEFAULT_NE_TYPES)) {
    String[] neTypes = netypes.split(",");
    testSamples = new NameSampleTypeFilter(neTypes, testSamples);
  }

  // Only touch the file system when no model is cached for this language, and make
  // sure the stream is closed once the model has been deserialized.
  if (!nercModels.containsKey(lang)) {
    try (FileInputStream modelStream = new FileInputStream(model)) {
      nercModels.putIfAbsent(lang, new TokenNameFinderModel(modelStream));
    }
  }
  nameFinder = new NameFinderME(nercModels.get(lang));
}
/** * Construct a trainer with training and test data, and with options for * language, beamsize for decoding, sequence codec and corpus format (conll or opennlp). * @param params the training parameters * @throws IOException * io exception */ public AbstractTrainer(final TrainingParameters params) throws IOException { this.lang = Flags.getLanguage(params); this.clearTrainingFeatures = Flags.getClearTrainingFeatures(params); this.clearEvaluationFeatures = Flags.getClearEvaluationFeatures(params); this.corpusFormat = Flags.getCorpusFormat(params); this.trainData = params.getSettings().get("TrainSet"); this.testData = params.getSettings().get("TestSet"); trainSamples = getNameStream(trainData, clearTrainingFeatures, corpusFormat); testSamples = getNameStream(testData, clearEvaluationFeatures, corpusFormat); this.beamSize = Flags.getBeamsize(params); this.sequenceCodec = Flags.getSequenceCodec(params); if (params.getSettings().get("Types") != null) { String netypes = params.getSettings().get("Types"); String[] neTypes = netypes.split(","); trainSamples = new NameSampleTypeFilter(neTypes, trainSamples); testSamples = new NameSampleTypeFilter(neTypes, testSamples); } }
/**
 * Construct a cross validator from the training parameters: language, corpus
 * format, training set location, beam size, number of folds and sequence codec.
 * When a {@code Types} setting is present, the training samples are restricted
 * through a {@link NameSampleTypeFilter} built from its comma-separated values.
 * Finally the name factory and the evaluation listeners are set up from the
 * same parameters.
 *
 * @param params the training parameters
 * @throws IOException if the training data cannot be read
 */
public CrossValidator(final TrainingParameters params) throws IOException {
  this.lang = Flags.getLanguage(params);
  final String clearFeatures = Flags.getClearTrainingFeatures(params);
  this.corpusFormat = Flags.getCorpusFormat(params);

  this.trainData = params.getSettings().get("TrainSet");
  trainSamples = AbstractTrainer.getNameStream(trainData, clearFeatures, corpusFormat);

  this.beamSize = Flags.getBeamsize(params);
  this.folds = Flags.getFolds(params);
  this.sequenceCodec = TokenNameFinderFactory.instantiateSequenceCodec(
      getSequenceCodec(Flags.getSequenceCodec(params)));

  // Optional restriction to a subset of name types, given as a comma-separated list.
  final String typesSetting = params.getSettings().get("Types");
  if (typesSetting != null) {
    final String[] selectedTypes = typesSetting.split(",");
    trainSamples = new NameSampleTypeFilter(selectedTypes, trainSamples);
  }

  createNameFactory(params);
  getEvalListeners(params);
}
// Each statement below rebinds sampleStream to a new NameSampleTypeFilter that wraps the
// previous value of sampleStream and is configured with nameTypes.
// NOTE(review): the six statements are identical; they look like independent excerpts.
// If they really execute back to back, the stream ends up wrapped six times -- confirm.
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);
sampleStream = new NameSampleTypeFilter(nameTypes, sampleStream);