/**
 * Convenience constructor that delegates to the eight-argument constructor of this class.
 *
 * <p>Besides the caller-supplied arguments, it passes {@code NameFinderME.DEFAULT_BEAM_SIZE}
 * as the third argument, a newly created {@code BioCodec}, and a newly created
 * default-constructed {@code TokenNameFinderFactory}.
 *
 * @param languageCode forwarded unchanged to the delegate constructor
 * @param nameFinderModel forwarded unchanged to the delegate constructor
 * @param generatorDescriptor forwarded unchanged to the delegate constructor
 * @param resources forwarded unchanged to the delegate constructor
 * @param manifestInfoEntries forwarded unchanged to the delegate constructor
 */
public TokenNameFinderModel(String languageCode, MaxentModel nameFinderModel, byte[] generatorDescriptor, Map<String, Object> resources, Map<String, String> manifestInfoEntries) { this(languageCode, nameFinderModel, NameFinderME.DEFAULT_BEAM_SIZE, generatorDescriptor, resources, manifestInfoEntries, new BioCodec(), new TokenNameFinderFactory()); }
public static TokenNameFinderFactory create(String subclassName, byte[] featureGeneratorBytes, final Map<String, Object> resources, SequenceCodec<String> seqCodec) throws InvalidFormatException { TokenNameFinderFactory theFactory; if (subclassName == null) { // will create the default factory theFactory = new TokenNameFinderFactory(); } else { try { theFactory = ExtensionLoader.instantiateExtension( TokenNameFinderFactory.class, subclassName); } catch (Exception e) { String msg = "Could not instantiate the " + subclassName + ". The initialization throw an exception."; System.err.println(msg); e.printStackTrace(); throw new InvalidFormatException(msg, e); } } theFactory.init(featureGeneratorBytes, resources, seqCodec); return theFactory; }
/**
 * Trains a name finder model from the annotated sample sentences and writes it to a
 * temporary file.
 *
 * <p>The training text is read line by line as ISO-8859-1, with 70 iterations and a cutoff
 * of 1 as training parameters. The serialized model is stored in a file created with
 * {@code File.createTempFile("model", ".bin")}.
 *
 * @return the temporary file containing the serialized model
 * @throws IOException if reading the training data or writing the model fails
 */
private File trainModel() throws IOException {
  File corpus = new File("opennlp/tools/namefind/AnnotatedSentencesWithTypes.txt");
  ObjectStream<String> lines = new PlainTextByLineStream(
      new MockInputStreamFactory(corpus), StandardCharsets.ISO_8859_1);

  TrainingParameters trainingParams = new TrainingParameters();
  trainingParams.put(TrainingParameters.ITERATIONS_PARAM, 70);
  trainingParams.put(TrainingParameters.CUTOFF_PARAM, 1);

  TokenNameFinderFactory factory = new TokenNameFinderFactory();

  final TokenNameFinderModel trained;
  try (ObjectStream<NameSample> samples = new NameSampleDataStream(lines)) {
    trained = NameFinderME.train("eng", null, samples, trainingParams, factory);
  }

  File target = File.createTempFile("model", ".bin");
  try (BufferedOutputStream sink = new BufferedOutputStream(new FileOutputStream(target))) {
    trained.serialize(sink);
  }
  return target;
}
/**
 * Trains a name finder model from the samples in {@code trainFile}.
 *
 * <p>The samples are read through a {@code Conll02NameSampleStream}, which is closed once
 * training has finished or failed. The language code handed to the trainer is the lower-cased
 * string form of {@code lang}.
 *
 * @param trainFile file containing the training data
 * @param lang language of the training data; also passed to the sample stream
 * @param params training parameters passed to {@code NameFinderME.train}
 * @param types passed to the {@code Conll02NameSampleStream} constructor
 * @return the trained model
 * @throws IOException if opening, reading or closing the sample stream fails
 */
private TokenNameFinderModel train(File trainFile, LANGUAGE lang, TrainingParameters params,
    int types) throws IOException {
  // try-with-resources: the sample stream was previously never closed.
  try (ObjectStream<NameSample> samples = new Conll02NameSampleStream(
      lang, new MarkableFileInputStreamFactory(trainFile), types)) {
    // Locale.ROOT keeps the language code independent of the JVM's default locale.
    String languageCode = lang.toString().toLowerCase(java.util.Locale.ROOT);
    return NameFinderME.train(languageCode, null, samples, params,
        new TokenNameFinderFactory());
  }
}
/**
 * Runs a 5-fold cross validation and asserts the resulting F-measure.
 *
 * @param params training parameters handed to the cross validator
 * @param type if non-null, the samples are wrapped in a {@code NameSampleTypeFilter}
 *        constructed with this single type; if {@code null}, the samples are used as they are
 * @param expectedScore expected F-measure, compared with a tolerance of 0.001
 * @throws IOException if reading the samples or running the evaluation fails
 */
private void crossEval(TrainingParameters params, String type, double expectedScore)
    throws IOException {
  try (ObjectStream<NameSample> samples = createNameSampleStream()) {
    TokenNameFinderCrossValidator validator = new TokenNameFinderCrossValidator(
        "eng", null, params, new TokenNameFinderFactory());

    ObjectStream<NameSample> input = (type == null)
        ? samples
        : new NameSampleTypeFilter(new String[] {type}, samples);

    validator.evaluate(input, 5);

    double actualScore = validator.getFMeasure().getFMeasure();
    Assert.assertEquals(expectedScore, actualScore, 0.001d);
  }
}
/**
 * Convenience constructor that delegates to the eight-argument constructor of this class.
 *
 * <p>Besides the caller-supplied arguments, it passes {@code NameFinderME.DEFAULT_BEAM_SIZE}
 * as the third argument, a newly created {@code BioCodec}, and a newly created
 * default-constructed {@code TokenNameFinderFactory}.
 *
 * @param languageCode forwarded unchanged to the delegate constructor
 * @param nameFinderModel forwarded unchanged to the delegate constructor
 * @param generatorDescriptor forwarded unchanged to the delegate constructor
 * @param resources forwarded unchanged to the delegate constructor
 * @param manifestInfoEntries forwarded unchanged to the delegate constructor
 */
public TokenNameFinderModel(String languageCode, MaxentModel nameFinderModel, byte[] generatorDescriptor, Map<String, Object> resources, Map<String, String> manifestInfoEntries) { this(languageCode, nameFinderModel, NameFinderME.DEFAULT_BEAM_SIZE, generatorDescriptor, resources, manifestInfoEntries, new BioCodec(), new TokenNameFinderFactory()); }
/**
 * Convenience constructor that delegates to the eight-argument constructor of this class.
 *
 * <p>Besides the caller-supplied arguments, it passes {@code NameFinderME.DEFAULT_BEAM_SIZE}
 * as the third argument, a newly created {@code BioCodec}, and a newly created
 * default-constructed {@code TokenNameFinderFactory}.
 *
 * @param languageCode forwarded unchanged to the delegate constructor
 * @param nameFinderModel forwarded unchanged to the delegate constructor
 * @param generatorDescriptor forwarded unchanged to the delegate constructor
 * @param resources forwarded unchanged to the delegate constructor
 * @param manifestInfoEntries forwarded unchanged to the delegate constructor
 */
public TokenNameFinderModel(String languageCode, MaxentModel nameFinderModel, byte[] generatorDescriptor, Map<String, Object> resources, Map<String, String> manifestInfoEntries) { this(languageCode, nameFinderModel, NameFinderME.DEFAULT_BEAM_SIZE, generatorDescriptor, resources, manifestInfoEntries, new BioCodec(), new TokenNameFinderFactory()); }
/**
 * Trains an OpenNLP name finder model from the given samples.
 *
 * <p>The samples are wrapped in a {@code NameSampleStream} that is closed when training ends.
 * Training uses the language code {@code "unknown"}, no explicit type, and a
 * default-constructed {@code TokenNameFinderFactory}.
 *
 * @param aNameSamples the training samples
 * @param aParameters training parameters passed to {@code NameFinderME.train}
 * @return the trained model
 * @throws RecommendationException if an {@code IOException} occurs during training; the
 *         original exception is attached as the cause
 */
private TokenNameFinderModel train(List<NameSample> aNameSamples,
    TrainingParameters aParameters) throws RecommendationException {
  try (NameSampleStream stream = new NameSampleStream(aNameSamples)) {
    TokenNameFinderFactory finderFactory = new TokenNameFinderFactory();
    return NameFinderME.train("unknown", null, stream, aParameters, finderFactory);
  } catch (IOException e) {
    LOG.error("Exception during training the OpenNLP Named Entity Recognizer model.", e);
    // The message names the name finder, matching the log line above; it used to say "pos".
    throw new RecommendationException(
        "Error while training OpenNLP Named Entity Recognizer", e);
  }
}
}
public static TokenNameFinderFactory create(String subclassName, byte[] featureGeneratorBytes, final Map<String, Object> resources, SequenceCodec<String> seqCodec) throws InvalidFormatException { TokenNameFinderFactory theFactory; if (subclassName == null) { // will create the default factory theFactory = new TokenNameFinderFactory(); } else { try { theFactory = ExtensionLoader.instantiateExtension( TokenNameFinderFactory.class, subclassName); } catch (Exception e) { String msg = "Could not instantiate the " + subclassName + ". The initialization throw an exception."; System.err.println(msg); e.printStackTrace(); throw new InvalidFormatException(msg, e); } } theFactory.init(featureGeneratorBytes, resources, seqCodec); return theFactory; }
public static TokenNameFinderFactory create(String subclassName, byte[] featureGeneratorBytes, final Map<String, Object> resources, SequenceCodec<String> seqCodec) throws InvalidFormatException { TokenNameFinderFactory theFactory; if (subclassName == null) { // will create the default factory theFactory = new TokenNameFinderFactory(); } else { try { theFactory = ExtensionLoader.instantiateExtension( TokenNameFinderFactory.class, subclassName); } catch (Exception e) { String msg = "Could not instantiate the " + subclassName + ". The initialization throw an exception."; System.err.println(msg); e.printStackTrace(); throw new InvalidFormatException(msg, e); } } theFactory.init(featureGeneratorBytes, resources, seqCodec); return theFactory; }
try { return NameFinderME.train(language, null, stream, params, new TokenNameFinderFactory(featureGenCfg, Collections.<String, Object>emptyMap(), sequenceEncoding.getCodec()));
TokenNameFinderModel tokenNameFinderModel = NameFinderME.train("en", slot, combinedNameSampleStream, trainingParams, new TokenNameFinderFactory()); combinedNameSampleStream.close(); tokenNameFinderModels.add(tokenNameFinderModel);