/**
 * Builds a {@link DoccatModel} from the supplied stream.
 *
 * @param in stream handed directly to the {@code DoccatModel} constructor
 * @return the model constructed from {@code in}
 * @throws IOException if constructing the model from the stream fails
 */
@Override
protected DoccatModel loadModel(InputStream in) throws IOException {
  final DoccatModel loaded = new DoccatModel(in);
  return loaded;
}
}
/**
 * Builds a {@link DoccatModel} from the supplied stream.
 *
 * @param modelIn stream handed directly to the {@code DoccatModel} constructor
 * @return the model constructed from {@code modelIn}
 * @throws IOException if constructing the model from the stream fails
 */
@Override
protected DoccatModel loadModel(InputStream modelIn) throws IOException {
  final DoccatModel loaded = new DoccatModel(modelIn);
  return loaded;
}
/**
 * Trains a document categorizer model from a stream of samples.
 *
 * <p>An event trainer is obtained from {@code TrainerFactory} using the given
 * training parameters, the samples are wrapped in a
 * {@code DocumentCategorizerEventStream} together with the factory's feature
 * generators, and the resulting maxent model is packaged into a
 * {@code DoccatModel}.
 *
 * @param languageCode language code stored in the returned model
 * @param samples      training samples
 * @param mlParams     parameters used to select and configure the trainer
 * @param factory      supplies the feature generators and is stored in the model
 * @return the newly trained model
 * @throws IOException if training or model construction fails with an I/O error
 */
public static DoccatModel train(String languageCode, ObjectStream<DocumentSample> samples,
    TrainingParameters mlParams, DoccatFactory factory) throws IOException {

  // The same map is given to the trainer factory and later to the model.
  final Map<String, String> manifest = new HashMap<>();
  final EventTrainer eventTrainer = TrainerFactory.getEventTrainer(mlParams, manifest);

  final DocumentCategorizerEventStream events =
      new DocumentCategorizerEventStream(samples, factory.getFeatureGenerators());
  final MaxentModel trained = eventTrainer.train(events);

  return new DoccatModel(languageCode, trained, manifest, factory);
}
}
/**
 * Trains a model with the default setup, round-trips it through
 * serialization, and checks that the restored factory exposes exactly one
 * {@code BagOfWordsFeatureGenerator}.
 */
@Test
public void testDefault() throws IOException {
  DoccatModel trained = train();
  Assert.assertNotNull(trained);

  // Serialize to memory and read the bytes straight back.
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  trained.serialize(buffer);
  DoccatModel restored = new DoccatModel(new ByteArrayInputStream(buffer.toByteArray()));

  DoccatFactory restoredFactory = restored.getFactory();
  Assert.assertNotNull(restoredFactory);

  Assert.assertEquals(1, restoredFactory.getFeatureGenerators().length);
  Assert.assertEquals(BagOfWordsFeatureGenerator.class,
      restoredFactory.getFeatureGenerators()[0].getClass());
}
/**
 * Creates the {@link DoccatModel} represented by the given stream.
 *
 * @param in stream passed as-is to the {@code DoccatModel} constructor
 * @return the model created from {@code in}
 * @throws IOException if the model cannot be constructed from the stream
 */
@Override
protected DoccatModel loadModel(InputStream in) throws IOException {
  final DoccatModel result = new DoccatModel(in);
  return result;
}
}
/**
 * Creates the {@link DoccatModel} represented by the given stream.
 *
 * @param modelIn stream passed as-is to the {@code DoccatModel} constructor
 * @return the model created from {@code modelIn}
 * @throws IOException if the model cannot be constructed from the stream
 */
@Override
protected DoccatModel loadModel(InputStream modelIn) throws IOException {
  final DoccatModel result = new DoccatModel(modelIn);
  return result;
}
/**
 * Loads a document categorizer model by constructing a {@link DoccatModel}
 * over the given stream.
 *
 * @param modelIn stream the model is constructed from
 * @return the constructed model
 * @throws IOException if model construction reports an I/O failure
 */
@Override
protected DoccatModel loadModel(InputStream modelIn) throws IOException {
  final DoccatModel doccatModel = new DoccatModel(modelIn);
  return doccatModel;
}
/**
 * Trains a model with three custom feature generators, round-trips it
 * through serialization, and checks that the restored factory reports the
 * same three generator classes in the same order.
 */
@Test
public void testCustom() throws IOException {
  FeatureGenerator[] configured = {
      new BagOfWordsFeatureGenerator(),
      new NGramFeatureGenerator(),
      new NGramFeatureGenerator(2,3)
  };

  DoccatModel trained = train(new DoccatFactory(configured));
  Assert.assertNotNull(trained);

  // Serialize to memory and read the bytes straight back.
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  trained.serialize(buffer);
  DoccatModel restored = new DoccatModel(new ByteArrayInputStream(buffer.toByteArray()));

  DoccatFactory restoredFactory = restored.getFactory();
  Assert.assertNotNull(restoredFactory);

  // Expected generator classes, in the order they were configured above.
  Class<?>[] expectedTypes = {
      BagOfWordsFeatureGenerator.class,
      NGramFeatureGenerator.class,
      NGramFeatureGenerator.class
  };

  Assert.assertEquals(3, restoredFactory.getFeatureGenerators().length);
  for (int i = 0; i < expectedTypes.length; i++) {
    Assert.assertEquals(expectedTypes[i], restoredFactory.getFeatureGenerators()[i].getClass());
  }
}
/**
 * Trains a document categorizer model.
 *
 * <p>Steps, in order: create an empty manifest map, ask {@code TrainerFactory}
 * for an event trainer configured by {@code mlParams}, feed it a
 * {@code DocumentCategorizerEventStream} built from {@code samples} and the
 * factory's feature generators, then wrap the trained maxent model in a
 * {@code DoccatModel}.
 *
 * @param languageCode language code recorded in the resulting model
 * @param samples      stream of training documents
 * @param mlParams     training parameters passed to the trainer factory
 * @param factory      provides feature generators; also stored in the model
 * @return the trained {@code DoccatModel}
 * @throws IOException if training or model construction fails with an I/O error
 */
public static DoccatModel train(String languageCode, ObjectStream<DocumentSample> samples,
    TrainingParameters mlParams, DoccatFactory factory) throws IOException {

  final Map<String, String> manifestEntries = new HashMap<>();
  final EventTrainer eventTrainer = TrainerFactory.getEventTrainer(mlParams, manifestEntries);

  final DocumentCategorizerEventStream eventStream =
      new DocumentCategorizerEventStream(samples, factory.getFeatureGenerators());
  final MaxentModel maxent = eventTrainer.train(eventStream);

  return new DoccatModel(languageCode, maxent, manifestEntries, factory);
}
}
/**
 * Produces a {@code DoccatModel} by training on the given document samples.
 *
 * <p>The trainer comes from {@code TrainerFactory.getEventTrainer}; its input
 * is a {@code DocumentCategorizerEventStream} over {@code samples} using the
 * feature generators reported by {@code factory}.
 *
 * @param languageCode language code passed to the model constructor
 * @param samples      training samples to read events from
 * @param mlParams     training parameters for the trainer factory
 * @param factory      source of feature generators; passed on to the model
 * @return the model wrapping the trained maxent model
 * @throws IOException if training or model construction fails with an I/O error
 */
public static DoccatModel train(String languageCode, ObjectStream<DocumentSample> samples,
    TrainingParameters mlParams, DoccatFactory factory) throws IOException {

  // Shared between the trainer factory call and the model constructor.
  final Map<String, String> manifestInfo = new HashMap<>();
  final EventTrainer chosenTrainer = TrainerFactory.getEventTrainer(mlParams, manifestInfo);

  final DocumentCategorizerEventStream trainingEvents =
      new DocumentCategorizerEventStream(samples, factory.getFeatureGenerators());
  final MaxentModel trainedModel = chosenTrainer.train(trainingEvents);

  return new DoccatModel(languageCode, trainedModel, manifestInfo, factory);
}
}
public void init(NamedList initArgs) { SolrParams params = SolrParams.toSolrParams(initArgs); String modelDirectory = params.get("modelDirectory", System.getProperty("model.dir"));//<co id="qqpp.model"/> String wordnetDirectory = params.get("wordnetDirectory", System.getProperty("wordnet.dir"));//<co id="qqpp.wordnet"/> if (modelDirectory != null) { File modelsDir = new File(modelDirectory); try { InputStream chunkerStream = new FileInputStream( new File(modelsDir,"en-chunker.bin")); ChunkerModel chunkerModel = new ChunkerModel(chunkerStream); chunker = new ChunkerME(chunkerModel); //<co id="qqpp.chunker"/> InputStream posStream = new FileInputStream( new File(modelsDir,"en-pos-maxent.bin")); POSModel posModel = new POSModel(posStream); tagger = new POSTaggerME(posModel); //<co id="qqpp.tagger"/> model = new DoccatModel(new FileInputStream( //<co id="qqpp.theModel"/> new File(modelDirectory,"en-answer.bin"))) .getChunkerModel(); probs = new double[model.getNumOutcomes()]; atcg = new AnswerTypeContextGenerator( new File(wordnetDirectory, "dict"));//<co id="qqpp.context"/> } catch (IOException e) { throw new RuntimeException(e); } } } /*
try { if (doccatModel == null) { doccatModel = new DoccatModel(new File(props.getProperty("opennlp.sentiment.model.generic"))); documentCategorizerME = new DocumentCategorizerME(doccatModel);
actg, parser); GISModel model = GIS.trainModel(100, new TwoPassDataIndexer(es, 3));//<co id="atc.train.do"/> new DoccatModel("en", model).serialize(new FileOutputStream(outFile));
// Construct the document categorizer model from modelStream, which is
// declared outside this snippet.
DoccatModel model = new DoccatModel(modelStream);