/**
 * Trains a GIS model from an event stream using the defaults baked into this
 * overload: 100 iterations and a cutoff of 0.
 *
 * @param eventStream
 *          The stream of events on which the model will be trained.
 * @return The newly trained model, which can be used immediately or saved to
 *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
 * @throws IOException Propagated from the three-argument overload this
 *         method delegates to.
 */
public GISModel trainModel(ObjectStream<Event> eventStream) throws IOException {
  final int defaultIterations = 100;
  final int noCutoff = 0;
  return trainModel(eventStream, defaultIterations, noCutoff);
}
/**
 * Runs GIS training over already-indexed data, supplying a fresh
 * {@code UniformPrior} and a thread count of 1 to the four-argument overload.
 *
 * @param iterations The number of GIS iterations to perform.
 * @param di The data indexer used to compress events in memory.
 * @return The newly trained model, which can be used immediately or saved
 *         to disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
 */
public GISModel trainModel(int iterations, DataIndexer di) {
  final UniformPrior uniformPrior = new UniformPrior();
  final int singleThread = 1;
  return trainModel(iterations, di, uniformPrior, singleThread);
}
/**
 * Runs GIS training over already-indexed data with a fresh
 * {@code UniformPrior}, forwarding the caller's thread count to the
 * four-argument overload.
 *
 * @param iterations The number of GIS iterations to perform.
 * @param di The data indexer used to compress events in memory.
 * @param threads The thread count, passed unchanged to the four-argument
 *        overload (presumably the number of training threads; confirm there).
 * @return The newly trained model, which can be used immediately or saved
 *         to disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
 */
public GISModel trainModel(int iterations, DataIndexer di, int threads) {
  final UniformPrior uniformPrior = new UniformPrior();
  return trainModel(iterations, di, uniformPrior, threads);
}
/**
 * Trains a model on the given indexed data. The iteration count comes from
 * {@code getIterations()} and the thread count from the training parameters
 * ({@code THREADS_PARAM}, with 1 supplied as the default).
 *
 * @param indexer The indexed training data.
 * @return The model produced by {@code trainModel(int, DataIndexer, int)}.
 * @throws IOException Declared by the method signature.
 */
@Override
public MaxentModel doTrain(DataIndexer indexer) throws IOException {
  // Arguments are evaluated left to right, so the iteration count is still
  // read before the thread-count parameter.
  return trainModel(
      getIterations(),
      indexer,
      trainingParameters.getIntParameter(TrainingParameters.THREADS_PARAM, 1));
}
/**
 * Trains a GIS model on the events in the specified event stream, using the
 * specified number of iterations and the specified count cutoff. The events
 * are indexed with a {@code OnePassDataIndexer} before training.
 *
 * @param eventStream A stream of all events.
 * @param iterations The number of iterations to use for GIS.
 * @param cutoff The number of times a feature must occur to be included.
 * @return A GIS model trained with the specified iterations and cutoff.
 * @throws IOException Declared by the method signature; see the indexing
 *         calls for possible sources.
 */
public GISModel trainModel(ObjectStream<Event> eventStream, int iterations, int cutoff)
    throws IOException {
  // Settings handed to the indexer.
  TrainingParameters indexerSettings = new TrainingParameters();
  indexerSettings.put(GISTrainer.CUTOFF_PARAM, cutoff);
  indexerSettings.put(GISTrainer.ITERATIONS_PARAM, iterations);

  // Report map required by init(); it is not read again in this method.
  Map<String, String> indexingReport = new HashMap<>();

  DataIndexer onePassIndexer = new OnePassDataIndexer();
  onePassIndexer.init(indexerSettings, indexingReport);
  onePassIndexer.index(eventStream);

  return trainModel(iterations, onePassIndexer);
}
/**
 * Convenience overload that trains a GIS model with 100 iterations and a
 * cutoff of 0.
 *
 * @param eventStream
 *          The stream holding the events on which this model will be trained.
 * @return The newly trained model, which can be used immediately or saved to
 *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
 * @throws IOException Propagated from the three-argument overload this
 *         method delegates to.
 */
public GISModel trainModel(ObjectStream<Event> eventStream) throws IOException {
  final int iterationCount = 100;
  final int featureCutoff = 0;
  return trainModel(eventStream, iterationCount, featureCutoff);
}
/**
 * Trains a GIS model from the given events, delegating to the three-argument
 * overload with 100 iterations and a cutoff of 0.
 *
 * @param eventStream
 *          The stream holding the events on which this model will be trained.
 * @return The newly trained model, which can be used immediately or saved to
 *         disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
 * @throws IOException Propagated from the three-argument overload.
 */
public GISModel trainModel(ObjectStream<Event> eventStream) throws IOException {
  final int iterations = 100;
  final int cutoff = 0;
  return trainModel(eventStream, iterations, cutoff);
}
@Test public void testMaxentOnPrepAttachData() throws IOException { testDataIndexer.index(PrepAttachDataUtil.createTrainingStream()); // this shows why the GISTrainer should be a AbstractEventTrainer. // TODO: make sure that the trainingParameter cutoff and the // cutoff value passed here are equal. AbstractModel model = new GISTrainer(true).trainModel(100, testDataIndexer, new UniformPrior(), 1); PrepAttachDataUtil.testModel(model, 0.7997028967566229); }
/**
 * Same scenario as the single-threaded prepositional-attachment test, but
 * passes 2 as the thread count; the expected value given to
 * {@code PrepAttachDataUtil.testModel} is unchanged.
 */
@Test
public void testMaxentOnPrepAttachData2Threads() throws IOException {
  testDataIndexer.index(PrepAttachDataUtil.createTrainingStream());

  GISTrainer gisTrainer = new GISTrainer(true);
  AbstractModel trainedModel =
      gisTrainer.trainModel(100, testDataIndexer, new UniformPrior(), 2);

  PrepAttachDataUtil.testModel(trainedModel, 0.7997028967566229);
}
"src/test/resources/data/opennlp/maxent/real-valued-weights-training-data.txt")) { testDataIndexer.index(rvfes1); realModel = gisTrainer.trainModel(100, testDataIndexer); "src/test/resources/data/opennlp/maxent/repeat-weighting-training-data.txt")) { testDataIndexer.index(rvfes2); repeatModel = gisTrainer.trainModel(100,testDataIndexer);
/**
 * Obtains a {@code GISTrainer} from {@code TrainerFactory} with 10 iterations
 * and a cutoff of 1, sets its Gaussian sigma to 0.01, and checks that training
 * on the event stream returns a non-null model.
 */
@Test
public void testGISTrainSignature4() throws IOException {
  try (ObjectStream<Event> events = createEventStream()) {
    TrainingParameters trainingParams = ModelUtil.createDefaultTrainingParameters();
    trainingParams.put(AbstractTrainer.ITERATIONS_PARAM, 10);
    trainingParams.put(AbstractTrainer.CUTOFF_PARAM, 1);

    GISTrainer gisTrainer =
        (GISTrainer) TrainerFactory.getEventTrainer(trainingParams, null);
    gisTrainer.setGaussianSigma(0.01);

    Assert.assertNotNull(gisTrainer.trainModel(events));
  }
}
/**
 * Trains a GIS model on indexed data with a newly created
 * {@code UniformPrior}, passing the supplied thread count on to the
 * four-argument overload.
 *
 * @param iterations The number of GIS iterations to perform.
 * @param di The data indexer used to compress events in memory.
 * @param threads The thread count, forwarded as-is to the four-argument
 *        overload (presumably the number of training threads; confirm there).
 * @return The newly trained model, which can be used immediately or saved
 *         to disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
 */
public GISModel trainModel(int iterations, DataIndexer di, int threads) {
  final UniformPrior prior = new UniformPrior();
  return trainModel(iterations, di, prior, threads);
}
/**
 * Trains a GIS model on indexed data, delegating to the four-argument
 * overload with a new {@code UniformPrior} and a thread count of 1.
 *
 * @param iterations The number of GIS iterations to perform.
 * @param di The data indexer used to compress events in memory.
 * @return The newly trained model, which can be used immediately or saved
 *         to disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
 */
public GISModel trainModel(int iterations, DataIndexer di) {
  final UniformPrior prior = new UniformPrior();
  final int threadCount = 1;
  return trainModel(iterations, di, prior, threadCount);
}
/**
 * Two-argument convenience overload for GIS training: uses a fresh
 * {@code UniformPrior} and passes 1 as the thread count to the
 * four-argument overload.
 *
 * @param iterations The number of GIS iterations to perform.
 * @param di The data indexer used to compress events in memory.
 * @return The newly trained model, which can be used immediately or saved
 *         to disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
 */
public GISModel trainModel(int iterations, DataIndexer di) {
  final UniformPrior defaultPrior = new UniformPrior();
  final int oneThread = 1;
  return trainModel(iterations, di, defaultPrior, oneThread);
}
/**
 * Three-argument convenience overload for GIS training: creates a
 * {@code UniformPrior} and delegates to the four-argument overload with the
 * given thread count.
 *
 * @param iterations The number of GIS iterations to perform.
 * @param di The data indexer used to compress events in memory.
 * @param threads The thread count, handed unchanged to the four-argument
 *        overload (presumably the number of training threads; confirm there).
 * @return The newly trained model, which can be used immediately or saved
 *         to disk using an opennlp.tools.ml.maxent.io.GISModelWriter object.
 */
public GISModel trainModel(int iterations, DataIndexer di, int threads) {
  final UniformPrior defaultPrior = new UniformPrior();
  return trainModel(iterations, di, defaultPrior, threads);
}
/**
 * Trains on the supplied indexed data, taking the iteration count from
 * {@code getIterations()} and the thread count from the
 * {@code THREADS_PARAM} training parameter (1 is passed as the default).
 *
 * @param indexer The indexed training data.
 * @return The model returned by {@code trainModel(int, DataIndexer, int)}.
 * @throws IOException Declared by the method signature.
 */
@Override
public MaxentModel doTrain(DataIndexer indexer) throws IOException {
  final int iterationCount = getIterations();
  final int threadCount =
      trainingParameters.getIntParameter(TrainingParameters.THREADS_PARAM, 1);
  return trainModel(iterationCount, indexer, threadCount);
}
/**
 * Entry point for training on indexed data: looks up the configured
 * iteration count via {@code getIterations()} and the {@code THREADS_PARAM}
 * value (1 is supplied as the default), then delegates to
 * {@code trainModel(int, DataIndexer, int)}.
 *
 * @param indexer The indexed training data.
 * @return The trained model.
 * @throws IOException Declared by the method signature.
 */
@Override
public MaxentModel doTrain(DataIndexer indexer) throws IOException {
  // Left-to-right argument evaluation keeps the original lookup order.
  return trainModel(
      getIterations(),
      indexer,
      trainingParameters.getIntParameter(TrainingParameters.THREADS_PARAM, 1));
}
public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples, boolean useTokenEnd, Dictionary abbreviations, int cutoff, int iterations) throws IOException { Map<String, String> manifestInfoEntries = new HashMap<String, String>(); ModelUtil.addCutoffAndIterations(manifestInfoEntries, cutoff, iterations); Factory factory = new Factory(); // TODO: Fix the EventStream to throw exceptions when training goes wrong SDEventStream eventStream = new SDEventStream(samples, factory.createSentenceContextGenerator(languageCode), factory.createEndOfSentenceScanner(languageCode)); HashSumEventStream hses = new HashSumEventStream(eventStream); // AbstractObjectStream<Event> GISTrainer trainer = new GISTrainer(); MaxentModel sentModel = trainer.trainModel(hses, iterations, cutoff); manifestInfoEntries.put(BaseModel.TRAINING_EVENTHASH_PROPERTY, hses.calculateHashSum().toString(16)); return new SentenceModel(languageCode, sentModel, useTokenEnd, abbreviations, manifestInfoEntries); }
public static SentenceModel train(String languageCode, ObjectStream<SentenceSample> samples, boolean useTokenEnd, Dictionary abbreviations, int cutoff, int iterations) throws IOException { Map<String, String> manifestInfoEntries = new HashMap<String, String>(); ModelUtil.addCutoffAndIterations(manifestInfoEntries, cutoff, iterations); Factory factory = new Factory(); // TODO: Fix the EventStream to throw exceptions when training goes wrong SDEventStream eventStream = new SDEventStream(samples, factory.createSentenceContextGenerator(languageCode), factory.createEndOfSentenceScanner(languageCode)); HashSumEventStream hses = new HashSumEventStream(eventStream); // AbstractObjectStream<Event> GISTrainer trainer = new GISTrainer(); MaxentModel sentModel = trainer.trainModel(hses, iterations, cutoff); manifestInfoEntries.put(BaseModel.TRAINING_EVENTHASH_PROPERTY, hses.calculateHashSum().toString(16)); return new SentenceModel(languageCode, sentModel, useTokenEnd, abbreviations, manifestInfoEntries); }
/**
 * Indexes the given event stream with a {@code OnePassDataIndexer}
 * (configured with the supplied cutoff and iteration count) and then trains a
 * GIS model on the indexed data for the given number of iterations.
 *
 * @param eventStream A stream of all events.
 * @param iterations The number of iterations to use for GIS.
 * @param cutoff The number of times a feature must occur to be included.
 * @return A GIS model trained with the specified iterations and cutoff.
 * @throws IOException Declared by the method signature; see the indexing
 *         calls for possible sources.
 */
public GISModel trainModel(ObjectStream<Event> eventStream, int iterations, int cutoff)
    throws IOException {
  DataIndexer dataIndexer = new OnePassDataIndexer();

  // Parameters the indexer is initialised with.
  TrainingParameters params = new TrainingParameters();
  params.put(GISTrainer.CUTOFF_PARAM, cutoff);
  params.put(GISTrainer.ITERATIONS_PARAM, iterations);

  // Report map required by init(); it is not read again in this method.
  Map<String, String> report = new HashMap<>();
  dataIndexer.init(params, report);

  dataIndexer.index(eventStream);
  return trainModel(iterations, dataIndexer);
}