/**
 * Parts is the number of parts to which we split the data. in training - if you have a lot of
 * samples- use 100 partitions otherwise, the zip doesn't work on training files larger than 4G
 */
private static BatchTrainer prefetchAndGetBatchTrainer(SparseNetworkLearner classifier,
        Vector<Data> dataSets, String exampleStorePath, ParametersForLbjCode params) {
    // Re-encode the gold labels from BIO into the tagging scheme configured in params so
    // that feature pre-extraction sees the representation the model is trained on.
    for (int dataId = 0; dataId < dataSets.size(); dataId++) {
        Data data = dataSets.elementAt(dataId);
        TextChunkRepresentationManager.changeChunkRepresentation(
                TextChunkRepresentationManager.EncodingScheme.BIO,
                params.taggingEncodingScheme, data, NEWord.LabelToLookAt.GoldLabel);
    }
    // Pre-extract feature examples into exampleStorePath and install the lexicon built
    // during extraction on the classifier.
    // NOTE(review): the third BatchTrainer argument appears to be a progress-output
    // interval (0 = quiet) — confirm against the BatchTrainer constructor.
    BatchTrainer bt = new BatchTrainer(classifier, new SampleReader(dataSets), 0);
    logger.debug("setting lexicon from batchtrainer, exampleStorePath is '" + exampleStorePath
            + "'...");
    classifier.setLexicon(bt.preExtract(exampleStorePath));
    // Undo the re-encoding so callers observe the datasets with their original BIO labels.
    for (int dataId = 0; dataId < dataSets.size(); dataId++) {
        Data trainData = dataSets.elementAt(dataId);
        TextChunkRepresentationManager.changeChunkRepresentation(
                params.taggingEncodingScheme,
                TextChunkRepresentationManager.EncodingScheme.BIO,
                trainData, NEWord.LabelToLookAt.GoldLabel);
    }
    return bt;
}
/**
 * Trains an extent classifier over the examples produced by {@code train_parser}.
 * When {@code prefix} is null the model artifacts are written under {@code tmp/} with a
 * name derived from the reader's id; otherwise {@code prefix} is the base path. The
 * lexicon (.lex), pre-extracted examples (.ex) and model (.lc) files all share that base.
 *
 * @param train_parser Supplies the training examples.
 * @param prefix Base path for model files, or null for the default temporary location.
 * @return The trained (and saved) classifier.
 **/
public static extent_classifier train_extent_classifier(ExtentReader train_parser, String prefix) {
    extent_classifier classifier = new extent_classifier();
    final String modelFileName =
            (prefix == null) ? "tmp/extent_classifier_" + train_parser.getId() : prefix;
    classifier.setLexiconLocation(modelFileName + ".lex");
    BatchTrainer trainer = new BatchTrainer(classifier, train_parser);
    // Pre-extract examples (zipped) and hand the resulting lexicon to the classifier.
    Lexicon lexicon = trainer.preExtract(modelFileName + ".ex", true);
    classifier.setLexicon(lexicon);
    classifier.setModelLocation(modelFileName + ".lc");
    trainer.train(1);
    classifier.saveModel();
    return classifier;
}
/**
 * Trains the quantities classifier on {@code <dataDir>/train.txt} for 45 rounds and
 * saves the resulting model and lexicon to {@code <modelName>.lc} / {@code <modelName>.lex}.
 **/
public void train() {
    QuantitiesClassifier classifier =
            new QuantitiesClassifier(modelName + ".lc", modelName + ".lex");
    QuantitiesDataReader trainReader =
            new QuantitiesDataReader(dataDir + "/train.txt", "train");
    new BatchTrainer(classifier, trainReader).train(45);
    classifier.save();
}
lexicon = trainer.preExtract(exFilePath, preExtractZip); else if (lce.pruneStatus != RevisionAnalysis.UNAFFECTED) lexicon = learner.getLexiconDiscardCounts(); else trainer.fillInSizes(); } else if (lce.featuresStatus != RevisionAnalysis.UNAFFECTED || lce.pruneStatus != RevisionAnalysis.UNAFFECTED && lce.previousPruneCountType == null) preExtractLearner = trainer.preExtract(exFilePath, preExtractZip, countPolicy); else if (lce.previousPruneCountType != null && !lce.previousPruneCountType.equals(lce.pruneCountType)) { if (lce.previousPruneCountType.value.equals("\"global\"")) preExtractLearner = trainer.preExtract(exFilePath, preExtractZip, countPolicy); else trainer.pruneDataset(exFilePath, preExtractZip, pruningPolicy, preExtractLearner); lexicon = preExtractLearner.getLexicon(); if (preExtractLearner == learner) new BatchTrainer(learner, testParser, trainer.getProgressOutput(), "test set: "); preExtractor.preExtract(testExFilePath, preExtractZip, Lexicon.CountPolicy.none); testParser = preExtractor.getParser();
logger.info("Pre-extracting the testing data for Level 1 classifier, saving to "+testPathL1); BatchTrainer bt1test = prefetchAndGetBatchTrainer(tagger1, testDataSet, testPathL1); Parser testParser1 = bt1test.getParser(); bt1train.train(1); testParser1.reset(); TestDiscrete simpleTest = new TestDiscrete(); BatchTrainer bt2test = prefetchAndGetBatchTrainer(tagger2, testDataSet, testPathL2); Parser testParser2 = bt2test.getParser(); bt2train.train(1); logger.info("Testing level 2 classifier; on prefetched data, round: " + i); testParser2.reset();
/**
 * <!-- train(int,DoneWithRound) --> Trains {@link #learner} for the specified number of rounds,
 * on top of any learning that {@link #learner} may have already done. Delegates to
 * {@link #train(int,int,DoneWithRound)} with the starting round fixed at 1.
 *
 * @param rounds The number of passes to make over the training data.
 * @param dwr Performs post processing at the end of each round.
 **/
public void train(int rounds, DoneWithRound dwr) {
    this.train(1, rounds, dwr);
}
/**
 * <!-- preExtract(String,Lexicon.CountPolicy) --> Performs labeled feature vector
 * pre-extraction into the specified file (or into memory when {@code exampleFile} is
 * <code>null</code>), replacing {@link #parser} with one that reads the extracted examples.
 * If <code>exampleFile</code> already exists, examples go to a temporary file first and are
 * copied over afterwards, in case the parser feeding this method is reading the existing file.
 *
 * <p>
 * The produced feature lexicon is <i>not</i> written to disk. This method is shorthand for
 * {@link #preExtract(String,boolean,Lexicon.CountPolicy)} with <code>true</code> as the
 * second argument.
 *
 * @param exampleFile The full path to a file into which examples will be written, or
 *        <code>null</code> to extract into memory.
 * @param countPolicy The feature counting policy for the learner's feature lexicon.
 * @return A new learning classifier containing the lexicon built during pre-extraction.
 **/
public Learner preExtract(String exampleFile, Lexicon.CountPolicy countPolicy) {
    return this.preExtract(exampleFile, true, countPolicy);
}
messageIndent += " "; train(totalRounds, new DoneWithRound() { int r = 0; crossValidationTesting(foldParser, metric, false, statusMessages); messageIndent = messageIndent.substring(2);
messageIndent += " "; train(totalRounds, new DoneWithRound() { int r = 0; results[rounds.length - 1] = testMidTraining(devParser, metric, false); messageIndent = messageIndent.substring(2);
int k = Integer.parseInt(lce.K.value); double alpha = Double.parseDouble(lce.alpha.value); trainer.crossValidation(rounds, k, lce.splitPolicy, alpha, testingMetric, true); System.out.println(" " + getName() trainer.train(lce.startingRound, trainingRounds); } else
progressOutput = Integer.parseInt(lce.progressOutput.value); trainer = new BatchTrainer(learner, parser, progressOutput);
double[][] results = crossValidation(rounds, k, splitPolicy, alpha, metric, false); messageIndent = messageIndent.substring(2);
// Trains the quantities classifier on <dataDir>/train.txt for 45 rounds and saves the
// model (.lc) and lexicon (.lex) under the configured modelName.
public void train() { QuantitiesClassifier classifier = new QuantitiesClassifier(modelName + ".lc", modelName + ".lex"); QuantitiesDataReader trainReader = new QuantitiesDataReader(dataDir + "/train.txt", "train"); BatchTrainer trainer = new BatchTrainer(classifier, trainReader); trainer.train(45); classifier.save(); }
logger.info("Pre-extracting the testing data for Level 1 classifier, saving to "+testPathL1); BatchTrainer bt1test = prefetchAndGetBatchTrainer(tagger1, testDataSet, testPathL1, params); Parser testParser1 = bt1test.getParser(); for (int i = 0; (fixedNumIterations == -1 && i < 200 && i - bestRoundLevel1 < 10) || (fixedNumIterations > 0 && i <= fixedNumIterations); ++i) { bt1train.train(1); testParser1.reset(); TestDiscrete simpleTest = new TestDiscrete(); BatchTrainer bt2test = prefetchAndGetBatchTrainer(tagger2, testDataSet, testPathL2, params); Parser testParser2 = bt2test.getParser(); || (fixedNumIterations > 0 && i <= fixedNumIterations); ++i) { logger.info("Learning level 2 classifier; round " + i); bt2train.train(1); logger.info("Testing level 2 classifier; on prefetched data, round: " + i); testParser2.reset();
/**
 * <!-- train(int,int) --> Trains {@link #learner} for the specified number of rounds, on top
 * of any learning that {@link #learner} may have already done. Equivalent to
 * {@link #train(int,int,DoneWithRound)} with a no-op end-of-round callback.
 *
 * @param start The 1-based number of the first training round.
 * @param rounds The total number of training rounds including those before <code>start</code>.
 **/
public void train(int start, int rounds) {
    // No post-processing is required between rounds.
    DoneWithRound noOp = new DoneWithRound() {
        public void doneWithRound(int round) {}
    };
    train(start, rounds, noOp);
}
/**
 * <!-- preExtract(String) --> Performs labeled feature vector pre-extraction into the
 * specified file (or into memory when {@code exampleFile} is <code>null</code>), replacing
 * {@link #parser} with one that reads the extracted examples. The lexicon is then written to
 * disk; {@link #learner} must already know where to write it — if it doesn't, call
 * {@link Learner#setLexiconLocation(String)} or
 * {@link Learner#setLexiconLocation(java.net.URL)} first.
 *
 * <p>
 * This method is shorthand for {@link #preExtract(String,boolean)} with <code>true</code>
 * as the second argument.
 *
 * @param exampleFile The full path to a file into which examples will be written, or
 *        <code>null</code> to extract into memory.
 * @return The resulting lexicon.
 **/
public Lexicon preExtract(String exampleFile) {
    return this.preExtract(exampleFile, true);
}
/**
 * Re-encodes the gold labels of every dataset from BIO into the tagging scheme configured in
 * {@code params}, pre-extracts feature examples into {@code exampleStorePath} (installing the
 * resulting lexicon on the classifier), then restores the labels to BIO before returning.
 * Parts is the number of parts to which we split the data. in training - if you have a lot of
 * samples- use 100 partitions otherwise, the zip doesn't work on training files larger than 4G
 */
private static BatchTrainer prefetchAndGetBatchTrainer(SparseNetworkLearner classifier,
        Vector<Data> dataSets, String exampleStorePath, ParametersForLbjCode params) {
    // Forward conversion: BIO -> params.taggingEncodingScheme.
    for (Data dataset : dataSets) {
        TextChunkRepresentationManager.changeChunkRepresentation(
                TextChunkRepresentationManager.EncodingScheme.BIO,
                params.taggingEncodingScheme, dataset, NEWord.LabelToLookAt.GoldLabel);
    }
    BatchTrainer batchTrainer = new BatchTrainer(classifier, new SampleReader(dataSets), 0);
    logger.debug("setting lexicon from batchtrainer, exampleStorePath is '" + exampleStorePath
            + "'...");
    classifier.setLexicon(batchTrainer.preExtract(exampleStorePath));
    // Reverse conversion: params.taggingEncodingScheme -> BIO.
    for (Data dataset : dataSets) {
        TextChunkRepresentationManager.changeChunkRepresentation(
                params.taggingEncodingScheme,
                TextChunkRepresentationManager.EncodingScheme.BIO,
                dataset, NEWord.LabelToLookAt.GoldLabel);
    }
    return batchTrainer;
}
// Trains an extent classifier over the examples from train_parser. When prefix is null the
// model artifacts are written under tmp/ using the reader's id; otherwise prefix is the base
// path. The lexicon (.lex), pre-extracted examples (.ex) and model (.lc) share that base name.
public static extent_classifier train_extent_classifier(ExtentReader train_parser, String prefix){ extent_classifier classifier = new extent_classifier(); String modelFileName = ""; if (prefix == null){ String postfix = train_parser.getId(); modelFileName = "tmp/extent_classifier_" + postfix; } else{ modelFileName = prefix; } classifier.setLexiconLocation(modelFileName + ".lex"); BatchTrainer trainer = new BatchTrainer(classifier, train_parser); /* pre-extract zipped examples and install the resulting lexicon */ Lexicon lexicon = trainer.preExtract(modelFileName + ".ex", true); classifier.setLexicon(lexicon); classifier.setModelLocation(modelFileName + ".lc"); trainer.train(1); classifier.saveModel(); return classifier; }
/**
 * Trains the quantities classifier on the full dataset {@code <dataDir>/allData.txt} for
 * 45 rounds and saves the model/lexicon to {@code <modelName>.lc} / {@code <modelName>.lex}.
 **/
public void trainOnAll() {
    QuantitiesClassifier classifier =
            new QuantitiesClassifier(modelName + ".lc", modelName + ".lex");
    QuantitiesDataReader trainReader =
            new QuantitiesDataReader(dataDir + "/allData.txt", "train");
    new BatchTrainer(classifier, trainReader).train(45);
    classifier.save();
}
logger.info("Pre-extracting the testing data for Level 1 classifier, saving to "+testPathL1); BatchTrainer bt1test = prefetchAndGetBatchTrainer(tagger1, testDataSet, testPathL1, params); Parser testParser1 = bt1test.getParser(); for (int i = 0; (fixedNumIterations == -1 && i < 200 && i - bestRoundLevel1 < 10) || (fixedNumIterations > 0 && i <= fixedNumIterations); ++i) { bt1train.train(1); testParser1.reset(); TestDiscrete simpleTest = new TestDiscrete(); BatchTrainer bt2test = prefetchAndGetBatchTrainer(tagger2, testDataSet, testPathL2, params); Parser testParser2 = bt2test.getParser(); || (fixedNumIterations > 0 && i <= fixedNumIterations); ++i) { logger.info("Learning level 2 classifier; round " + i); bt2train.train(1); logger.info("Testing level 2 classifier; on prefetched data, round: " + i); testParser2.reset();