/**
 * <!-- preExtract(String,Lexicon.CountPolicy) --> Performs labeled feature vector
 * pre-extraction into the specified file (or memory), replacing {@link #parser} with one that
 * reads from that file (or memory). If <code>exampleFile</code> already exists, this method
 * writes the examples to a temporary file, then copies the contents to the existing file after
 * pre-extraction completes. This is done in case the parser providing the examples to this
 * method is reading the existing file.
 *
 * <p>
 * Note that this method does <i>not</i> write the feature lexicon it produces to disk. Calling
 * this method is equivalent to calling {@link #preExtract(String,boolean,Lexicon.CountPolicy)}
 * with the second argument <code>true</code> (i.e., the extracted examples are compressed).
 *
 * @param exampleFile The full path to a file into which examples will be written, or
 *        <code>null</code> to extract into memory.
 * @param countPolicy The feature counting policy for the learner's feature lexicon.
 * @return A new learning classifier containing the lexicon built during pre-extraction.
 **/
public Learner preExtract(String exampleFile, Lexicon.CountPolicy countPolicy) {
    // Delegate with zip == true: compress the extracted examples.
    return preExtract(exampleFile, true, countPolicy);
}
/**
 * <!-- preExtract(String) --> Performs labeled feature vector pre-extraction into the specified
 * file (or memory), replacing {@link #parser} with one that reads from that file (or memory).
 * After pre-extraction, the lexicon is written to disk. It is assumed that {@link #learner}
 * already knows where to write the lexicon. If it doesn't, call
 * {@link Learner#setLexiconLocation(String)} or
 * {@link Learner#setLexiconLocation(java.net.URL)} on that object before calling this method.
 *
 * <p>
 * Calling this method is equivalent to calling {@link #preExtract(String,boolean)} with the
 * second argument <code>true</code> (i.e., the extracted examples are compressed).
 *
 * @param exampleFile The full path to a file into which examples will be written, or
 *        <code>null</code> to extract into memory.
 * @return The resulting lexicon.
 **/
public Lexicon preExtract(String exampleFile) {
    // Delegate with zip == true: compress the extracted examples.
    return preExtract(exampleFile, true);
}
/**
 * <!-- preExtract(String,boolean) --> Pre-extracts labeled feature vectors into
 * <code>exampleFile</code> (or into memory when it is <code>null</code>), replacing
 * {@link #parser} with one that reads the extracted examples back. After pre-extraction the
 * lexicon is written to disk; {@link #learner} must already know where to write it — if it
 * doesn't, call {@link Learner#setLexiconLocation(String)} or
 * {@link Learner#setLexiconLocation(java.net.URL)} on that object before calling this method.
 *
 * @param exampleFile The full path to a file into which examples will be written, or
 *        <code>null</code> to extract into memory.
 * @param zip Whether or not to compress the extracted examples.
 * @return The resulting lexicon.
 **/
public Lexicon preExtract(String exampleFile, boolean zip) {
    // CountPolicy.none: the lexicon is built without feature-occurrence counting.
    Learner extracted = preExtract(exampleFile, zip, Lexicon.CountPolicy.none);
    extracted.saveLexicon();
    return extracted.getLexicon();
}
/**
 * Re-encodes the gold labels of every dataset from BIO into the configured tagging encoding
 * scheme, performs feature pre-extraction through a {@link BatchTrainer} (installing the
 * resulting lexicon on the classifier), and then restores the original BIO encoding.
 *
 * <p>Parts is the number of parts to which we split the data. In training — if you have a lot
 * of samples — use 100 partitions; otherwise the zip doesn't work on training files larger
 * than 4G.
 *
 * @param classifier the sparse network learner whose lexicon is set from pre-extraction
 * @param dataSets training datasets whose gold labels are temporarily re-encoded
 * @param exampleStorePath path of the example store handed to pre-extraction
 * @param params configuration supplying the target tagging encoding scheme
 * @return the batch trainer that performed the pre-extraction
 */
private static BatchTrainer prefetchAndGetBatchTrainer(SparseNetworkLearner classifier,
        Vector<Data> dataSets, String exampleStorePath, ParametersForLbjCode params) {
    // Convert gold labels: BIO -> the scheme the classifier is trained with.
    for (Data dataset : dataSets) {
        TextChunkRepresentationManager.changeChunkRepresentation(
                TextChunkRepresentationManager.EncodingScheme.BIO,
                params.taggingEncodingScheme, dataset, NEWord.LabelToLookAt.GoldLabel);
    }

    BatchTrainer batchTrainer = new BatchTrainer(classifier, new SampleReader(dataSets), 0);
    logger.debug("setting lexicon from batchtrainer, exampleStorePath is '" + exampleStorePath
            + "'...");
    classifier.setLexicon(batchTrainer.preExtract(exampleStorePath));

    // Undo the conversion: back from the training scheme to BIO.
    for (Data dataset : dataSets) {
        TextChunkRepresentationManager.changeChunkRepresentation(
                params.taggingEncodingScheme, TextChunkRepresentationManager.EncodingScheme.BIO,
                dataset, NEWord.LabelToLookAt.GoldLabel);
    }
    return batchTrainer;
}
/**
 * Temporarily switches every dataset's gold labels from BIO to the tagging encoding scheme in
 * {@code params}, runs feature pre-extraction via a {@link BatchTrainer} (the classifier's
 * lexicon is set from the extraction result), then converts the labels back to BIO.
 *
 * <p>Parts is the number of parts to which we split the data. In training — if you have a lot
 * of samples — use 100 partitions; otherwise the zip doesn't work on training files larger
 * than 4G.
 *
 * @param classifier learner that receives the pre-extracted lexicon
 * @param dataSets training data whose gold labels are re-encoded in place
 * @param exampleStorePath where the pre-extracted examples are stored
 * @param params holds the tagging encoding scheme used during training
 * @return the batch trainer used for pre-extraction
 */
private static BatchTrainer prefetchAndGetBatchTrainer(SparseNetworkLearner classifier,
        Vector<Data> dataSets, String exampleStorePath, ParametersForLbjCode params) {
    // Gold labels: BIO -> training scheme.
    for (Data doc : dataSets) {
        TextChunkRepresentationManager.changeChunkRepresentation(
                TextChunkRepresentationManager.EncodingScheme.BIO,
                params.taggingEncodingScheme, doc, NEWord.LabelToLookAt.GoldLabel);
    }

    BatchTrainer trainer = new BatchTrainer(classifier, new SampleReader(dataSets), 0);
    logger.debug("setting lexicon from batchtrainer, exampleStorePath is '" + exampleStorePath
            + "'...");
    classifier.setLexicon(trainer.preExtract(exampleStorePath));

    // Gold labels: training scheme -> BIO (restore original representation).
    for (Data doc : dataSets) {
        TextChunkRepresentationManager.changeChunkRepresentation(
                params.taggingEncodingScheme, TextChunkRepresentationManager.EncodingScheme.BIO,
                doc, NEWord.LabelToLookAt.GoldLabel);
    }
    return trainer;
}
/**
 * Re-encodes every dataset's gold labels from BIO to the globally configured tagging encoding
 * scheme, pre-extracts features through a {@link BatchTrainer} (setting the classifier's
 * lexicon), and finally restores the BIO encoding.
 *
 * <p>Parts is the number of parts to which we split the data. In training — if you have a lot
 * of samples — use 100 partitions; otherwise the zip doesn't work on training files larger
 * than 4G.
 *
 * <p>NOTE(review): reads the global {@code ParametersForLbjCode.currentParameters} for the
 * encoding scheme — presumably set by the caller before training; confirm.
 *
 * @param classifier learner that receives the pre-extracted lexicon
 * @param dataSets training data whose gold labels are re-encoded in place
 * @param exampleStorePath where the pre-extracted examples are stored
 * @return the batch trainer used for pre-extraction
 */
private static BatchTrainer prefetchAndGetBatchTrainer(SparseNetworkLearner classifier,
        Vector<Data> dataSets, String exampleStorePath) {
    // Gold labels: BIO -> training scheme (taken from the global parameters).
    for (Data doc : dataSets) {
        TextChunkRepresentationManager.changeChunkRepresentation(
                TextChunkRepresentationManager.EncodingScheme.BIO,
                ParametersForLbjCode.currentParameters.taggingEncodingScheme, doc,
                NEWord.LabelToLookAt.GoldLabel);
    }

    BatchTrainer trainer = new BatchTrainer(classifier, new SampleReader(dataSets), 0);
    logger.debug("setting lexicon from batchtrainer, exampleStorePath is '" + exampleStorePath
            + "'...");
    classifier.setLexicon(trainer.preExtract(exampleStorePath));

    // Gold labels: training scheme -> BIO (restore original representation).
    for (Data doc : dataSets) {
        TextChunkRepresentationManager.changeChunkRepresentation(
                ParametersForLbjCode.currentParameters.taggingEncodingScheme,
                TextChunkRepresentationManager.EncodingScheme.BIO, doc,
                NEWord.LabelToLookAt.GoldLabel);
    }
    return trainer;
}
// The lexicon produced by pre-extraction will be written to this location.
classifier.setLexiconLocation("models/relation_classifier_all");
BatchTrainer trainer = new BatchTrainer(classifier, train_parser);
// Pre-extract compressed examples; CountPolicy.none keeps no feature-occurrence counts.
Learner preExtractLearner =
        trainer.preExtract("models/relation_classifier_all", true, Lexicon.CountPolicy.none);
// Persist the lexicon (location was set above), then keep a handle to it.
preExtractLearner.saveLexicon();
Lexicon lexicon = preExtractLearner.getLexicon();
/**
 * Trains an extent classifier from the given reader, writing the lexicon (.lex), example
 * store (.ex) and model (.lc) files under a common base path.
 *
 * @param train_parser supplies the training examples; its id names the default output path
 * @param prefix base path for the output files, or {@code null} to use
 *        {@code tmp/extent_classifier_<id>}
 * @return the trained classifier
 */
public static extent_classifier train_extent_classifier(ExtentReader train_parser,
        String prefix) {
    extent_classifier classifier = new extent_classifier();
    // Default output path is derived from the reader's id when no prefix is supplied.
    String modelFileName = (prefix == null)
            ? "tmp/extent_classifier_" + train_parser.getId()
            : prefix;
    classifier.setLexiconLocation(modelFileName + ".lex");
    BatchTrainer trainer = new BatchTrainer(classifier, train_parser);
    // Pre-extract features into a compressed example file and install the lexicon.
    Lexicon lexicon = trainer.preExtract(modelFileName + ".ex", true);
    classifier.setLexicon(lexicon);
    classifier.setModelLocation(modelFileName + ".lc");
    // Single training round over the pre-extracted examples.
    trainer.train(1);
    classifier.saveModel();
    return classifier;
}
/**
 * Builds and trains an extent classifier, persisting its lexicon (.lex), pre-extracted
 * examples (.ex) and model (.lc) next to one another.
 *
 * @param train_parser training example source; its id is used for the default file name
 * @param prefix output base path, or {@code null} for {@code tmp/extent_classifier_<id>}
 * @return the trained classifier
 */
public static extent_classifier train_extent_classifier(ExtentReader train_parser,
        String prefix) {
    extent_classifier classifier = new extent_classifier();
    String basePath;
    if (prefix != null) {
        basePath = prefix;
    } else {
        // No prefix given: name the files after the reader's id under tmp/.
        basePath = "tmp/extent_classifier_" + train_parser.getId();
    }
    classifier.setLexiconLocation(basePath + ".lex");
    BatchTrainer trainer = new BatchTrainer(classifier, train_parser);
    // Compressed pre-extraction builds the lexicon; install it on the classifier.
    classifier.setLexicon(trainer.preExtract(basePath + ".ex", true));
    classifier.setModelLocation(basePath + ".lc");
    // One pass over the pre-extracted examples.
    trainer.train(1);
    classifier.saveModel();
    return classifier;
}
/**
 * Trains and saves an extent classifier. Output artifacts share one base path: the lexicon
 * goes to {@code <base>.lex}, pre-extracted examples to {@code <base>.ex}, and the learned
 * model to {@code <base>.lc}.
 *
 * @param train_parser reader providing the training examples
 * @param prefix base path for outputs; when {@code null}, derived from the reader's id
 * @return the trained classifier
 */
public static extent_classifier train_extent_classifier(ExtentReader train_parser,
        String prefix) {
    extent_classifier classifier = new extent_classifier();
    // Fall back to a tmp/ path built from the reader's id when no prefix is provided.
    String outputBase = (prefix == null)
            ? "tmp/extent_classifier_" + train_parser.getId()
            : prefix;
    classifier.setLexiconLocation(outputBase + ".lex");
    BatchTrainer trainer = new BatchTrainer(classifier, train_parser);
    // zip == true: compress the extracted example store.
    Lexicon extractedLexicon = trainer.preExtract(outputBase + ".ex", true);
    classifier.setLexicon(extractedLexicon);
    classifier.setModelLocation(outputBase + ".lc");
    trainer.train(1);
    classifier.saveModel();
    return classifier;
}
/**
 * Loads serialized training relations, pre-extracts features, trains a relation classifier
 * for a single round by manually iterating the examples, and writes the lexicon (.lex) and
 * model (.lc) files.
 *
 * @param serializedDataInput path of the serialized {@link ACEMentionReader} training data
 * @param modelLoc base path for the emitted .lex/.ex/.lc files
 */
public static void generateModel(String serializedDataInput, String modelLoc) {
    ACEMentionReader train_parser = IOHelper.serializeDataIn(serializedDataInput);
    relation_classifier classifier = new relation_classifier();
    classifier.setLexiconLocation(modelLoc + ".lex");
    BatchTrainer trainer = new BatchTrainer(classifier, train_parser);
    // CountPolicy.none: build the lexicon without feature-occurrence counting.
    Learner extractor = trainer.preExtract(modelLoc + ".ex", true, Lexicon.CountPolicy.none);
    extractor.saveLexicon();
    Lexicon lexicon = extractor.getLexicon();
    classifier.setLexicon(lexicon);
    int exampleCount = train_parser.relations_bi.size();
    classifier.initialize(exampleCount, lexicon.size());
    // One manual learning pass over the relation examples (instead of trainer.train(1)).
    for (Relation relation : train_parser.relations_bi) {
        classifier.learn(relation);
    }
    classifier.doneWithRound();
    classifier.doneLearning();
    classifier.setModelLocation(modelLoc + ".lc");
    classifier.saveModel();
}
// Discard any previously learned state so this round trains from scratch.
learner.forget();
BatchTrainer bt = new BatchTrainer(learner, foldParser);
// null example file => pre-extract the feature vectors into memory.
Lexicon lexicon = bt.preExtract(null);
learner.setLexicon(lexicon);
// 250 training rounds over the pre-extracted examples — TODO confirm this count is intended.
bt.train(250);
// Reset the learner's weights before training on this fold.
learner.forget();
BatchTrainer bt = new BatchTrainer(learner, foldParser);
// Extract into memory (no example file on disk) and install the resulting lexicon.
Lexicon lexicon = bt.preExtract(null);
learner.setLexicon(lexicon);
// Fixed 250 rounds of training — TODO confirm this count is intended.
bt.train(250);
BatchTrainer bt = new BatchTrainer(learner, foldParser);
// NOTE(review): global toggle between gold and predicted features — presumably must be set
// before extraction below; confirm against Comma's implementation.
Comma.useGoldFeatures(trainOnGold);
// null example file => pre-extract the feature vectors into memory.
Lexicon lexicon = bt.preExtract(null);
learner.setLexicon(lexicon);
bt.train(learningRounds);
BatchTrainer bt = new BatchTrainer(learner, foldParser);
// NOTE(review): global switch selecting gold vs. predicted features — set before
// pre-extraction; verify ordering requirement in Comma.
Comma.useGoldFeatures(trainOnGold);
// Pre-extract into memory (null => no example file) and hand the lexicon to the learner.
Lexicon lexicon = bt.preExtract(null);
learner.setLexicon(lexicon);
bt.train(learningRounds);
// Pre-extract compressed examples into <modelFileName>.ex; CountPolicy.none keeps no
// feature-occurrence counts in the lexicon.
Learner preExtractLearner =
        trainer.preExtract(modelFileName + ".ex", true, Lexicon.CountPolicy.none);
// Persist the lexicon (the learner's lexicon location must already be set).
preExtractLearner.saveLexicon();
Lexicon lexicon = preExtractLearner.getLexicon();
// Pre-extract compressed examples into <modelFileName>.ex; CountPolicy.none keeps no
// feature-occurrence counts in the lexicon.
Learner preExtractLearner =
        trainer.preExtract(modelFileName + ".ex", true, Lexicon.CountPolicy.none);
// Persist the lexicon (the learner's lexicon location must already be set).
preExtractLearner.saveLexicon();
Lexicon lexicon = preExtractLearner.getLexicon();
// Pre-extract compressed examples into <modelFileName>.ex; CountPolicy.none keeps no
// feature-occurrence counts in the lexicon.
Learner preExtractLearner =
        trainer.preExtract(modelFileName + ".ex", true, Lexicon.CountPolicy.none);
// Persist the lexicon (the learner's lexicon location must already be set).
preExtractLearner.saveLexicon();
Lexicon lexicon = preExtractLearner.getLexicon();
// Pre-extract compressed examples into <modelFileName>.ex; CountPolicy.none keeps no
// feature-occurrence counts in the lexicon.
Learner preExtractLearner =
        trainer.preExtract(modelFileName + ".ex", true, Lexicon.CountPolicy.none);
// Persist the lexicon (the learner's lexicon location must already be set).
preExtractLearner.saveLexicon();
Lexicon lexicon = preExtractLearner.getLexicon();
// Pre-extract compressed examples into <modelFileName>.ex; CountPolicy.none keeps no
// feature-occurrence counts in the lexicon.
Learner preExtractLearner =
        trainer.preExtract(modelFileName + ".ex", true, Lexicon.CountPolicy.none);
// Persist the lexicon (the learner's lexicon location must already be set).
preExtractLearner.saveLexicon();
Lexicon lexicon = preExtractLearner.getLexicon();