/**
 * Parts is the number of parts to which we split the data. in training - if you have a lot of
 * samples- use 100 partitions otherwise, the zip doesn't work on training files larger than 4G
 */
private static BatchTrainer prefetchAndGetBatchTrainer(SparseNetworkLearner classifier,
        Vector<Data> dataSets, String exampleStorePath, ParametersForLbjCode params) {
    // Re-encode the gold labels from BIO into the tagging scheme configured in params so
    // that feature pre-extraction sees the representation the model is trained on.
    for (int dataId = 0; dataId < dataSets.size(); dataId++) {
        Data data = dataSets.elementAt(dataId);
        TextChunkRepresentationManager.changeChunkRepresentation(
                TextChunkRepresentationManager.EncodingScheme.BIO,
                params.taggingEncodingScheme, data, NEWord.LabelToLookAt.GoldLabel);
    }
    // Pre-extract feature examples into exampleStorePath and install the lexicon built
    // during extraction on the classifier.
    // NOTE(review): the third BatchTrainer argument appears to be a progress-output
    // interval (0 = quiet) — confirm against the BatchTrainer constructor.
    BatchTrainer bt = new BatchTrainer(classifier, new SampleReader(dataSets), 0);
    logger.debug("setting lexicon from batchtrainer, exampleStorePath is '" + exampleStorePath
            + "'...");
    classifier.setLexicon(bt.preExtract(exampleStorePath));
    // Undo the re-encoding so callers observe the datasets with their original BIO labels.
    for (int dataId = 0; dataId < dataSets.size(); dataId++) {
        Data trainData = dataSets.elementAt(dataId);
        TextChunkRepresentationManager.changeChunkRepresentation(
                params.taggingEncodingScheme,
                TextChunkRepresentationManager.EncodingScheme.BIO,
                trainData, NEWord.LabelToLookAt.GoldLabel);
    }
    return bt;
}
/**
 * Trains an extent classifier over the examples produced by {@code train_parser}.
 * When {@code prefix} is null the model artifacts are written under {@code tmp/} with a
 * name derived from the reader's id; otherwise {@code prefix} is the base path. The
 * lexicon (.lex), pre-extracted examples (.ex) and model (.lc) files all share that base.
 *
 * @param train_parser Supplies the training examples.
 * @param prefix Base path for model files, or null for the default temporary location.
 * @return The trained (and saved) classifier.
 **/
public static extent_classifier train_extent_classifier(ExtentReader train_parser, String prefix) {
    extent_classifier classifier = new extent_classifier();
    final String modelFileName =
            (prefix == null) ? "tmp/extent_classifier_" + train_parser.getId() : prefix;
    classifier.setLexiconLocation(modelFileName + ".lex");
    BatchTrainer trainer = new BatchTrainer(classifier, train_parser);
    // Pre-extract examples (zipped) and hand the resulting lexicon to the classifier.
    Lexicon lexicon = trainer.preExtract(modelFileName + ".ex", true);
    classifier.setLexicon(lexicon);
    classifier.setModelLocation(modelFileName + ".lc");
    trainer.train(1);
    classifier.saveModel();
    return classifier;
}
/**
 * Trains the quantities classifier on {@code <dataDir>/train.txt} for 45 rounds and
 * saves the resulting model and lexicon to {@code <modelName>.lc} / {@code <modelName>.lex}.
 **/
public void train() {
    QuantitiesClassifier classifier =
            new QuantitiesClassifier(modelName + ".lc", modelName + ".lex");
    QuantitiesDataReader trainReader =
            new QuantitiesDataReader(dataDir + "/train.txt", "train");
    new BatchTrainer(classifier, trainReader).train(45);
    classifier.save();
}
lexicon = trainer.preExtract(exFilePath, preExtractZip); else if (lce.pruneStatus != RevisionAnalysis.UNAFFECTED) lexicon = learner.getLexiconDiscardCounts(); else trainer.fillInSizes(); } else if (lce.featuresStatus != RevisionAnalysis.UNAFFECTED || lce.pruneStatus != RevisionAnalysis.UNAFFECTED && lce.previousPruneCountType == null) preExtractLearner = trainer.preExtract(exFilePath, preExtractZip, countPolicy); else if (lce.previousPruneCountType != null && !lce.previousPruneCountType.equals(lce.pruneCountType)) { if (lce.previousPruneCountType.value.equals("\"global\"")) preExtractLearner = trainer.preExtract(exFilePath, preExtractZip, countPolicy); else trainer.pruneDataset(exFilePath, preExtractZip, pruningPolicy, preExtractLearner); lexicon = preExtractLearner.getLexicon(); if (preExtractLearner == learner) new BatchTrainer(learner, testParser, trainer.getProgressOutput(), "test set: "); preExtractor.preExtract(testExFilePath, preExtractZip, Lexicon.CountPolicy.none); testParser = preExtractor.getParser();
logger.info("Pre-extracting the testing data for Level 1 classifier, saving to "+testPathL1); BatchTrainer bt1test = prefetchAndGetBatchTrainer(tagger1, testDataSet, testPathL1); Parser testParser1 = bt1test.getParser(); bt1train.train(1); testParser1.reset(); TestDiscrete simpleTest = new TestDiscrete(); BatchTrainer bt2test = prefetchAndGetBatchTrainer(tagger2, testDataSet, testPathL2); Parser testParser2 = bt2test.getParser(); bt2train.train(1); logger.info("Testing level 2 classifier; on prefetched data, round: " + i); testParser2.reset();
/**
 * <!-- train(int,DoneWithRound) --> Trains {@link #learner} for the specified number of rounds,
 * on top of any learning that {@link #learner} may have already done. Delegates to
 * {@link #train(int,int,DoneWithRound)} with the starting round fixed at 1.
 *
 * @param rounds The number of passes to make over the training data.
 * @param dwr Performs post processing at the end of each round.
 **/
public void train(int rounds, DoneWithRound dwr) {
    this.train(1, rounds, dwr);
}
/**
 * <!-- preExtract(String,Lexicon.CountPolicy) --> Performs labeled feature vector
 * pre-extraction into the specified file (or into memory when {@code exampleFile} is
 * <code>null</code>), replacing {@link #parser} with one that reads the extracted examples.
 * If <code>exampleFile</code> already exists, examples go to a temporary file first and are
 * copied over afterwards, in case the parser feeding this method is reading the existing file.
 *
 * <p>
 * The produced feature lexicon is <i>not</i> written to disk. This method is shorthand for
 * {@link #preExtract(String,boolean,Lexicon.CountPolicy)} with <code>true</code> as the
 * second argument.
 *
 * @param exampleFile The full path to a file into which examples will be written, or
 *        <code>null</code> to extract into memory.
 * @param countPolicy The feature counting policy for the learner's feature lexicon.
 * @return A new learning classifier containing the lexicon built during pre-extraction.
 **/
public Learner preExtract(String exampleFile, Lexicon.CountPolicy countPolicy) {
    return this.preExtract(exampleFile, true, countPolicy);
}
messageIndent += " "; train(totalRounds, new DoneWithRound() { int r = 0; crossValidationTesting(foldParser, metric, false, statusMessages); messageIndent = messageIndent.substring(2);
messageIndent += " "; train(totalRounds, new DoneWithRound() { int r = 0; results[rounds.length - 1] = testMidTraining(devParser, metric, false); messageIndent = messageIndent.substring(2);
int k = Integer.parseInt(lce.K.value); double alpha = Double.parseDouble(lce.alpha.value); trainer.crossValidation(rounds, k, lce.splitPolicy, alpha, testingMetric, true); System.out.println(" " + getName() trainer.train(lce.startingRound, trainingRounds); } else
progressOutput = Integer.parseInt(lce.progressOutput.value); trainer = new BatchTrainer(learner, parser, progressOutput);
double[][] results = crossValidation(rounds, k, splitPolicy, alpha, metric, false); messageIndent = messageIndent.substring(2);
// Trains the quantities classifier on <dataDir>/train.txt for 45 rounds and saves the
// model (.lc) and lexicon (.lex) under the configured modelName.
public void train() { QuantitiesClassifier classifier = new QuantitiesClassifier(modelName + ".lc", modelName + ".lex"); QuantitiesDataReader trainReader = new QuantitiesDataReader(dataDir + "/train.txt", "train"); BatchTrainer trainer = new BatchTrainer(classifier, trainReader); trainer.train(45); classifier.save(); }
logger.info("Pre-extracting the testing data for Level 1 classifier, saving to "+testPathL1); BatchTrainer bt1test = prefetchAndGetBatchTrainer(tagger1, testDataSet, testPathL1, params); Parser testParser1 = bt1test.getParser(); for (int i = 0; (fixedNumIterations == -1 && i < 200 && i - bestRoundLevel1 < 10) || (fixedNumIterations > 0 && i <= fixedNumIterations); ++i) { bt1train.train(1); testParser1.reset(); TestDiscrete simpleTest = new TestDiscrete(); BatchTrainer bt2test = prefetchAndGetBatchTrainer(tagger2, testDataSet, testPathL2, params); Parser testParser2 = bt2test.getParser(); || (fixedNumIterations > 0 && i <= fixedNumIterations); ++i) { logger.info("Learning level 2 classifier; round " + i); bt2train.train(1); logger.info("Testing level 2 classifier; on prefetched data, round: " + i); testParser2.reset();
/**
 * <!-- train(int,int) --> Trains {@link #learner} for the specified number of rounds, on top
 * of any learning that {@link #learner} may have already done. Equivalent to
 * {@link #train(int,int,DoneWithRound)} with a no-op end-of-round callback.
 *
 * @param start The 1-based number of the first training round.
 * @param rounds The total number of training rounds including those before <code>start</code>.
 **/
public void train(int start, int rounds) {
    // No post-processing is required between rounds.
    DoneWithRound noOp = new DoneWithRound() {
        public void doneWithRound(int round) {}
    };
    train(start, rounds, noOp);
}
/**
 * <!-- preExtract(String) --> Performs labeled feature vector pre-extraction into the
 * specified file (or into memory when {@code exampleFile} is <code>null</code>), replacing
 * {@link #parser} with one that reads the extracted examples. The lexicon is then written to
 * disk; {@link #learner} must already know where to write it — if it doesn't, call
 * {@link Learner#setLexiconLocation(String)} or
 * {@link Learner#setLexiconLocation(java.net.URL)} first.
 *
 * <p>
 * This method is shorthand for {@link #preExtract(String,boolean)} with <code>true</code>
 * as the second argument.
 *
 * @param exampleFile The full path to a file into which examples will be written, or
 *        <code>null</code> to extract into memory.
 * @return The resulting lexicon.
 **/
public Lexicon preExtract(String exampleFile) {
    return this.preExtract(exampleFile, true);
}
/**
 * Re-encodes the gold labels of every dataset from BIO into the tagging scheme configured in
 * {@code params}, pre-extracts feature examples into {@code exampleStorePath} (installing the
 * resulting lexicon on the classifier), then restores the labels to BIO before returning.
 * Parts is the number of parts to which we split the data. in training - if you have a lot of
 * samples- use 100 partitions otherwise, the zip doesn't work on training files larger than 4G
 */
private static BatchTrainer prefetchAndGetBatchTrainer(SparseNetworkLearner classifier,
        Vector<Data> dataSets, String exampleStorePath, ParametersForLbjCode params) {
    // Forward conversion: BIO -> params.taggingEncodingScheme.
    for (Data dataset : dataSets) {
        TextChunkRepresentationManager.changeChunkRepresentation(
                TextChunkRepresentationManager.EncodingScheme.BIO,
                params.taggingEncodingScheme, dataset, NEWord.LabelToLookAt.GoldLabel);
    }
    BatchTrainer batchTrainer = new BatchTrainer(classifier, new SampleReader(dataSets), 0);
    logger.debug("setting lexicon from batchtrainer, exampleStorePath is '" + exampleStorePath
            + "'...");
    classifier.setLexicon(batchTrainer.preExtract(exampleStorePath));
    // Reverse conversion: params.taggingEncodingScheme -> BIO.
    for (Data dataset : dataSets) {
        TextChunkRepresentationManager.changeChunkRepresentation(
                params.taggingEncodingScheme,
                TextChunkRepresentationManager.EncodingScheme.BIO,
                dataset, NEWord.LabelToLookAt.GoldLabel);
    }
    return batchTrainer;
}
// Trains an extent classifier over the examples from train_parser. When prefix is null the
// model artifacts are written under tmp/ using the reader's id; otherwise prefix is the base
// path. The lexicon (.lex), pre-extracted examples (.ex) and model (.lc) share that base name.
public static extent_classifier train_extent_classifier(ExtentReader train_parser, String prefix){ extent_classifier classifier = new extent_classifier(); String modelFileName = ""; if (prefix == null){ String postfix = train_parser.getId(); modelFileName = "tmp/extent_classifier_" + postfix; } else{ modelFileName = prefix; } classifier.setLexiconLocation(modelFileName + ".lex"); BatchTrainer trainer = new BatchTrainer(classifier, train_parser); /* pre-extract zipped examples and install the resulting lexicon */ Lexicon lexicon = trainer.preExtract(modelFileName + ".ex", true); classifier.setLexicon(lexicon); classifier.setModelLocation(modelFileName + ".lc"); trainer.train(1); classifier.saveModel(); return classifier; }
/**
 * Trains the quantities classifier on the full dataset {@code <dataDir>/allData.txt} for
 * 45 rounds and saves the model/lexicon to {@code <modelName>.lc} / {@code <modelName>.lex}.
 **/
public void trainOnAll() {
    QuantitiesClassifier classifier =
            new QuantitiesClassifier(modelName + ".lc", modelName + ".lex");
    QuantitiesDataReader trainReader =
            new QuantitiesDataReader(dataDir + "/allData.txt", "train");
    new BatchTrainer(classifier, trainReader).train(45);
    classifier.save();
}
logger.info("Pre-extracting the testing data for Level 1 classifier, saving to "+testPathL1); BatchTrainer bt1test = prefetchAndGetBatchTrainer(tagger1, testDataSet, testPathL1, params); Parser testParser1 = bt1test.getParser(); for (int i = 0; (fixedNumIterations == -1 && i < 200 && i - bestRoundLevel1 < 10) || (fixedNumIterations > 0 && i <= fixedNumIterations); ++i) { bt1train.train(1); testParser1.reset(); TestDiscrete simpleTest = new TestDiscrete(); BatchTrainer bt2test = prefetchAndGetBatchTrainer(tagger2, testDataSet, testPathL2, params); Parser testParser2 = bt2test.getParser(); || (fixedNumIterations > 0 && i <= fixedNumIterations); ++i) { logger.info("Learning level 2 classifier; round " + i); bt2train.train(1); logger.info("Testing level 2 classifier; on prefetched data, round: " + i); testParser2.reset();