@Override
public Instances trainCV(int numFolds, int numFold) {
  // Simple pass-through to the superclass implementation.
  return super.trainCV(numFolds, numFold);
}
@Override
public Instances trainCV(int numFolds, int numFold, Random random) {
  return super.trainCV(numFolds, numFold, random);
}
public Instances[][] crossValidationSplit(Instances data, int numberOfFolds) {
  // split[0][i] holds the training set and split[1][i] the test set of fold i.
  Instances[][] split = new Instances[2][numberOfFolds];
  for (int i = 0; i < numberOfFolds; i++) {
    split[0][i] = data.trainCV(numberOfFolds, i);
    split[1][i] = data.testCV(numberOfFolds, i);
  }
  return split;
}
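A minimal sketch of how the returned split array could be consumed, assuming the usual Weka imports (weka.core.Instances, weka.classifiers.Classifier, weka.classifiers.Evaluation, weka.classifiers.trees.J48); the file name, fold count, and classifier are placeholders, not part of the snippet above:

Instances data = new Instances(new java.io.FileReader("iris.arff")); // placeholder file
data.setClassIndex(data.numAttributes() - 1);
Instances[][] split = crossValidationSplit(data, 10);
for (int i = 0; i < 10; i++) {
  Classifier clf = new J48();                // any base classifier would do
  clf.buildClassifier(split[0][i]);          // train on fold i's training set
  Evaluation eval = new Evaluation(split[0][i]);
  eval.evaluateModel(clf, split[1][i]);      // evaluate on fold i's held-out fold
  System.out.println("Fold " + i + ": " + eval.pctCorrect() + "% correct");
}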
/**
 * Creates the training set for one fold of a cross-validation on the dataset.
 * The data is subsequently randomized based on the given random number
 * generator.
 *
 * @param numFolds the number of folds in the cross-validation. Must be
 *          greater than 1.
 * @param numFold 0 for the first fold, 1 for the second, ...
 * @param random the random number generator
 * @return the training set
 * @throws IllegalArgumentException if the number of folds is less than 2 or
 *           greater than the number of instances.
 */
// @ requires 2 <= numFolds && numFolds < numInstances();
// @ requires 0 <= numFold && numFold < numFolds;
public Instances trainCV(int numFolds, int numFold, Random random) {
  Instances train = trainCV(numFolds, numFold);
  train.randomize(random);
  return train;
}
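When per-fold control is not needed, the randomize/stratify/trainCV/testCV pattern is usually replaced by a single call to Evaluation.crossValidateModel; a brief sketch, with the J48 classifier, fold count, seed, and file name chosen arbitrarily:

Instances data = new Instances(new java.io.FileReader("iris.arff")); // placeholder file
data.setClassIndex(data.numAttributes() - 1);
Evaluation eval = new Evaluation(data);
eval.crossValidateModel(new J48(), data, 10, new Random(1));
System.out.println(eval.toSummaryString());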
/**
 * Generate a bunch of predictions ready for processing, by performing a
 * cross-validation on the supplied dataset.
 *
 * @param classifier the Classifier to evaluate
 * @param data the dataset
 * @param numFolds the number of folds in the cross-validation.
 * @exception Exception if an error occurs
 */
public ArrayList<Prediction> getCVPredictions(Classifier classifier, Instances data, int numFolds)
    throws Exception {
  ArrayList<Prediction> predictions = new ArrayList<Prediction>();
  Instances runInstances = new Instances(data);
  Random random = new Random(m_Seed);
  runInstances.randomize(random);
  if (runInstances.classAttribute().isNominal() && (numFolds > 1)) {
    runInstances.stratify(numFolds);
  }
  for (int fold = 0; fold < numFolds; fold++) {
    Instances train = runInstances.trainCV(numFolds, fold, random);
    Instances test = runInstances.testCV(numFolds, fold);
    ArrayList<Prediction> foldPred = getTrainTestPredictions(classifier, train, test);
    predictions.addAll(foldPred);
  }
  return predictions;
}
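A hypothetical way to consume the returned predictions, counting exact matches across all folds; the classifier choice and the data variable are assumptions, and getCVPredictions and m_Seed are taken to belong to the enclosing class:

ArrayList<Prediction> preds = getCVPredictions(new J48(), data, 10);
int correct = 0;
for (Prediction p : preds) {
  if (p.actual() == p.predicted()) {   // Prediction exposes actual() and predicted() class values
    correct++;
  }
}
System.out.println(correct + "/" + preds.size() + " correct over all folds");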
private MultipleEvaluation innerCrossValidate(MultiLabelLearner learner, MultiLabelInstances data,
    boolean hasMeasures, List<Measure> measures, int someFolds) {
  Evaluation[] evaluation = new Evaluation[someFolds];
  Instances workingSet = new Instances(data.getDataSet());
  workingSet.randomize(new Random(seed));
  for (int i = 0; i < someFolds; i++) {
    System.out.println("Fold " + (i + 1) + "/" + someFolds);
    try {
      Instances train = workingSet.trainCV(someFolds, i);
      Instances test = workingSet.testCV(someFolds, i);
      MultiLabelInstances mlTrain = new MultiLabelInstances(train, data.getLabelsMetaData());
      MultiLabelInstances mlTest = new MultiLabelInstances(test, data.getLabelsMetaData());
      MultiLabelLearner clone = learner.makeCopy();
      clone.build(mlTrain);
      // Evaluate the fold either on the caller-supplied measures or on the defaults.
      if (hasMeasures) {
        evaluation[i] = evaluate(clone, mlTest, measures);
      } else {
        evaluation[i] = evaluate(clone, mlTest);
      }
    } catch (Exception ex) {
      Logger.getLogger(Evaluator.class.getName()).log(Level.SEVERE, null, ex);
    }
  }
  MultipleEvaluation me = new MultipleEvaluation(evaluation, data);
  me.calculateStatistics();
  return me;
}
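For comparison, this per-fold loop is normally reached through Mulan's public Evaluator.crossValidate rather than by calling the private helper directly; a minimal sketch, assuming a BinaryRelevance learner over a J48 base classifier and placeholder dataset files:

MultiLabelInstances dataset = new MultiLabelInstances("emotions.arff", "emotions.xml"); // placeholders
MultiLabelLearner learner = new BinaryRelevance(new J48());
MultipleEvaluation results = new Evaluator().crossValidate(learner, dataset, 10);
System.out.println(results);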
/**
 * Perform a cross validation for attribute selection. With subset evaluators
 * the number of times each attribute is selected over the cross validation is
 * reported. For attribute evaluators, the average merit and average ranking +
 * std deviation is reported for each attribute.
 *
 * @return the results of cross validation as a String
 * @exception Exception if an error occurs during cross validation
 */
public String CrossValidateAttributes() throws Exception {
  Instances cvData = new Instances(m_trainInstances);
  Instances train;
  Random random = new Random(m_seed);
  cvData.randomize(random);
  if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator)
      && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) {
    if (cvData.classAttribute().isNominal()) {
      cvData.stratify(m_numFolds);
    }
  }
  for (int i = 0; i < m_numFolds; i++) {
    // Perform attribute selection
    train = cvData.trainCV(m_numFolds, i, random);
    selectAttributesCVSplit(train);
  }
  return CVResultsString();
}
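CrossValidateAttributes is typically driven through weka.attributeSelection.AttributeSelection with cross-validation mode switched on, rather than being called directly; a rough sketch, where the evaluator, search method, seed, and file name are arbitrary choices:

Instances data = new Instances(new java.io.FileReader("iris.arff")); // placeholder file
data.setClassIndex(data.numAttributes() - 1);
AttributeSelection attsel = new AttributeSelection();
attsel.setEvaluator(new CfsSubsetEval());
attsel.setSearch(new BestFirst());
attsel.setFolds(10);
attsel.setSeed(1);
attsel.setXval(true);           // evaluate the attribute selection by cross-validation
attsel.SelectAttributes(data);
System.out.println(attsel.toResultsString());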
/**
 * Method for building a pruneable classifier tree.
 *
 * @param data the data to build the tree from
 * @throws Exception if tree can't be built successfully
 */
public void buildClassifier(Instances data) throws Exception {
  // remove instances with missing class
  data = new Instances(data);
  data.deleteWithMissingClass();
  Random random = new Random(m_seed);
  data.stratify(numSets);
  // Grow the tree on the first numSets - 1 folds and hold out the last fold for pruning.
  buildTree(data.trainCV(numSets, numSets - 1, random),
      data.testCV(numSets, numSets - 1), !m_cleanup);
  if (pruneTheTree) {
    prune();
  }
  if (m_cleanup) {
    cleanup(new Instances(data, 0));
  }
}
/**
 * Automatically selects a threshold based on training set performance
 * evaluated using cross-validation.
 *
 * @param measure performance is evaluated based on this parameter
 * @param folds number of cross-validation folds
 * @throws InvalidDataFormatException
 * @throws Exception
 */
private void autoTuneThreshold(MultiLabelInstances trainingData, BipartitionMeasureBase measure,
    int folds) throws InvalidDataFormatException, Exception {
  if (folds < 2) {
    throw new IllegalArgumentException("folds should be more than 1");
  }
  double[] totalDiff = new double[numLabels + 1];
  LabelsMetaData labelsMetaData = trainingData.getLabelsMetaData();
  MultiLabelLearner tempLearner = foldLearner.makeCopy();
  for (int f = 0; f < folds; f++) {
    Instances train = trainingData.getDataSet().trainCV(folds, f);
    MultiLabelInstances trainMulti = new MultiLabelInstances(train, labelsMetaData);
    Instances test = trainingData.getDataSet().testCV(folds, f);
    MultiLabelInstances testMulti = new MultiLabelInstances(test, labelsMetaData);
    tempLearner.build(trainMulti);
    double[] diff = computeThreshold(tempLearner, testMulti, measure);
    for (int k = 0; k < diff.length; k++) {
      totalDiff[k] += diff[k];
    }
  }
  t = Utils.minIndex(totalDiff);
}
Instances train = allData.trainCV(m_numFoldsBoosting, i);
Instances test = allData.testCV(m_numFoldsBoosting, i);
/**
 * CVModel - Split D into train/test folds, and then train and evaluate on each one.
 *
 * @param h a multi-dim. classifier
 * @param D data
 * @param numFolds the number of folds in the cross-validation
 * @param top Threshold OPtion (pertains to multi-label data only)
 * @return an array of 'numFolds' Results
 */
public static Result[] cvModel(MultilabelClassifier h, Instances D, int numFolds, String top) throws Exception {
  Result r[] = new Result[numFolds];
  for (int i = 0; i < numFolds; i++) {
    Instances D_train = D.trainCV(numFolds, i);
    Instances D_test = D.testCV(numFolds, i);
    if (h.getDebug()) {
      System.out.println(":- Fold [" + i + "/" + numFolds + "] -: " + MLUtils.getDatasetName(D)
          + "\tL=" + D.classIndex()
          + "\tD(t:T)=(" + D_train.numInstances() + ":" + D_test.numInstances()
          + ")\tLC(t:T)=" + Utils.roundDouble(MLUtils.labelCardinality(D_train, D.classIndex()), 2)
          + ":" + Utils.roundDouble(MLUtils.labelCardinality(D_test, D.classIndex()), 2) + ")");
    }
    r[i] = evaluateModel(h, D_train, D_test, top);
  }
  return r;
}
  instances = getInputFormat().testCV(m_NumFolds, m_Fold - 1);
} else {
  instances = getInputFormat().trainCV(m_NumFolds, m_Fold - 1);
m_MetaFormat = new Instances(metaData, 0);
for (int j = 0; j < m_NumFolds; j++) {
  Instances train = newData.trainCV(m_NumFolds, j, random);
protected void buildInternal(MultiLabelInstances trainingData) throws Exception {
  baseLearner.build(trainingData);
  if (folds == 0) {
    // No cross-validation requested: derive the threshold directly from the training data.
    threshold = computeThreshold(baseLearner, trainingData, measure);
  } else {
    LabelsMetaData labelsMetaData = trainingData.getLabelsMetaData();
    double[] thresholds = new double[folds];
    for (int f = 0; f < folds; f++) {
      Instances train = trainingData.getDataSet().trainCV(folds, f);
      MultiLabelInstances trainMulti = new MultiLabelInstances(train, labelsMetaData);
      Instances test = trainingData.getDataSet().testCV(folds, f);
      MultiLabelInstances testMulti = new MultiLabelInstances(test, labelsMetaData);
      MultiLabelLearner tempLearner = foldLearner.makeCopy();
      tempLearner.build(trainMulti);
      thresholds[f] = computeThreshold(tempLearner, testMulti, measure);
    }
    // Average the per-fold thresholds.
    threshold = Utils.mean(thresholds);
  }
}
for (int i = 0; i < kFoldsCV; i++) {
  Instances train = trainingSet.getDataSet().trainCV(kFoldsCV, i);
  MultiLabelInstances mlTrain = new MultiLabelInstances(train, trainingSet.getLabelsMetaData());
  Instances test = trainingSet.getDataSet().testCV(kFoldsCV, i);