/**
 * Overrides {@code stratify} and forwards the call unchanged to the
 * superclass implementation; no additional behavior is added here.
 *
 * @param arg0 the value handed straight through to {@code super.stratify}
 */
@Override
public void stratify(int arg0) {
  super.stratify(arg0);
}
// Usage note: call stratify() on the dataset BEFORE splitting it into
// train and test sets.
Instances dataSet = ...; dataSet.stratify(numOfFolds);
/**
 * Produces predictions ready for processing by running a cross-validation
 * on the supplied dataset.
 * <p>
 * The method works on an {@code Instances} object constructed from
 * {@code data}, which is randomized with a generator seeded from
 * {@code m_Seed}. If the class attribute is nominal and more than one fold is
 * requested, {@code stratify(numFolds)} is applied as well. The predictions
 * of the individual folds are appended to the result in fold order.
 *
 * @param classifier the Classifier to evaluate
 * @param data the dataset
 * @param numFolds the number of folds in the cross-validation
 * @return the predictions gathered over all folds
 * @exception Exception if an error occurs
 */
public ArrayList<Prediction> getCVPredictions(Classifier classifier,
  Instances data, int numFolds) throws Exception {

  Random rng = new Random(m_Seed);
  Instances shuffled = new Instances(data);
  shuffled.randomize(rng);

  // Stratification is only requested for nominal classes and real splits.
  boolean nominalClass = shuffled.classAttribute().isNominal();
  if (nominalClass && numFolds > 1) {
    shuffled.stratify(numFolds);
  }

  ArrayList<Prediction> collected = new ArrayList<Prediction>();
  for (int foldIdx = 0; foldIdx < numFolds; foldIdx++) {
    // The training split is requested first because it consumes the generator.
    Instances trainSplit = shuffled.trainCV(numFolds, foldIdx, rng);
    Instances testSplit = shuffled.testCV(numFolds, foldIdx);
    collected.addAll(getTrainTestPredictions(classifier, trainSplit, testSplit));
  }
  return collected;
}
/**
 * Produces predictions ready for processing by running a cross-validation
 * on the supplied dataset.
 * <p>
 * The method works on an {@code Instances} object constructed from
 * {@code data}, which is randomized with a generator seeded from
 * {@code m_Seed}. If the class attribute is nominal and more than one fold is
 * requested, {@code stratify(numFolds)} is applied as well. The predictions
 * of the individual folds are appended to the result in fold order.
 *
 * @param classifier the Classifier to evaluate
 * @param data the dataset
 * @param numFolds the number of folds in the cross-validation
 * @return the predictions gathered over all folds
 * @exception Exception if an error occurs
 */
public ArrayList<Prediction> getCVPredictions(Classifier classifier,
  Instances data, int numFolds) throws Exception {

  Random rng = new Random(m_Seed);
  Instances shuffled = new Instances(data);
  shuffled.randomize(rng);

  // Stratification is only requested for nominal classes and real splits.
  boolean nominalClass = shuffled.classAttribute().isNominal();
  if (nominalClass && numFolds > 1) {
    shuffled.stratify(numFolds);
  }

  ArrayList<Prediction> collected = new ArrayList<Prediction>();
  for (int foldIdx = 0; foldIdx < numFolds; foldIdx++) {
    // The training split is requested first because it consumes the generator.
    Instances trainSplit = shuffled.trainCV(numFolds, foldIdx, rng);
    Instances testSplit = shuffled.testCV(numFolds, foldIdx);
    collected.addAll(getTrainTestPredictions(classifier, trainSplit, testSplit));
  }
  return collected;
}
/** * Perform a cross validation for attribute selection. With subset evaluators * the number of times each attribute is selected over the cross validation is * reported. For attribute evaluators, the average merit and average ranking + * std deviation is reported for each attribute. * * @return the results of cross validation as a String * @exception Exception if an error occurs during cross validation */ public String CrossValidateAttributes() throws Exception { Instances cvData = new Instances(m_trainInstances); Instances train; Random random = new Random(m_seed); cvData.randomize(random); if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) { if (cvData.classAttribute().isNominal()) { cvData.stratify(m_numFolds); } } for (int i = 0; i < m_numFolds; i++) { // Perform attribute selection train = cvData.trainCV(m_numFolds, i, random); selectAttributesCVSplit(train); } return CVResultsString(); }
/** * Method for building a pruneable classifier tree. * * @param data the data to build the tree from * @throws Exception if tree can't be built successfully */ public void buildClassifier(Instances data) throws Exception { // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); Random random = new Random(m_seed); data.stratify(numSets); buildTree(data.trainCV(numSets, numSets - 1, random), data.testCV(numSets, numSets - 1), !m_cleanup); if (pruneTheTree) { prune(); } if (m_cleanup) { cleanup(new Instances(data, 0)); } }
/** * Perform a cross validation for attribute selection. With subset evaluators * the number of times each attribute is selected over the cross validation is * reported. For attribute evaluators, the average merit and average ranking + * std deviation is reported for each attribute. * * @return the results of cross validation as a String * @exception Exception if an error occurs during cross validation */ public String CrossValidateAttributes() throws Exception { Instances cvData = new Instances(m_trainInstances); Instances train; Random random = new Random(m_seed); cvData.randomize(random); if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) { if (cvData.classAttribute().isNominal()) { cvData.stratify(m_numFolds); } } for (int i = 0; i < m_numFolds; i++) { // Perform attribute selection train = cvData.trainCV(m_numFolds, i, random); selectAttributesCVSplit(train); } return CVResultsString(); }
/** * Method for building a pruneable classifier tree. * * @param data the data to build the tree from * @throws Exception if tree can't be built successfully */ public void buildClassifier(Instances data) throws Exception { // remove instances with missing class data = new Instances(data); data.deleteWithMissingClass(); Random random = new Random(m_seed); data.stratify(numSets); buildTree(data.trainCV(numSets, numSets - 1, random), data.testCV(numSets, numSets - 1), !m_cleanup); if (pruneTheTree) { prune(); } if (m_cleanup) { cleanup(new Instances(data, 0)); } }
newData.randomize(random); if (newData.classAttribute().isNominal()) { newData.stratify(m_NumFolds);
newData.randomize(random); if (newData.classAttribute().isNominal()) { newData.stratify(m_NumFolds);
trainData.stratify(m_NumFolds);
trainData.stratify(m_NumFolds);
allData.stratify(m_numFoldsBoosting);
allData.stratify(m_numFoldsBoosting);
data.stratify(m_Folds);
getInputFormat().stratify(m_NumFolds); if (!m_Inverse) { instances = getInputFormat().testCV(m_NumFolds, m_Fold - 1);
getInputFormat().stratify(m_NumFolds); if (!m_Inverse) { instances = getInputFormat().testCV(m_NumFolds, m_Fold - 1);
transformed.stratify(folds);
data.randomize(random); if (data.classAttribute().isNominal()) { data.stratify(numFolds);
&& !getPreserveOrder()) { getStepManager().logBasic("Stratifying data"); dataSet.stratify(m_numFolds);