/**
 * Shuffles the instances using the supplied random number generator;
 * simply delegates to the superclass implementation.
 *
 * @param random the random number generator to shuffle with
 */
@Override
public void randomize(Random random) {
  super.randomize(random);
}
/** * Creates the training set for one fold of a cross-validation on the dataset. * The data is subsequently randomized based on the given random number * generator. * * @param numFolds the number of folds in the cross-validation. Must be * greater than 1. * @param numFold 0 for the first fold, 1 for the second, ... * @param random the random number generator * @return the training set * @throws IllegalArgumentException if the number of folds is less than 2 or * greater than the number of instances. */ // @ requires 2 <= numFolds && numFolds < numInstances(); // @ requires 0 <= numFold && numFold < numFolds; public Instances trainCV(int numFolds, int numFold, Random random) { Instances train = trainCV(numFolds, numFold); train.randomize(random); return train; }
/** * Creates the training set for one fold of a cross-validation on the dataset. * The data is subsequently randomized based on the given random number * generator. * * @param numFolds the number of folds in the cross-validation. Must be * greater than 1. * @param numFold 0 for the first fold, 1 for the second, ... * @param random the random number generator * @return the training set * @throws IllegalArgumentException if the number of folds is less than 2 or * greater than the number of instances. */ // @ requires 2 <= numFolds && numFolds < numInstances(); // @ requires 0 <= numFold && numFold < numFolds; public Instances trainCV(int numFolds, int numFold, Random random) { Instances train = trainCV(numFolds, numFold); train.randomize(random); return train; }
/** * Reset the visualize panel's buttons and the plot panels instances * * @param inst the data * @param cIndex the color index */ private void plotReset(Instances inst, int cIndex) { if (m_splitListener == null) { m_submit.setText("Reset"); m_submit.setActionCommand("Reset"); // if (m_origInstances == null || m_origInstances == inst) { if (m_originalPlot == null || m_originalPlot.m_plotInstances == inst) { m_submit.setEnabled(false); } else { m_submit.setEnabled(true); } } else { m_submit.setEnabled(false); } m_plotInstances = inst; if (m_splitListener != null) { m_plotInstances.randomize(new Random()); } m_xIndex = 0; m_yIndex = 0; m_cIndex = cIndex; cancelShapes(); }
/** * Train the required classifier with generated Weka ARFF file. * */ private void trainClassifier() { for (int i = 0; i < numOfModelFiles; i++) { Instances data; try { data = loadInstancesFromARFF(wekaArffFile + String.valueOf(i) + ".arff", "class"); java.util.Random rand = new java.util.Random(); data.randomize(rand); // classifier = new LibLINEAR(); // ((LibLINEAR) classifier).setSVMType(new SelectedTag( // LibLINEAR.SVMTYPE_L2_LR, LibLINEAR.TAGS_SVMTYPE)); classifier.buildClassifier(data); // serialize model weka.core.SerializationHelper.write( modelFile + String.valueOf(i), classifier); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
/**
 * Builds the ensemble: trains m_NumIterations copies of the base classifier,
 * each on a subsample drawn from a freshly shuffled copy of the training set.
 *
 * @param train the training instances
 * @throws Exception if a base model cannot be built
 */
@Override
public void buildClassifier(Instances train) throws Exception {
  testCapabilities(train);
  if (getDebug()) {
    System.out.print("-: Models: ");
  }
  // work on a copy so the caller's dataset order is left untouched
  train = new Instances(train);
  m_Classifiers = ProblemTransformationMethod.makeCopies(
      (ProblemTransformationMethod) m_Classifier, m_NumIterations);
  final int bagSize = train.numInstances() * m_BagSizePercent / 100;
  for (int i = 0; i < m_NumIterations; i++) {
    if (getDebug()) {
      System.out.print("" + i + " ");
    }
    // give randomizable members distinct seeds
    if (m_Classifiers[i] instanceof Randomizable) {
      ((Randomizable) m_Classifiers[i]).setSeed(i);
    }
    // reshuffle, then train the i-th model on the first bagSize instances
    train.randomize(new Random(m_Seed + i));
    m_Classifiers[i].buildClassifier(new Instances(train, 0, bagSize));
  }
  if (getDebug()) {
    System.out.println(":-");
  }
}
/**
 * Builds the ensemble by training m_NumIterations copies of the base
 * classifier, each on the leading slice of a reshuffled training copy.
 *
 * @param train the training instances
 * @throws Exception if a member model cannot be built
 */
@Override
public void buildClassifier(Instances train) throws Exception {
  testCapabilities(train);
  if (getDebug()) {
    System.out.print("-: Models: ");
  }
  // copy so the caller's dataset is not reordered by the shuffles below
  train = new Instances(train);
  m_Classifiers = MultilabelClassifier.makeCopies(
      (MultilabelClassifier) m_Classifier, m_NumIterations);
  final int bagSize = train.numInstances() * m_BagSizePercent / 100;
  for (int i = 0; i < m_NumIterations; i++) {
    if (getDebug()) {
      System.out.print("" + i + " ");
    }
    // distinct seed per randomizable ensemble member
    if (m_Classifiers[i] instanceof Randomizable) {
      ((Randomizable) m_Classifiers[i]).setSeed(i);
    }
    train.randomize(new Random(m_Seed + i));
    m_Classifiers[i].buildClassifier(new Instances(train, 0, bagSize));
  }
  if (getDebug()) {
    System.out.println(":-");
  }
}
/**
 * Trains the bagged ensemble: for each of the m_NumIterations members, the
 * training copy is reshuffled and the member is fit on its leading subsample.
 *
 * @param train the training instances
 * @throws Exception if building any member fails
 */
@Override
public void buildClassifier(Instances train) throws Exception {
  testCapabilities(train);
  if (getDebug()) {
    System.out.print("-: Models: ");
  }
  // defensive copy: randomize() below mutates instance order in place
  train = new Instances(train);
  m_Classifiers = ProblemTransformationMethod.makeCopies(
      (ProblemTransformationMethod) m_Classifier, m_NumIterations);
  final int subsetSize = train.numInstances() * m_BagSizePercent / 100;
  for (int i = 0; i < m_NumIterations; i++) {
    if (getDebug()) {
      System.out.print("" + i + " ");
    }
    if (m_Classifiers[i] instanceof Randomizable) {
      ((Randomizable) m_Classifiers[i]).setSeed(i);
    }
    train.randomize(new Random(m_Seed + i));
    Instances subset = new Instances(train, 0, subsetSize);
    m_Classifiers[i].buildClassifier(subset);
  }
  if (getDebug()) {
    System.out.println(":-");
  }
}
/**
 * Generates predictions ready for processing by running an n-fold
 * cross-validation of the given classifier on the supplied dataset.
 *
 * @param classifier the Classifier to evaluate
 * @param data the dataset
 * @param numFolds the number of folds in the cross-validation.
 * @return the accumulated predictions over all folds
 * @exception Exception if an error occurs
 */
public ArrayList<Prediction> getCVPredictions(Classifier classifier,
    Instances data, int numFolds) throws Exception {
  final Instances runInstances = new Instances(data);
  final Random random = new Random(m_Seed);
  runInstances.randomize(random);
  // stratify nominal-class data so each fold keeps the class distribution
  if (runInstances.classAttribute().isNominal() && numFolds > 1) {
    runInstances.stratify(numFolds);
  }
  final ArrayList<Prediction> predictions = new ArrayList<Prediction>();
  for (int fold = 0; fold < numFolds; fold++) {
    Instances train = runInstances.trainCV(numFolds, fold, random);
    Instances test = runInstances.testCV(numFolds, fold);
    predictions.addAll(getTrainTestPredictions(classifier, train, test));
  }
  return predictions;
}
// Runs a manual someFolds-fold cross-validation of 'learner' on 'data' and
// aggregates the per-fold results into a MultipleEvaluation.
// NOTE(review): the working set is shuffled but never stratified, and a fold
// that throws leaves evaluation[i] == null — confirm MultipleEvaluation
// tolerates null entries.
private MultipleEvaluation innerCrossValidate(MultiLabelLearner learner,
        MultiLabelInstances data, boolean hasMeasures, List<Measure> measures,
        int someFolds) {
    Evaluation[] evaluation = new Evaluation[someFolds];
    // shuffle a copy so the caller's dataset order is preserved
    Instances workingSet = new Instances(data.getDataSet());
    workingSet.randomize(new Random(seed));
    for (int i = 0; i < someFolds; i++) {
        System.out.println("Fold " + (i + 1) + "/" + someFolds);
        try {
            Instances train = workingSet.trainCV(someFolds, i);
            Instances test = workingSet.testCV(someFolds, i);
            // re-wrap the raw splits with the original label metadata
            MultiLabelInstances mlTrain = new MultiLabelInstances(train,
                    data.getLabelsMetaData());
            MultiLabelInstances mlTest = new MultiLabelInstances(test,
                    data.getLabelsMetaData());
            // a fresh copy of the learner is trained for every fold
            MultiLabelLearner clone = learner.makeCopy();
            clone.build(mlTrain);
            if (hasMeasures)
                evaluation[i] = evaluate(clone, mlTest, measures);
            else
                evaluation[i] = evaluate(clone, mlTest);
        } catch (Exception ex) {
            // a failed fold is logged and skipped; its evaluation slot stays null
            Logger.getLogger(Evaluator.class.getName()).log(Level.SEVERE, null,
                    ex);
        }
    }
    MultipleEvaluation me = new MultipleEvaluation(evaluation, data);
    me.calculateStatistics();
    return me;
}
}
/** * Perform a cross validation for attribute selection. With subset evaluators * the number of times each attribute is selected over the cross validation is * reported. For attribute evaluators, the average merit and average ranking + * std deviation is reported for each attribute. * * @return the results of cross validation as a String * @exception Exception if an error occurs during cross validation */ public String CrossValidateAttributes() throws Exception { Instances cvData = new Instances(m_trainInstances); Instances train; Random random = new Random(m_seed); cvData.randomize(random); if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) { if (cvData.classAttribute().isNominal()) { cvData.stratify(m_numFolds); } } for (int i = 0; i < m_numFolds; i++) { // Perform attribute selection train = cvData.trainCV(m_numFolds, i, random); selectAttributesCVSplit(train); } return CVResultsString(); }
/** * Builds the clusterer. * * @param data the training instances. * @throws Exception if something goes wrong. */ @Override public void buildClusterer(Instances data) throws Exception { m_numberOfClusters = -1; m_cobwebTree = null; m_numberSplits = 0; m_numberMerges = 0; // can clusterer handle the data? getCapabilities().testWithFail(data); // randomize the instances data = new Instances(data); if (getSeed() >= 0) { data.randomize(new Random(getSeed())); } for (int i = 0; i < data.numInstances(); i++) { updateClusterer(data.instance(i)); } updateFinished(); }
/** * Builds the clusterer. * * @param data the training instances. * @throws Exception if something goes wrong. */ @Override public void buildClusterer(Instances data) throws Exception { m_numberOfClusters = -1; m_cobwebTree = null; m_numberSplits = 0; m_numberMerges = 0; // can clusterer handle the data? getCapabilities().testWithFail(data); // randomize the instances data = new Instances(data); if (getSeed() >= 0) { data.randomize(new Random(getSeed())); } for (int i = 0; i < data.numInstances(); i++) { updateClusterer(data.instance(i)); } updateFinished(); }
/** * Signify that this batch of input to the filter is finished. If the filter * requires all instances prior to filtering, output() may now be called to * retrieve the filtered instances. Any subsequent instances filtered should * be filtered based on setting obtained from the first batch (unless the * setInputFormat has been re-assigned or new options have been set). This * implementation randomizes all the instances received in the batch. * * @return true if there are instances pending output * @throws IllegalStateException if no input format has been set. */ @Override public boolean batchFinished() { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (!isFirstBatchDone()) { getInputFormat().randomize(m_Random); } for (int i = 0; i < getInputFormat().numInstances(); i++) { push(getInputFormat().instance(i), false); // No need to copy because of bufferInput() } flushInput(); m_NewBatch = true; m_FirstBatchDone = true; return (numPendingOutput() != 0); }
/** * Signify that this batch of input to the filter is finished. If the filter * requires all instances prior to filtering, output() may now be called to * retrieve the filtered instances. Any subsequent instances filtered should * be filtered based on setting obtained from the first batch (unless the * setInputFormat has been re-assigned or new options have been set). This * implementation randomizes all the instances received in the batch. * * @return true if there are instances pending output * @throws IllegalStateException if no input format has been set. */ @Override public boolean batchFinished() { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (!isFirstBatchDone()) { getInputFormat().randomize(m_Random); } for (int i = 0; i < getInputFormat().numInstances(); i++) { push(getInputFormat().instance(i), false); // No need to copy because of bufferInput() } flushInput(); m_NewBatch = true; m_FirstBatchDone = true; return (numPendingOutput() != 0); }
/**
 * LEAD with a dependency-measure type: shuffles a copy of D, splits it 60/40
 * into train/test, evaluates a BR model (base classifier h) on the split, and
 * runs LEAD on the test portion with the given measure type.
 *
 * @param D the dataset
 * @param h the base classifier for the internal BR model
 * @param r the random number generator used for the shuffle
 * @param MDType the dependency-measure type passed through to LEAD
 * @return the matrix produced by LEAD on the test split
 * @throws Exception if evaluation fails
 */
public static double[][] LEAD(Instances D, Classifier h, Random r, String MDType) throws Exception {
  // shuffle a copy, then take the first 60% for training
  Instances shuffled = new Instances(D);
  shuffled.randomize(r);
  int trainSize = shuffled.numInstances() * 60 / 100;
  Instances trainSet = new Instances(shuffled, 0, trainSize);
  Instances testSet = new Instances(shuffled, trainSize, shuffled.numInstances() - trainSize);
  BR br = new BR();
  br.setClassifier(h);
  Result result = Evaluation.evaluateModel((MultiLabelClassifier) br, trainSet, testSet, "PCut1", "1");
  return LEAD(testSet, result, MDType);
}
/**
 * LEAD variant taking a dependency-measure type. The dataset is shuffled
 * (copy), split 60/40, a BR model over h is evaluated on the split, and the
 * LEAD computation is applied to the test portion.
 *
 * @param D the dataset
 * @param h the base classifier wrapped by BR
 * @param r the random number generator for shuffling
 * @param MDType the measure type forwarded to LEAD
 * @return the LEAD result matrix
 * @throws Exception if model evaluation fails
 */
public static double[][] LEAD(Instances D, Classifier h, Random r, String MDType) throws Exception {
  Instances copy = new Instances(D);
  copy.randomize(r);
  // 60% train / 40% test split of the shuffled copy
  int splitPoint = copy.numInstances() * 60 / 100;
  Instances trainPart = new Instances(copy, 0, splitPoint);
  Instances testPart = new Instances(copy, splitPoint, copy.numInstances() - splitPoint);
  BR br = new BR();
  br.setClassifier(h);
  Result result = Evaluation.evaluateModel((MultiLabelClassifier) br, trainPart, testPart, "PCut1", "1");
  return LEAD(testPart, result, MDType);
}
/**
 * Builds the ensemble: each member is trained on a random subsample obtained
 * by reshuffling the data and keeping 'percentage' percent of it.
 *
 * @param trainingSet the multi-label training data
 * @throws Exception if building an ensemble member fails
 */
@Override
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {
  Instances dataSet = new Instances(trainingSet.getDataSet());
  for (int i = 0; i < numOfModels; i++) {
    // reshuffle before drawing each member's subsample
    dataSet.randomize(rand);
    RemovePercentage filter = new RemovePercentage();
    filter.setInputFormat(dataSet);
    filter.setPercentage(percentage);
    // invert so the retained split is 'percentage' percent of the data
    filter.setInvertSelection(true);
    Instances sample = Filter.useFilter(dataSet, filter);
    // re-attach the label metadata to the filtered split
    MultiLabelInstances memberTrain =
        new MultiLabelInstances(sample, trainingSet.getLabelsMetaData());
    ensemble[i].build(memberTrain);
  }
}
/**
 * LEAD - Performs LEAD on dataset 'D', using BR with base classifier 'h',
 * under random seed 'r'.
 * <br>
 * WARNING: changing this method will affect the performance of e.g., BCC --
 * on the other hand the original BCC paper did not use LEAD, so don't worry.
 */
public static double[][] LEAD(Instances D, Classifier h, Random r) throws Exception {
  // shuffle a copy of D and split it 60% train / 40% test
  Instances shuffled = new Instances(D);
  shuffled.randomize(r);
  int trainSize = shuffled.numInstances() * 60 / 100;
  Instances trainSet = new Instances(shuffled, 0, trainSize);
  Instances testSet = new Instances(shuffled, trainSize, shuffled.numInstances() - trainSize);
  BR br = new BR();
  br.setClassifier(h);
  Result result = Evaluation.evaluateModel((MultiLabelClassifier) br, trainSet, testSet, "PCut1", "1");
  return LEAD2(testSet, result);
}
/**
 * LEAD - Performs LEAD on dataset 'D', using BR with base classifier 'h',
 * under random seed 'r'.
 * <br>
 * WARNING: changing this method will affect the performance of e.g., BCC --
 * on the other hand the original BCC paper did not use LEAD, so don't worry.
 */
public static double[][] LEAD(Instances D, Classifier h, Random r) throws Exception {
  Instances copy = new Instances(D);
  copy.randomize(r);
  // first 60% of the shuffled copy trains the internal BR model
  int splitPoint = copy.numInstances() * 60 / 100;
  Instances trainPart = new Instances(copy, 0, splitPoint);
  Instances testPart = new Instances(copy, splitPoint, copy.numInstances() - splitPoint);
  BR br = new BR();
  br.setClassifier(h);
  Result result = Evaluation.evaluateModel((MultiLabelClassifier) br, trainPart, testPart, "PCut1", "1");
  return LEAD2(testPart, result);
}