/**
 * Shuffles the instances using the supplied random number generator;
 * simply delegates to the superclass implementation.
 *
 * @param random the random number generator to shuffle with
 */
@Override
public void randomize(Random random) {
  super.randomize(random);
}
/** * Creates the training set for one fold of a cross-validation on the dataset. * The data is subsequently randomized based on the given random number * generator. * * @param numFolds the number of folds in the cross-validation. Must be * greater than 1. * @param numFold 0 for the first fold, 1 for the second, ... * @param random the random number generator * @return the training set * @throws IllegalArgumentException if the number of folds is less than 2 or * greater than the number of instances. */ // @ requires 2 <= numFolds && numFolds < numInstances(); // @ requires 0 <= numFold && numFold < numFolds; public Instances trainCV(int numFolds, int numFold, Random random) { Instances train = trainCV(numFolds, numFold); train.randomize(random); return train; }
/** * Creates the training set for one fold of a cross-validation on the dataset. * The data is subsequently randomized based on the given random number * generator. * * @param numFolds the number of folds in the cross-validation. Must be * greater than 1. * @param numFold 0 for the first fold, 1 for the second, ... * @param random the random number generator * @return the training set * @throws IllegalArgumentException if the number of folds is less than 2 or * greater than the number of instances. */ // @ requires 2 <= numFolds && numFolds < numInstances(); // @ requires 0 <= numFold && numFold < numFolds; public Instances trainCV(int numFolds, int numFold, Random random) { Instances train = trainCV(numFolds, numFold); train.randomize(random); return train; }
/** * Reset the visualize panel's buttons and the plot panels instances * * @param inst the data * @param cIndex the color index */ private void plotReset(Instances inst, int cIndex) { if (m_splitListener == null) { m_submit.setText("Reset"); m_submit.setActionCommand("Reset"); // if (m_origInstances == null || m_origInstances == inst) { if (m_originalPlot == null || m_originalPlot.m_plotInstances == inst) { m_submit.setEnabled(false); } else { m_submit.setEnabled(true); } } else { m_submit.setEnabled(false); } m_plotInstances = inst; if (m_splitListener != null) { m_plotInstances.randomize(new Random()); } m_xIndex = 0; m_yIndex = 0; m_cIndex = cIndex; cancelShapes(); }
/** * Train the required classifier with generated Weka ARFF file. * */ private void trainClassifier() { for (int i = 0; i < numOfModelFiles; i++) { Instances data; try { data = loadInstancesFromARFF(wekaArffFile + String.valueOf(i) + ".arff", "class"); java.util.Random rand = new java.util.Random(); data.randomize(rand); // classifier = new LibLINEAR(); // ((LibLINEAR) classifier).setSVMType(new SelectedTag( // LibLINEAR.SVMTYPE_L2_LR, LibLINEAR.TAGS_SVMTYPE)); classifier.buildClassifier(data); // serialize model weka.core.SerializationHelper.write( modelFile + String.valueOf(i), classifier); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
/**
 * Builds the ensemble: trains m_NumIterations copies of the base classifier,
 * each on a subsample drawn from a freshly shuffled copy of the training set.
 *
 * @param train the training instances
 * @throws Exception if a base model cannot be built
 */
@Override
public void buildClassifier(Instances train) throws Exception {
  testCapabilities(train);
  if (getDebug()) {
    System.out.print("-: Models: ");
  }
  // work on a copy so the caller's dataset order is left untouched
  train = new Instances(train);
  m_Classifiers = ProblemTransformationMethod.makeCopies(
      (ProblemTransformationMethod) m_Classifier, m_NumIterations);
  final int bagSize = train.numInstances() * m_BagSizePercent / 100;
  for (int i = 0; i < m_NumIterations; i++) {
    if (getDebug()) {
      System.out.print("" + i + " ");
    }
    // give randomizable members distinct seeds
    if (m_Classifiers[i] instanceof Randomizable) {
      ((Randomizable) m_Classifiers[i]).setSeed(i);
    }
    // reshuffle, then train the i-th model on the first bagSize instances
    train.randomize(new Random(m_Seed + i));
    m_Classifiers[i].buildClassifier(new Instances(train, 0, bagSize));
  }
  if (getDebug()) {
    System.out.println(":-");
  }
}
/**
 * Builds the ensemble by training m_NumIterations copies of the base
 * classifier, each on the leading slice of a reshuffled training copy.
 *
 * @param train the training instances
 * @throws Exception if a member model cannot be built
 */
@Override
public void buildClassifier(Instances train) throws Exception {
  testCapabilities(train);
  if (getDebug()) {
    System.out.print("-: Models: ");
  }
  // copy so the caller's dataset is not reordered by the shuffles below
  train = new Instances(train);
  m_Classifiers = MultilabelClassifier.makeCopies(
      (MultilabelClassifier) m_Classifier, m_NumIterations);
  final int bagSize = train.numInstances() * m_BagSizePercent / 100;
  for (int i = 0; i < m_NumIterations; i++) {
    if (getDebug()) {
      System.out.print("" + i + " ");
    }
    // distinct seed per randomizable ensemble member
    if (m_Classifiers[i] instanceof Randomizable) {
      ((Randomizable) m_Classifiers[i]).setSeed(i);
    }
    train.randomize(new Random(m_Seed + i));
    m_Classifiers[i].buildClassifier(new Instances(train, 0, bagSize));
  }
  if (getDebug()) {
    System.out.println(":-");
  }
}
/**
 * Trains the bagged ensemble: for each of the m_NumIterations members, the
 * training copy is reshuffled and the member is fit on its leading subsample.
 *
 * @param train the training instances
 * @throws Exception if building any member fails
 */
@Override
public void buildClassifier(Instances train) throws Exception {
  testCapabilities(train);
  if (getDebug()) {
    System.out.print("-: Models: ");
  }
  // defensive copy: randomize() below mutates instance order in place
  train = new Instances(train);
  m_Classifiers = ProblemTransformationMethod.makeCopies(
      (ProblemTransformationMethod) m_Classifier, m_NumIterations);
  final int subsetSize = train.numInstances() * m_BagSizePercent / 100;
  for (int i = 0; i < m_NumIterations; i++) {
    if (getDebug()) {
      System.out.print("" + i + " ");
    }
    if (m_Classifiers[i] instanceof Randomizable) {
      ((Randomizable) m_Classifiers[i]).setSeed(i);
    }
    train.randomize(new Random(m_Seed + i));
    Instances subset = new Instances(train, 0, subsetSize);
    m_Classifiers[i].buildClassifier(subset);
  }
  if (getDebug()) {
    System.out.println(":-");
  }
}
/**
 * Generates predictions ready for processing by running an n-fold
 * cross-validation of the given classifier on the supplied dataset.
 *
 * @param classifier the Classifier to evaluate
 * @param data the dataset
 * @param numFolds the number of folds in the cross-validation.
 * @return the accumulated predictions over all folds
 * @exception Exception if an error occurs
 */
public ArrayList<Prediction> getCVPredictions(Classifier classifier,
    Instances data, int numFolds) throws Exception {
  final Instances runInstances = new Instances(data);
  final Random random = new Random(m_Seed);
  runInstances.randomize(random);
  // stratify nominal-class data so each fold keeps the class distribution
  if (runInstances.classAttribute().isNominal() && numFolds > 1) {
    runInstances.stratify(numFolds);
  }
  final ArrayList<Prediction> predictions = new ArrayList<Prediction>();
  for (int fold = 0; fold < numFolds; fold++) {
    Instances train = runInstances.trainCV(numFolds, fold, random);
    Instances test = runInstances.testCV(numFolds, fold);
    predictions.addAll(getTrainTestPredictions(classifier, train, test));
  }
  return predictions;
}
// Runs a manual someFolds-fold cross-validation of 'learner' on 'data' and
// aggregates the per-fold results into a MultipleEvaluation.
// NOTE(review): the working set is shuffled but never stratified, and a fold
// that throws leaves evaluation[i] == null — confirm MultipleEvaluation
// tolerates null entries.
private MultipleEvaluation innerCrossValidate(MultiLabelLearner learner,
        MultiLabelInstances data, boolean hasMeasures, List<Measure> measures,
        int someFolds) {
    Evaluation[] evaluation = new Evaluation[someFolds];
    // shuffle a copy so the caller's dataset order is preserved
    Instances workingSet = new Instances(data.getDataSet());
    workingSet.randomize(new Random(seed));
    for (int i = 0; i < someFolds; i++) {
        System.out.println("Fold " + (i + 1) + "/" + someFolds);
        try {
            Instances train = workingSet.trainCV(someFolds, i);
            Instances test = workingSet.testCV(someFolds, i);
            // re-wrap the raw splits with the original label metadata
            MultiLabelInstances mlTrain = new MultiLabelInstances(train,
                    data.getLabelsMetaData());
            MultiLabelInstances mlTest = new MultiLabelInstances(test,
                    data.getLabelsMetaData());
            // a fresh copy of the learner is trained for every fold
            MultiLabelLearner clone = learner.makeCopy();
            clone.build(mlTrain);
            if (hasMeasures)
                evaluation[i] = evaluate(clone, mlTest, measures);
            else
                evaluation[i] = evaluate(clone, mlTest);
        } catch (Exception ex) {
            // a failed fold is logged and skipped; its evaluation slot stays null
            Logger.getLogger(Evaluator.class.getName()).log(Level.SEVERE, null,
                    ex);
        }
    }
    MultipleEvaluation me = new MultipleEvaluation(evaluation, data);
    me.calculateStatistics();
    return me;
}
}
/** * Perform a cross validation for attribute selection. With subset evaluators * the number of times each attribute is selected over the cross validation is * reported. For attribute evaluators, the average merit and average ranking + * std deviation is reported for each attribute. * * @return the results of cross validation as a String * @exception Exception if an error occurs during cross validation */ public String CrossValidateAttributes() throws Exception { Instances cvData = new Instances(m_trainInstances); Instances train; Random random = new Random(m_seed); cvData.randomize(random); if (!(m_ASEvaluator instanceof UnsupervisedSubsetEvaluator) && !(m_ASEvaluator instanceof UnsupervisedAttributeEvaluator)) { if (cvData.classAttribute().isNominal()) { cvData.stratify(m_numFolds); } } for (int i = 0; i < m_numFolds; i++) { // Perform attribute selection train = cvData.trainCV(m_numFolds, i, random); selectAttributesCVSplit(train); } return CVResultsString(); }
/** * Builds the clusterer. * * @param data the training instances. * @throws Exception if something goes wrong. */ @Override public void buildClusterer(Instances data) throws Exception { m_numberOfClusters = -1; m_cobwebTree = null; m_numberSplits = 0; m_numberMerges = 0; // can clusterer handle the data? getCapabilities().testWithFail(data); // randomize the instances data = new Instances(data); if (getSeed() >= 0) { data.randomize(new Random(getSeed())); } for (int i = 0; i < data.numInstances(); i++) { updateClusterer(data.instance(i)); } updateFinished(); }
/** * Builds the clusterer. * * @param data the training instances. * @throws Exception if something goes wrong. */ @Override public void buildClusterer(Instances data) throws Exception { m_numberOfClusters = -1; m_cobwebTree = null; m_numberSplits = 0; m_numberMerges = 0; // can clusterer handle the data? getCapabilities().testWithFail(data); // randomize the instances data = new Instances(data); if (getSeed() >= 0) { data.randomize(new Random(getSeed())); } for (int i = 0; i < data.numInstances(); i++) { updateClusterer(data.instance(i)); } updateFinished(); }
/** * Signify that this batch of input to the filter is finished. If the filter * requires all instances prior to filtering, output() may now be called to * retrieve the filtered instances. Any subsequent instances filtered should * be filtered based on setting obtained from the first batch (unless the * setInputFormat has been re-assigned or new options have been set). This * implementation randomizes all the instances received in the batch. * * @return true if there are instances pending output * @throws IllegalStateException if no input format has been set. */ @Override public boolean batchFinished() { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (!isFirstBatchDone()) { getInputFormat().randomize(m_Random); } for (int i = 0; i < getInputFormat().numInstances(); i++) { push(getInputFormat().instance(i), false); // No need to copy because of bufferInput() } flushInput(); m_NewBatch = true; m_FirstBatchDone = true; return (numPendingOutput() != 0); }
/** * Signify that this batch of input to the filter is finished. If the filter * requires all instances prior to filtering, output() may now be called to * retrieve the filtered instances. Any subsequent instances filtered should * be filtered based on setting obtained from the first batch (unless the * setInputFormat has been re-assigned or new options have been set). This * implementation randomizes all the instances received in the batch. * * @return true if there are instances pending output * @throws IllegalStateException if no input format has been set. */ @Override public boolean batchFinished() { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } if (!isFirstBatchDone()) { getInputFormat().randomize(m_Random); } for (int i = 0; i < getInputFormat().numInstances(); i++) { push(getInputFormat().instance(i), false); // No need to copy because of bufferInput() } flushInput(); m_NewBatch = true; m_FirstBatchDone = true; return (numPendingOutput() != 0); }
/**
 * LEAD with a dependency-measure type: shuffles a copy of D, splits it 60/40
 * into train/test, evaluates a BR model (base classifier h) on the split, and
 * runs LEAD on the test portion with the given measure type.
 *
 * @param D the dataset
 * @param h the base classifier for the internal BR model
 * @param r the random number generator used for the shuffle
 * @param MDType the dependency-measure type passed through to LEAD
 * @return the matrix produced by LEAD on the test split
 * @throws Exception if evaluation fails
 */
public static double[][] LEAD(Instances D, Classifier h, Random r, String MDType) throws Exception {
  // shuffle a copy, then take the first 60% for training
  Instances shuffled = new Instances(D);
  shuffled.randomize(r);
  int trainSize = shuffled.numInstances() * 60 / 100;
  Instances trainSet = new Instances(shuffled, 0, trainSize);
  Instances testSet = new Instances(shuffled, trainSize, shuffled.numInstances() - trainSize);
  BR br = new BR();
  br.setClassifier(h);
  Result result = Evaluation.evaluateModel((MultiLabelClassifier) br, trainSet, testSet, "PCut1", "1");
  return LEAD(testSet, result, MDType);
}
/**
 * LEAD variant taking a dependency-measure type. The dataset is shuffled
 * (copy), split 60/40, a BR model over h is evaluated on the split, and the
 * LEAD computation is applied to the test portion.
 *
 * @param D the dataset
 * @param h the base classifier wrapped by BR
 * @param r the random number generator for shuffling
 * @param MDType the measure type forwarded to LEAD
 * @return the LEAD result matrix
 * @throws Exception if model evaluation fails
 */
public static double[][] LEAD(Instances D, Classifier h, Random r, String MDType) throws Exception {
  Instances copy = new Instances(D);
  copy.randomize(r);
  // 60% train / 40% test split of the shuffled copy
  int splitPoint = copy.numInstances() * 60 / 100;
  Instances trainPart = new Instances(copy, 0, splitPoint);
  Instances testPart = new Instances(copy, splitPoint, copy.numInstances() - splitPoint);
  BR br = new BR();
  br.setClassifier(h);
  Result result = Evaluation.evaluateModel((MultiLabelClassifier) br, trainPart, testPart, "PCut1", "1");
  return LEAD(testPart, result, MDType);
}
/**
 * Builds the ensemble: each member is trained on a random subsample obtained
 * by reshuffling the data and keeping 'percentage' percent of it.
 *
 * @param trainingSet the multi-label training data
 * @throws Exception if building an ensemble member fails
 */
@Override
protected void buildInternal(MultiLabelInstances trainingSet) throws Exception {
  Instances dataSet = new Instances(trainingSet.getDataSet());
  for (int i = 0; i < numOfModels; i++) {
    // reshuffle before drawing each member's subsample
    dataSet.randomize(rand);
    RemovePercentage filter = new RemovePercentage();
    filter.setInputFormat(dataSet);
    filter.setPercentage(percentage);
    // invert so the retained split is 'percentage' percent of the data
    filter.setInvertSelection(true);
    Instances sample = Filter.useFilter(dataSet, filter);
    // re-attach the label metadata to the filtered split
    MultiLabelInstances memberTrain =
        new MultiLabelInstances(sample, trainingSet.getLabelsMetaData());
    ensemble[i].build(memberTrain);
  }
}
/**
 * LEAD - Performs LEAD on dataset 'D', using BR with base classifier 'h',
 * under random seed 'r'.
 * <br>
 * WARNING: changing this method will affect the performance of e.g., BCC --
 * on the other hand the original BCC paper did not use LEAD, so don't worry.
 */
public static double[][] LEAD(Instances D, Classifier h, Random r) throws Exception {
  // shuffle a copy of D and split it 60% train / 40% test
  Instances shuffled = new Instances(D);
  shuffled.randomize(r);
  int trainSize = shuffled.numInstances() * 60 / 100;
  Instances trainSet = new Instances(shuffled, 0, trainSize);
  Instances testSet = new Instances(shuffled, trainSize, shuffled.numInstances() - trainSize);
  BR br = new BR();
  br.setClassifier(h);
  Result result = Evaluation.evaluateModel((MultiLabelClassifier) br, trainSet, testSet, "PCut1", "1");
  return LEAD2(testSet, result);
}
/**
 * LEAD - Performs LEAD on dataset 'D', using BR with base classifier 'h',
 * under random seed 'r'.
 * <br>
 * WARNING: changing this method will affect the performance of e.g., BCC --
 * on the other hand the original BCC paper did not use LEAD, so don't worry.
 */
public static double[][] LEAD(Instances D, Classifier h, Random r) throws Exception {
  Instances copy = new Instances(D);
  copy.randomize(r);
  // first 60% of the shuffled copy trains the internal BR model
  int splitPoint = copy.numInstances() * 60 / 100;
  Instances trainPart = new Instances(copy, 0, splitPoint);
  Instances testPart = new Instances(copy, splitPoint, copy.numInstances() - splitPoint);
  BR br = new BR();
  br.setClassifier(h);
  Result result = Evaluation.evaluateModel((MultiLabelClassifier) br, trainPart, testPart, "PCut1", "1");
  return LEAD2(testPart, result);
}