/**
 * Splits this data set into a training set and a test set for one fold of a
 * cross-validation.
 * <p>
 * A copy of the samples is shuffled with a {@link Random} seeded by
 * {@code randomSeed} and then dealt round-robin into {@code numberOfCVSets}
 * folds, i.e. the shuffled sample at position {@code j} lands in fold
 * {@code j % numberOfCVSets}. Fold {@code idOfCVSet} becomes the test set; all
 * other folds are appended, in fold order, to the training set. This method
 * does not clone the samples.
 *
 * @param numberOfCVSets
 *            number of folds, must be positive
 * @param idOfCVSet
 *            zero-based index of the fold that is used as test set
 * @param randomSeed
 *            seed for the shuffle; it is also part of both data set labels
 * @return a list with the training set at index 0 and the test set at index 1
 * @throws IllegalArgumentException
 *             if {@code numberOfCVSets} is not positive
 * @throws IndexOutOfBoundsException
 *             if {@code idOfCVSet} is negative or not smaller than
 *             {@code numberOfCVSets}
 */
public final List<ListDataSet> splitForCV(int numberOfCVSets, int idOfCVSet, long randomSeed) {
    if (numberOfCVSets <= 0) {
        // Without this check a non-empty data set could never be distributed
        // and the dealing loop would spin forever.
        throw new IllegalArgumentException("numberOfCVSets must be positive: " + numberOfCVSets);
    }
    if (idOfCVSet < 0 || idOfCVSet >= numberOfCVSets) {
        throw new IndexOutOfBoundsException(
                "idOfCVSet " + idOfCVSet + " is outside [0, " + numberOfCVSets + ")");
    }

    List<Sample> shuffled = new ArrayList<Sample>(this);
    Collections.shuffle(shuffled, new Random(randomSeed));

    List<List<Sample>> folds = new ArrayList<List<Sample>>(numberOfCVSets);
    for (int fold = 0; fold < numberOfCVSets; fold++) {
        folds.add(new ArrayList<Sample>());
    }

    // Deal by position instead of repeatedly removing the head of an
    // ArrayList, which shifts all remaining elements on every call.
    for (int j = 0; j < shuffled.size(); j++) {
        folds.get(j % numberOfCVSets).add(shuffled.get(j));
    }

    ListDataSet testSet = DataSet.Factory.labeledDataSet("TestSet" + randomSeed + "-" + idOfCVSet);
    testSet.addAll(folds.get(idOfCVSet));

    ListDataSet trainingSet = DataSet.Factory.labeledDataSet("TrainingSet" + randomSeed + "-" + idOfCVSet);
    for (int fold = 0; fold < numberOfCVSets; fold++) {
        if (fold != idOfCVSet) {
            trainingSet.addAll(folds.get(fold));
        }
    }

    List<ListDataSet> returnDataSets = new ArrayList<ListDataSet>();
    returnDataSets.add(trainingSet);
    returnDataSets.add(testSet);
    return returnDataSets;
}
/**
 * Partitions the samples of this data set into consecutive parts of the
 * requested sizes plus one final part holding everything that is left.
 * <p>
 * Samples are moved out of a working copy of this data set. For every entry
 * of {@code count} a data set labeled {@code "DataSet" + index} is filled with
 * that many samples: either the current first sample of the working copy or,
 * when {@code shuffle} is set, the sample at an index obtained from
 * {@code MathUtil.nextInteger(remainingSize)}. The remaining samples go into a
 * last data set labeled {@code "DataSet" + count.length}.
 *
 * @param shuffle
 *            {@code true} to pick samples via {@code MathUtil.nextInteger},
 *            {@code false} to take them in their existing order
 * @param count
 *            number of samples for each leading part
 * @return {@code count.length + 1} data sets, the last one containing the
 *         samples not assigned to any leading part
 */
public final List<ListDataSet> splitByCount(boolean shuffle, int... count) {
    List<Sample> remaining = new FastArrayList<Sample>();
    remaining.addAll(this);
    List<ListDataSet> parts = new ArrayList<ListDataSet>();

    int partIndex = 0;
    for (int wanted : count) {
        ListDataSet part = DataSet.Factory.labeledDataSet("DataSet" + partIndex);
        int taken = 0;
        while (taken < wanted) {
            Sample next = shuffle
                    ? remaining.remove(MathUtil.nextInteger(remaining.size()))
                    : remaining.remove(0);
            part.add(next);
            taken++;
        }
        parts.add(part);
        partIndex++;
    }

    // Whatever was not handed out above forms the trailing part.
    ListDataSet rest = DataSet.Factory.labeledDataSet("DataSet" + count.length);
    rest.addAll(remaining);
    parts.add(rest);
    return parts;
}
/**
 * Groups the samples of this data set by their target class.
 * <p>
 * For every class index from 0 up to (but excluding) {@code getClassCount()}
 * a data set labeled {@code "Class " + index} is created and filled with
 * clones of all samples whose {@code getTargetClass()} equals that index.
 *
 * @return one data set per class index, ordered by class index
 */
public final List<ListDataSet> splitByClass() {
    List<ListDataSet> perClass = new ArrayList<ListDataSet>();
    int classId = 0;
    while (classId < getClassCount()) {
        ListDataSet members = DataSet.Factory.labeledDataSet("Class " + classId);
        for (Sample candidate : this) {
            if (candidate.getTargetClass() != classId) {
                continue;
            }
            // Clones are stored, not the samples held by this data set.
            members.add(candidate.clone());
        }
        perClass.add(members);
        classId++;
    }
    return perClass;
}
// Create the training and test data sets; each label has the form "<kind> <idOfCVSet>/<numberOfCVSets>(<randomSeed>)".
ListDataSet train = DataSet.Factory.labeledDataSet("TrainingSet " + idOfCVSet + "/" + numberOfCVSets + "(" + randomSeed + ")"); ListDataSet test = DataSet.Factory.labeledDataSet("TestSet " + idOfCVSet + "/" + numberOfCVSets + "(" + randomSeed + ")");
/**
 * Draws {@code numberOfSamples} samples from this data set with replacement.
 * <p>
 * The result is labeled {@code "Bootstrap of " + getLabel()} and receives the
 * drawn samples themselves; this method does not clone them.
 * <p>
 * NOTE(review): the index used to be drawn with
 * {@code MathUtil.nextInteger(0, size())}. If that overload treats its upper
 * bound as inclusive it can return {@code size()} and make {@code get(...)}
 * fail. The one-argument overload is the call {@code splitByCount} uses to
 * pick an index into a list of the given size; confirm that its upper bound
 * is exclusive.
 *
 * @param numberOfSamples
 *            number of draws; values {@code <= 0} yield an empty data set
 * @return a new data set containing the drawn samples
 * @throws IllegalStateException
 *             if samples are requested but this data set is empty
 */
public final ListDataSet bootstrap(int numberOfSamples) {
    ListDataSet ds = DataSet.Factory.labeledDataSet("Bootstrap of " + getLabel());
    // The size is read once; nothing in the loop adds to or removes from this data set.
    int available = size();
    if (numberOfSamples > 0 && available == 0) {
        throw new IllegalStateException("cannot draw a bootstrap sample from an empty data set");
    }
    for (int i = 0; i < numberOfSamples; i++) {
        int rand = MathUtil.nextInteger(available);
        ds.add(get(rand));
    }
    return ds;
}
/**
 * Builds a data set of sequences generated by iterating
 * {@code x = r * x * (1 - x)} with {@code r = 3.82}.
 * <p>
 * Every sample starts from a new value obtained from
 * {@link Random#nextDouble()} of an unseeded {@link Random}. The first
 * {@code inputLength} iterates are written to a {@code 1 x inputLength}
 * matrix stored under {@code Sample.INPUT}; the following
 * {@code predictionLength} iterates are written to a
 * {@code 1 x predictionLength} matrix stored under {@code Sample.TARGET}.
 * Samples are labeled {@code "Sample " + index}.
 *
 * @param sampleCount
 *            number of samples to generate
 * @param inputLength
 *            number of iterates stored as input of each sample
 * @param predictionLength
 *            number of subsequent iterates stored as target of each sample
 * @return the generated data set, labeled {@code "Logistic Map"}
 */
public ListDataSet LogisticMap(int sampleCount, int inputLength, int predictionLength) {
    ListDataSet result = ListDataSet.Factory.labeledDataSet("Logistic Map");
    final double mapParameter = 3.82;
    Random rng = new Random();

    int sampleIndex = 0;
    while (sampleIndex < sampleCount) {
        double state = rng.nextDouble();

        Matrix inputs = Matrix.Factory.zeros(1, inputLength);
        for (int col = 0; col < inputLength; col++) {
            state = mapParameter * state * (1 - state);
            inputs.setAsDouble(state, 0, col);
        }

        // The target continues the same trajectory where the input stopped.
        Matrix targets = Matrix.Factory.zeros(1, predictionLength);
        for (int col = 0; col < predictionLength; col++) {
            state = mapParameter * state * (1 - state);
            targets.setAsDouble(state, 0, col);
        }

        Sample sample = Sample.Factory.labeledSample("Sample " + sampleIndex);
        sample.put(Sample.INPUT, inputs);
        sample.put(Sample.TARGET, targets);
        result.add(sample);

        sampleIndex++;
    }
    return result;
}
public ListDataSet HenonMap(int sampleCount, int inputLength, int predictionLength) { ListDataSet henon = ListDataSet.Factory.labeledDataSet("Henon Map");
public AbstractListDataSet ANIMALS() { AbstractListDataSet animals = ListDataSet.Factory.labeledDataSet("Animals");