/**
 * Compresses every sample of the given data set, distributing the work
 * over the index range [0, size-1] with a parallel-for construct.
 */
public void compress(final ListDataSet dataSet) {
    new PFor(0, dataSet.size() - 1) {
        @Override
        public void step(int index) {
            // Each worker compresses the sample at its assigned index.
            compress(dataSet.get(index));
        }
    };
}
/**
 * Splits this data set into one data set per target class.
 * The data set at index {@code i} contains clones of all samples whose
 * target class equals {@code i}.
 *
 * @return a list with one labeled data set per class
 */
public final List<ListDataSet> splitByClass() {
    final List<ListDataSet> perClass = new ArrayList<ListDataSet>();
    for (int classIndex = 0; classIndex < getClassCount(); classIndex++) {
        final ListDataSet classSet = DataSet.Factory.labeledDataSet("Class " + classIndex);
        // Collect a clone of every sample that belongs to this class.
        for (Sample sample : this) {
            if (sample.getTargetClass() == classIndex) {
                classSet.add(sample.clone());
            }
        }
        perClass.add(classSet);
    }
    return perClass;
}
@Override public void step(int i) { Sample sample = dataSet.get(i); compress(sample); } };
/**
 * Splits this data set into {@code count.length + 1} parts: part {@code i}
 * receives {@code count[i]} samples, and the final part receives whatever
 * remains.
 *
 * @param shuffle if true, samples are drawn at random positions; otherwise
 *                they are taken in order from the front
 * @param count   the number of samples for each of the leading parts
 * @return the list of resulting data sets
 */
public final List<ListDataSet> splitByCount(boolean shuffle, int... count) {
    final List<ListDataSet> parts = new ArrayList<ListDataSet>();
    final List<Sample> remaining = new FastArrayList<Sample>();
    remaining.addAll(this);
    for (int part = 0; part < count.length; part++) {
        final ListDataSet partSet = DataSet.Factory.labeledDataSet("DataSet" + part);
        for (int n = 0; n < count[part]; n++) {
            // Random draw when shuffling, otherwise pop from the front.
            final int pick = shuffle ? MathUtil.nextInteger(remaining.size()) : 0;
            partSet.add(remaining.remove(pick));
        }
        parts.add(partSet);
    }
    // Leftover samples form the last data set.
    final ListDataSet rest = DataSet.Factory.labeledDataSet("DataSet" + count.length);
    rest.addAll(remaining);
    parts.add(rest);
    return parts;
}
/**
 * Wraps a {@code ListDataSet} as a Weka {@code Instances} object.
 *
 * @param dataSet       the data set to wrap
 * @param discrete      whether attributes are treated as discrete
 * @param includeTarget if true, the target column is registered as the class
 *                      attribute and included in each instance
 */
public DataSetToInstancesWrapper(ListDataSet dataSet, boolean discrete, boolean includeTarget) {
    super(dataSet.getLabel(), new DataSetToAttributeInfoWrapper(dataSet, discrete),
            dataSet.size());
    if (includeTarget) {
        // Class attribute sits directly after the feature columns.
        setClassIndex(dataSet.getFeatureCount());
    }
    for (Sample sample : dataSet) {
        add(new SampleToInstanceWrapper(sample.getAsMatrix(INPUT),
                sample.getAsMatrix(WEIGHT), sample.getAsMatrix(TARGET), discrete,
                includeTarget));
    }
}
/**
 * Counts single-product and product-pair occurrences over all relational
 * samples in the data set, then derives association metrics.
 *
 * @param dataSet data set of {@code RelationalSample}s, each holding a
 *                collection of products
 * @return the result of {@code calculateP(minSupport)}
 * @throws Exception if counting or the final calculation fails
 */
public ListDataSet calculate(ListDataSet dataSet) throws Exception {
    product1ToIds.setLabel("Product 1 Ids");
    product2ToIds.setLabel("Product 2 Ids");
    Matrix product1Count = new CountMatrix(product1ToIds);
    product1Count.setLabel("Product 1 Count");
    Matrix product2Count = new CountMatrix(product2ToIds);
    product2Count.setLabel("Product 2 Count");
    for (int r = 0; r < dataSet.size(); r++) {
        // Progress output for long-running scans.
        if (r % 1000 == 0) {
            System.out.println(r + " of " + dataSet.size());
        }
        RelationalSample s = (RelationalSample) dataSet.get(r);
        Collection<?> products = s.getObjects();
        // Skip empty baskets; they contribute no counts.
        if (!products.isEmpty()) {
            addProduct1Count(products, r);
            addProduct2Count(products, r);
        }
    }
    return calculateP(minSupport);
}
new PFor(0, dataSet.size() - 1) { if (dataSet.get(0).getAsMatrix(getTargetLabel()) != null) { final Matrix confusion; double error = 0.0; Matrix rmse = Matrix.Factory.linkToValue(Math.sqrt(error / dataSet.size())); rmse.setLabel("RMSE with " + getLabel()); dataSet.setMatrix(Variable.RMSE, rmse); dataSet.setMatrix(Variable.CONFUSION, confusion); / (double) dataSet.size()); accuracy.setLabel("Accuracy with " + getLabel()); dataSet.setMatrix(Variable.ACCURACY, accuracy); dataSet.setMatrix(Variable.ERRORCOUNT, errorMatrix); dataSet.setMatrix(Variable.SENSITIVITY, sensitivity); dataSet.setMatrix(Variable.SPECIFICITY, specificity); dataSet.setMatrix(Variable.PRECISION, precision); dataSet.setMatrix(Variable.RECALL, recall); dataSet.setMatrix(Variable.FMEASURE, fmeasure); dataSet.setMatrix(Variable.FMEASUREMACRO, fmeasureMacro);
@Test
public void testTagger() throws Exception {
    // Skip silently when no tagger implementation is available.
    if (tagger == null) {
        return;
    }
    ListDataSet dataSet = new DefaultListDataSet();
    Sample first = new DefaultSample();
    first.put(Sample.INPUT, s1);
    Sample second = new DefaultSample();
    second.put(Sample.INPUT, s2);
    dataSet.add(first);
    dataSet.add(second);
    // Tokenize the raw input, then run the tagger over the tokens.
    tokenizer.tokenize(Sample.INPUT, dataSet);
    tagger.tag(dataSet);
    Matrix tagged1 = dataSet.get(0).getAsMatrix(Tagger.TAGGED);
    Matrix tagged2 = dataSet.get(1).getAsMatrix(Tagger.TAGGED);
    // Each tagged matrix has two columns (token, tag) and one row per token.
    assertEquals(2, tagged1.getColumnCount());
    assertEquals(11, tagged1.getRowCount());
    assertEquals(2, tagged2.getColumnCount());
    assertEquals(5, tagged2.getRowCount());
}
}
// Metadata for Fisher's classic Iris data set.
iris.setLabel("Iris flower data set");
iris.setMetaData(Sample.URL, "http://archive.ics.uci.edu/ml/datasets/Iris");
iris.setDescription("Fisher's Iris data set is a multivariate data set introduced by Sir Ronald Aylmer Fisher (1936) as an example of discriminant analysis.");
// The leading samples (s0..s5 visible here; s0..s6 declared outside this
// span) all belong to the Iris-setosa class and get sequential ids.
s0.setLabel("Iris-setosa");
s0.setId("iris-0");
iris.add(s0);
s1.setLabel("Iris-setosa");
s1.setId("iris-1");
iris.add(s1);
s2.setLabel("Iris-setosa");
s2.setId("iris-2");
iris.add(s2);
s3.setLabel("Iris-setosa");
s3.setId("iris-3");
iris.add(s3);
s4.setLabel("Iris-setosa");
s4.setId("iris-4");
iris.add(s4);
s5.setLabel("Iris-setosa");
s5.setId("iris-5");
iris.add(s5);
s6.setLabel("Iris-setosa");
/**
 * Trains one binary classifier per target class (one-vs-rest scheme).
 * Each classifier is an empty copy of {@code singleClassClassifier} trained
 * on the full input and the single target column for its class.
 *
 * @param dataSet the labeled training data
 */
public void trainAll(ListDataSet dataSet) {
    reset();
    classCount = getClassCount(dataSet);
    // Hoisted out of the loop: input and target matrices are the same for
    // every class, so there is no need to fetch them per iteration.
    final Matrix input = dataSet.getInputMatrix();
    final Matrix allTargets = dataSet.getTargetMatrix();
    for (int i = 0; i < classCount; i++) {
        System.out.println("Training class " + i);
        Classifier c = singleClassClassifier.emptyCopy();
        singleClassClassifiers.add(c);
        Matrix target = allTargets.selectColumns(Ret.LINK, i);
        if (twoColumns) {
            // Add a complement column (|target - 1|) so the classifier sees
            // an explicit negative-class indicator as well.
            Matrix target2 = target.minus(1).abs(Ret.NEW);
            target = Matrix.Factory.horCat(target, target2);
        }
        ListDataSet ds = DataSet.Factory.linkToInputAndTarget(input, target);
        c.trainAll(ds);
    }
}
@Test
public void testMLP() throws Exception {
    ListDataSet iris = ListDataSet.Factory.IRIS();
    // Standardize features in place so each row has comparable scale.
    iris.getInputMatrix().standardize(Ret.ORIG, Matrix.ROW);
    MultiLayerNetwork network = new MultiLayerNetwork(10);
    network.setLearningRate(0.05);
    // 300 training epochs over the full data set.
    for (int epoch = 0; epoch < 300; epoch++) {
        network.trainOnce(iris);
    }
    network.predictAll(iris);
    // Expect roughly 90% accuracy (wide tolerance for random init).
    assertEquals(0.90, iris.getAccuracy(), 0.2);
}
}
public static void main(String[] args) throws Exception { Matrix data = Matrix.Factory.linkTo().file("/home/arndt/muenchen/totale2.txt").asDenseCSV(); // data.showGUI(); ListDataSet orig = new DefaultListDataSet(); // for (int r = 0; r < 10000; r++) { for (int r = 0; r < data.getRowCount(); r++) { if (r % 1000 == 0) { System.out.println(r + " of " + data.getRowCount()); } Collection<?> products = getProductsInLine(data, r); if (products.size() != 0) { RelationalSample s = Sample.Factory.relationalSample(products); orig.add(s); } } MarketBasketAnalysis mba = new MarketBasketAnalysis(); orig.showGUI(); ListDataSet ds = mba.calculate(orig); ds.showGUI(); }
/** Returns the label of the wrapped data set (pure delegation). */
public String getLabel() {
    final String label = dataSet.getLabel();
    return label;
}
/**
 * Searches the Lucene index and returns matching samples, each annotated
 * with its score and up to ten "more like this" tag suggestions.
 *
 * Fix: the original executed an extra {@code search(query, 100)} whose
 * result was assigned to an unused local and discarded — that dead query
 * has been removed.
 *
 * @param query the Lucene query to execute
 * @param start offset of the first result
 *              (NOTE(review): currently ignored — TODO honor paging)
 * @param count maximum number of results to return
 * @return a data set with one sample per hit; total hit count in metadata
 * @throws Exception if searching or deserialization fails
 */
public synchronized ListDataSet search(Query query, int start, int count) throws Exception {
    System.out.println("searching for: " + query);
    MoreLikeThis mlt = new MoreLikeThis(getIndexSearcher().getIndexReader());
    mlt.setFieldNames(new String[] { Variable.LABEL, Variable.DESCRIPTION, Variable.TAGS });
    mlt.setMaxWordLen(MAXWORDLENGTH);
    TopDocs td = indexSearcher.search(query, count);
    ListDataSet result = new DefaultListDataSet();
    result.setMetaData("Total", td.totalHits);
    for (ScoreDoc sd : td.scoreDocs) {
        int id = sd.doc;
        Document doc = indexSearcher.doc(id);
        // Samples are stored serialized in the binary "RawData" field.
        Sample s = (Sample) SerializationUtil.deserialize(doc.getBinaryValue("RawData").bytes);
        s.put(Sample.SCORE, MathUtil.getMatrix(sd.score));
        // Attach at most ten interesting terms as tag suggestions.
        String[] terms = mlt.retrieveInterestingTerms(id);
        for (int i = 0; i < 10 && i < terms.length; i++) {
            s.put(Variable.SUGGESTEDTAGS, terms[i]);
        }
        result.add(s);
    }
    return result;
}
public static void main(String[] args) { // load example data set ListDataSet dataSet = DataSet.Factory.IRIS(); // create a classifier LinearRegression classifier = new LinearRegression(); // train the classifier using all data classifier.trainAll(dataSet); // use the classifier to make predictions classifier.predictAll(dataSet); // get the results double accurary = dataSet.getAccuracy(); System.out.println("accuracy: " + accurary); }
Matrix valueCounts = dataSet.getInputMatrix().max(Ret.NEW, Matrix.ROW).plus(1); for (int j = 0; j < dataSet.getInputMatrix().getColumnCount(); j++) { weka.core.Attribute a = null; if (discrete) { int classCount = dataSet.getClassCount(); for (int i = 0; i < classCount; i++) { classes.addElement("Class " + i);
// Split into train/test partitions for this cross-validation fold; the
// seed varies per run so folds differ between repetitions.
List<ListDataSet> dss = dataSet.splitForCV(folds, fold, randomSeed + run);
ListDataSet train = dss.get(0);
ListDataSet test = dss.get(1);
// NOTE(review): `train` is never used in this visible span — confirm the
// algorithm is trained on `train` somewhere outside this excerpt before
// predictAll is called, otherwise the evaluation is meaningless.
algorithm.predictAll(test);
// Accumulate per-fold evaluation metrics.
acc.add(test.getAccuracy());
fm.add(test.getAsDouble(Variable.FMEASUREMACRO));
sens.add(test.getAsDouble(Variable.SENSITIVITY));
spec.add(test.getAsDouble(Variable.SPECIFICITY));
prec.add(test.getAsDouble(Variable.PRECISION));
rec.add(test.getAsDouble(Variable.RECALL));
rmse.add(test.getAsDouble(Variable.RMSE));
System.out.print(test.getAsDouble(Variable.FMEASUREMACRO) + "\t");
/**
 * Initializes the random projection matrix from the data set's feature
 * count: one Gaussian random vector per hash bit.
 */
public void train(ListDataSet dataSet) {
    randomVectors = Matrix.Factory.randn(dataSet.getFeatureCount(), numberOfBits);
}
/**
 * Builds a single matrix with one row per sample: the feature columns come
 * first, followed by the target columns.
 *
 * @param dataSet the data set to flatten
 * @return a (samples x (features + classes)) matrix
 */
private Matrix createCompleteMatrix(ListDataSet dataSet) {
    final int rows = dataSet.size();
    final int features = getFeatureCount(dataSet);
    final int classes = getClassCount(dataSet);
    final Matrix complete = Matrix.Factory.zeros(rows, features + classes);
    for (int row = 0; row < rows; row++) {
        final Sample sample = dataSet.get(row);
        // Flatten input and target to row vectors before copying.
        final Matrix input = sample.getAsMatrix(getInputLabel()).toColumnVector(Ret.NEW);
        final Matrix target = sample.getAsMatrix(getTargetLabel()).toColumnVector(Ret.NEW);
        for (int col = 0; col < features; col++) {
            complete.setAsDouble(input.getAsDouble(0, col), row, col);
        }
        for (int col = 0; col < classes; col++) {
            complete.setAsDouble(target.getAsDouble(0, col), row, features + col);
        }
    }
    return complete;
}