public static void main(String[] args) throws Exception {
    //We can open the data stream using the static class DataStreamLoader
    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/syntheticData.arff");

    //We can save this data set to a new file using the static class DataStreamWriter
    DataStreamWriter.writeDataToFile(data, "datasets/simulated/tmp.arff");
}
public static void main(String[] args) throws Exception {
    //We can open the data stream using the static class DataStreamLoader
    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/syntheticData.arff");

    //ReservoirSampling allows creating a DataOnMemory object containing a uniform subsample of the data stream
    DataOnMemory<DataInstance> dataOnMemory = ReservoirSampling.samplingNumberOfSamples(100, data);

    //We can save this subsample to a new file using the static class DataStreamWriter
    DataStreamWriter.writeDataToFile(dataOnMemory, "datasets/simulated/subsample.arff");
}
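//A minimal follow-up sketch (hypothetical, not part of the original example): the saved
//subsample can be reloaded into memory with loadDataOnMemoryFromFile, as used elsewhere
//in these examples, and its size inspected.
public static void reloadSubsample() throws Exception {
    DataOnMemory<DataInstance> subsample = DataStreamLoader.loadDataOnMemoryFromFile("datasets/simulated/subsample.arff");
    System.out.println("Instances in subsample: " + subsample.getNumberOfDataInstances());
}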
public static void process2(String[] args) throws IOException {
    DataStream<DataInstance> dataInstances = DataStreamLoader.open("/Users/andresmasegosa/Dropbox/Amidst/datasets/NFSAbstracts/docswords-joint.arff");

    //We track the minimum and maximum values of the "word" attribute over the whole stream
    Attribute wordAttribute = dataInstances.getAttributes().getAttributeByName("word");
    double minWord = Double.MAX_VALUE;
    double maxWord = -Double.MAX_VALUE;
    for (DataInstance dataInstance : dataInstances) {
        double word = dataInstance.getValue(wordAttribute);
        if (minWord > word)
            minWord = word;
        if (maxWord < word)
            maxWord = word;
    }

    System.out.println(minWord);
    System.out.println(maxWord);
}
public static void shuffle(String[] args) throws IOException {
    //Utils.shuffleData("/Users/andresmasegosa/Dropbox/Amidst/datasets/uci-text/docword.nips.arff", "/Users/andresmasegosa/Dropbox/Amidst/datasets/uci-text/docword.nips.shuffled.arff");
    DataStream<DataInstance> dataInstances = DataStreamLoader.open("/Users/andresmasegosa/Dropbox/Amidst/datasets/uci-text/docword.nips.arff");

    //We group the stream into one batch per document and shuffle the batches
    List<DataOnMemory<DataInstance>> batches = BatchSpliteratorByID.streamOverDocuments(dataInstances, 1).collect(Collectors.toList());
    Collections.shuffle(batches);

    //We flatten the shuffled batches into a single in-memory container
    DataOnMemoryListContainer<DataInstance> newData = new DataOnMemoryListContainer<DataInstance>(dataInstances.getAttributes());
    for (DataOnMemory<DataInstance> batch : batches) {
        for (DataInstance dataInstance : batch) {
            newData.add(dataInstance);
        }
    }

    //We write the shuffled data set to a new file
    DataStreamWriter.writeDataToFile(newData, "/Users/andresmasegosa/Dropbox/Amidst/datasets/uci-text/docword.nips.shuffled.arff");
}
public static void main(String[] args) {
    //Load the data set
    DataStream<DataInstance> data = DataStreamLoader.open("./datasets/artificialDataset.arff");

    //Define the model (internally the skeleton is fixed)
    Model model = new FactorAnalysis(data.getAttributes());

    //Print the skeleton of the model
    System.out.println(model.getDAG());

    //Learn the parameters of the model
    model.updateModel(data);

    //Print the learnt model
    System.out.println(model.getModel());
}
public static void main(String[] args) { String filename = "datasets/bnaic2015/BCC/Month0.arff"; DataStream<DataInstance> data = DataStreamLoader.open(filename); //Learn the model Model model = new CustomGaussianMixture(data.getAttributes()); model.updateModel(data); BayesianNetwork bn = model.getModel(); System.out.println(bn); }
public static void main(String[] args) { String filename = "datasets/simulated/cajamar.arff"; DataStream<DataInstance> data = DataStreamLoader.open(filename); //Learn the model Model model = new CustomGaussianMixture(data.getAttributes()); model.updateModel(data); BayesianNetwork bn = model.getModel(); System.out.println(bn); }
public static void main(String[] args) {
    //Load the data set
    DataStream<DataInstance> data = DataStreamLoader.open("./datasets/artificialDataset.arff");

    //Define the model (internally the skeleton is fixed)
    Model model = new GaussianMixture(data.getAttributes());

    //Print the skeleton of the model
    System.out.println(model.getDAG());

    //Learn the parameters of the model
    model.updateModel(data);

    //Print the learnt model
    System.out.println(model.getModel());
}
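//A minimal follow-up sketch (hypothetical): once the mixture has been learnt, new
//instances can be drawn from it with BayesianNetworkSampler, the same utility used in
//the parallel k-means example below. The output path is illustrative.
public static void sampleFromLearntModel(Model model) throws Exception {
    BayesianNetworkSampler sampler = new BayesianNetworkSampler(model.getModel());
    DataStream<DataInstance> sampledData = sampler.sampleToDataStream(1000);
    DataStreamWriter.writeDataToFile(sampledData, "datasets/simulated/sampledMixture.arff");
}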
public static void main(String[] args) {
    //Load the data set
    DataStream<DataInstance> data = DataStreamLoader.open("./datasets/artificialDataset.arff");

    //Define the model (internally the skeleton is fixed)
    Model model = new MixtureOfFactorAnalysers(data.getAttributes());

    //Print the skeleton of the model
    System.out.println(model.getDAG());

    //Learn the parameters of the model
    model.updateModel(data);

    //Print the learnt model
    System.out.println(model.getModel());
}
/**
 * This method contains an example of how to compute the monthly average value of one variable.
 * @throws Exception if an error occurs while reading the file.
 */
public static void computeMonthlyAverage() throws Exception {
    //For each month of the period
    for (int i = 0; i < MONTHS; i++) {
        //We load the data for that month
        DataStream<DataInstance> instances = DataStreamLoader.open("./datasets/bnaic2015/BCC/Month" + i + ".arff");

        //We get the attribute credit
        Attribute credit = instances.getAttributes().getAttributeByName("credit");

        //We compute the average, using a parallel stream
        double creditMonthlyAverage = instances
                .parallelStream(1000)
                .mapToDouble(instance -> instance.getValue(credit))
                .average()
                .getAsDouble();

        //We print the computed average
        System.out.println("Average Monthly Credit " + i + ": " + creditMonthlyAverage);
    }
}
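//A minimal variant sketch (hypothetical): the same parallel stream pipeline supports
//other aggregations from java.util.stream, e.g. the monthly maximum. A data stream is
//consumed by the pipeline, so a fresh one is opened here.
public static void computeMonthlyMaximum() throws Exception {
    DataStream<DataInstance> instances = DataStreamLoader.open("./datasets/bnaic2015/BCC/Month0.arff");
    Attribute credit = instances.getAttributes().getAttributeByName("credit");
    double creditMax = instances
            .parallelStream(1000)
            .mapToDouble(instance -> instance.getValue(credit))
            .max()
            .getAsDouble();
    System.out.println("Maximum Monthly Credit 0: " + creditMax);
}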
public static void main(String[] args) throws ExceptionHugin, IOException {
    //Load the data stream
    String filename = "datasets/simulated/cajamar.arff";
    DataStream<DataInstance> data = DataStreamLoader.open(filename);

    //Learn the model
    Model model = new FactorAnalysis(data.getAttributes());
    //((MixtureOfFactorAnalysers) model).setNumberOfLatentVariables(3);
    model.updateModel(data);
    BayesianNetwork bn = model.getModel();
    System.out.println(bn);

    //Save with the .bn format
    BayesianNetworkWriter.save(bn, "networks/simulated/exampleBN.bn");

    //Save with the Hugin format
    //BayesianNetworkWriterToHugin.save(bn, "networks/simulated/exampleBN.net");
}
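//A minimal follow-up sketch (hypothetical): the network saved above can be read back
//with the static class BayesianNetworkLoader.
public static void reloadNetwork() throws IOException, ClassNotFoundException {
    BayesianNetwork loadedBN = BayesianNetworkLoader.loadFromFile("networks/simulated/exampleBN.bn");
    System.out.println(loadedBN.toString());
}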
public static void main(String[] args) throws IOException, ClassNotFoundException {
    int batchSize = 100;

    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/WasteIncineratorSample.arff");

    //We generate a naive Bayes structure whose class variable is the attribute "B"
    DAG dag = DAGGenerator.getNaiveBayesStructure(data.getAttributes(), "B");
    BayesianNetwork bn = new BayesianNetwork(dag);

    data.getAttributes().forEach(attribute -> System.out.println(attribute.getName()));

    //Now we print the created model
    System.out.println(bn.toString());

    //We compute the sufficient statistics of the model in parallel with a map-reduce over data batches
    EF_BayesianNetwork efbn = new EF_BayesianNetwork(bn);
    SufficientStatistics sumSS = data.parallelStream(batchSize)
            .map(efbn::getSufficientStatistics) //see Program 6
            .reduce(SufficientStatistics::sumVectorNonStateless).get();
            //.reduce((v1, v2) -> {v1.sum(v2); return v1;}).get();

    //We normalize by the total number of instances and print the averaged sufficient statistics
    sumSS.divideBy(data.stream().count());
    for (int i = 0; i < sumSS.size(); i++) {
        System.out.println(sumSS.get(i));
    }
}
public static void main(String[] args) throws IOException {
    //Load the data stream
    String filename = "datasets/simulated/docs.nips.small.arff";
    DataStream<DataInstance> data = DataStreamLoader.open(filename);

    //Learn the model
    Model model = new LDA(data.getAttributes());
    model.updateModel(data);
    BayesianNetwork bn = model.getModel();
    //System.out.println(bn);

    //Save with the .bn format
    BayesianNetworkWriter.save(bn, "networks/simulated/exampleBN.bn");
}
public static void main(String[] args) throws Exception {
    //We can open the data stream using the static class DataStreamLoader
    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/WasteIncineratorSample.arff");

    //We create a ParameterLearningAlgorithm object with the ParallelMaximumLikelihood builder
    ParameterLearningAlgorithm parameterLearningAlgorithm = new ParallelMaximumLikelihood();

    //We fix the DAG structure
    parameterLearningAlgorithm.setDAG(getNaiveBayesStructure(data, 0));

    //We should invoke this method before processing any data
    parameterLearningAlgorithm.initLearning();

    //Then we show how we can perform parameter learning by a sequential updating of data batches
    for (DataOnMemory<DataInstance> batch : data.iterableOverBatches(100)) {
        parameterLearningAlgorithm.updateModel(batch);
    }

    //And we get the model
    BayesianNetwork bnModel = parameterLearningAlgorithm.getLearntBayesianNetwork();

    //We print the model
    System.out.println(bnModel.toString());
}
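//A minimal sketch (hypothetical reconstruction, assuming the class variable is given by
//its attribute index) of the getNaiveBayesStructure helper referenced above: every other
//variable gets the class variable as its single parent.
private static DAG getNaiveBayesStructure(DataStream<DataInstance> data, int classIndex) {
    //Create the model variables from the attributes of the data stream
    Variables variables = new Variables(data.getAttributes());
    Variable classVar = variables.getVariableById(classIndex);

    //Create the DAG and add the class variable as parent of all the other variables
    DAG dag = new DAG(variables);
    dag.getParentSets().stream()
            .filter(parentSet -> parentSet.getMainVar() != classVar)
            .forEach(parentSet -> parentSet.addParent(classVar));
    return dag;
}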
public static void main(String[] args) throws Exception {
    //We can open the data stream using the static class DataStreamLoader
    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/WasteIncineratorSample.arff");

    //We create a SVB object
    SVB parameterLearningAlgorithm = new SVB();

    //We fix the DAG structure
    parameterLearningAlgorithm.setDAG(DAGGenerator.getHiddenNaiveBayesStructure(data.getAttributes(), "H", 2));

    //We fix the size of the window, which must be equal to the size of the data batches we use for learning
    parameterLearningAlgorithm.setWindowsSize(100);

    //We can activate the output
    parameterLearningAlgorithm.setOutput(true);

    //We should invoke this method before processing any data
    parameterLearningAlgorithm.initLearning();

    //Then we show how we can perform parameter learning by a sequential updating of data batches
    for (DataOnMemory<DataInstance> batch : data.iterableOverBatches(100)) {
        double log_likelihood_of_batch = parameterLearningAlgorithm.updateModel(batch);
        System.out.println("Log-Likelihood of Batch: " + log_likelihood_of_batch);
    }

    //And we get the model
    BayesianNetwork bnModel = parameterLearningAlgorithm.getLearntBayesianNetwork();

    //We print the model
    System.out.println(bnModel.toString());
}
public static void main(String[] args) throws Exception {
    //We can open the data stream using the static class DataStreamLoader
    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/WasteIncineratorSample.arff");

    //We create a ParameterLearningAlgorithm object with the MaximumLikelihoodFading builder
    MaximumLikelihoodFading parameterLearningAlgorithm = new MaximumLikelihoodFading();

    //We fix the DAG structure
    parameterLearningAlgorithm.setDAG(MaximimumLikelihoodByBatchExample.getNaiveBayesStructure(data, 0));

    //We fix the fading or forgetting factor
    parameterLearningAlgorithm.setFadingFactor(0.9);

    //We set the batch size which will be employed to learn the model
    parameterLearningAlgorithm.setWindowsSize(100);

    //We set the data which is going to be used for learning the parameters
    parameterLearningAlgorithm.setDataStream(data);

    //We perform the learning
    parameterLearningAlgorithm.runLearning();

    //And we get the model
    BayesianNetwork bnModel = parameterLearningAlgorithm.getLearntBayesianNetwork();

    //We print the model
    System.out.println(bnModel.toString());
}
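//A minimal numeric sketch (illustrative only, not AMIDST API) of what the fading factor
//does: the accumulated statistics are multiplied by the factor before each new batch is
//added, so a batch seen k updates ago contributes with weight 0.9^k and old data is
//progressively forgotten.
public static void fadingFactorSketch() {
    double fadingFactor = 0.9;
    double effectiveSampleSize = 0;
    for (int batch = 0; batch < 10; batch++) {
        effectiveSampleSize = fadingFactor * effectiveSampleSize + 100; //100 instances per batch
    }
    System.out.println("Effective sample size after 10 batches: " + effectiveSampleSize);
}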
public static void runParallelKMeans() throws IOException {
    DataStream<DataInstance> data;

    //If requested, sample a synthetic data set from a randomly generated Bayesian network and save it to disk
    if (isSampleData()) {
        BayesianNetworkGenerator.setNumberOfGaussianVars(getNumGaussVars());
        BayesianNetworkGenerator.setNumberOfMultinomialVars(getNumDiscVars(), getNumStates());
        BayesianNetwork bn = BayesianNetworkGenerator.generateBayesianNetwork();
        data = new BayesianNetworkSampler(bn).sampleToDataStream(getSampleSize());
        DataStreamWriter.writeDataToFile(data, pathToFile);
    }
    data = DataStreamLoader.open(pathToFile);

    //Run parallel k-means and print the learnt centroids
    ParallelKMeans.setBatchSize(batchSize);
    double[][] centroids = ParallelKMeans.learnKMeans(getK(), data);
    for (int clusterID = 0; clusterID < centroids.length; clusterID++) {
        System.out.println("Cluster " + (clusterID + 1) + ": " + Arrays.toString(centroids[clusterID]));
    }
}
public static void main(String[] args) throws Exception {
    //We can open the data stream using the static class DataStreamLoader
    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/WasteIncineratorSample.arff");

    //We create a SVB object
    SVB parameterLearningAlgorithm = new SVB();

    //We fix the DAG structure
    parameterLearningAlgorithm.setDAG(DAGGenerator.getHiddenNaiveBayesStructure(data.getAttributes(), "GlobalHidden", 2));

    //We fix the size of the window
    parameterLearningAlgorithm.setWindowsSize(100);

    //We can activate the output
    parameterLearningAlgorithm.setOutput(true);

    //We set the data which is going to be used for learning the parameters
    parameterLearningAlgorithm.setDataStream(data);

    //We perform the learning
    parameterLearningAlgorithm.runLearning();

    //And we get the model
    BayesianNetwork bnModel = parameterLearningAlgorithm.getLearntBayesianNetwork();

    //We print the model
    System.out.println(bnModel.toString());
}
public static void main(String[] args) throws Exception {
    //We can open the data stream using the static class DataStreamLoader
    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/WasteIncineratorSample.arff");

    //We create a ParallelSVB object
    ParallelSVB parameterLearningAlgorithm = new ParallelSVB();

    //We fix the number of cores we want to exploit
    parameterLearningAlgorithm.setNCores(4);

    //We fix the DAG structure, which is a Naive Bayes with a global latent binary variable
    parameterLearningAlgorithm.setDAG(DAGGenerator.getHiddenNaiveBayesStructure(data.getAttributes(), "H", 2));

    //We fix the size of the window
    parameterLearningAlgorithm.getSVBEngine().setWindowsSize(100);

    //We can activate the output
    parameterLearningAlgorithm.setOutput(true);

    //We set the data which is going to be used for learning the parameters
    parameterLearningAlgorithm.setDataStream(data);

    //We perform the learning
    parameterLearningAlgorithm.runLearning();

    //And we get the model
    BayesianNetwork bnModel = parameterLearningAlgorithm.getLearntBayesianNetwork();

    //We print the model
    System.out.println(bnModel.toString());
}
public static void main(String[] args) {
    DataStream<DataInstance> dataInstances = DataStreamLoader.open("/Users/andresmasegosa/Dropbox/Amidst/datasets/NFSAbstracts/abstractByYear/abstract_90.arff");
    //DataOnMemory<DataInstance> dataInstances = DataStreamLoader.loadDataOnMemoryFromFile("/Users/andresmasegosa/Dropbox/Amidst/datasets/NFSAbstracts/abstractByYear/abstract_90.arff");

    //We set up an SVB engine with an LDA plateau over the "word" and "count" attributes
    SVB svb = new SVB();
    PlateauLDA plateauLDA = new PlateauLDA(dataInstances.getAttributes(), "word", "count");
    plateauLDA.setNTopics(10);

    //We configure the variational message passing (VMP) inference engine
    plateauLDA.getVMP().setTestELBO(true);
    plateauLDA.getVMP().setMaxIter(10);
    plateauLDA.getVMP().setOutput(true);
    plateauLDA.getVMP().setThreshold(0.1);

    svb.setPlateuStructure(plateauLDA);
    svb.setOutput(true);
    svb.initLearning();

    //System.out.println(dataInstances.getNumberOfDataInstances());
    //svb.updateModel(dataInstances);

    //We update the model sequentially, one batch of documents at a time
    BatchSpliteratorByID.streamOverDocuments(dataInstances, 500).sequential().forEach(batch -> {
        System.out.println("Batch: " + batch.getNumberOfDataInstances());
        svb.updateModel(batch);
    });
}
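//A minimal follow-up sketch (hypothetical): at the end of the loop above, the learnt
//topic model can be retrieved from the SVB engine, as in the other SVB examples:
//
//    BayesianNetwork topicModel = svb.getLearntBayesianNetwork();
//    System.out.println(topicModel.toString());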