public static void main(String[] args) throws Exception {
    //We can open the data stream using the static class DataStreamLoader
    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/syntheticData.arff");

    //We can save this data set to a new file using the static class DataStreamWriter
    DataStreamWriter.writeDataToFile(data, "datasets/simulated/tmp.arff");
}
public static void main(String[] args) throws Exception {
    //We can open the data stream using the static class DataStreamLoader
    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/syntheticData.arff");

    //ReservoirSampling allows creating a DataOnMemory object containing a uniform subsample of the data stream
    DataOnMemory<DataInstance> dataOnMemory = ReservoirSampling.samplingNumberOfSamples(100, data);

    //We can save this subsample to a new file using the static class DataStreamWriter
    DataStreamWriter.writeDataToFile(dataOnMemory, "datasets/simulated/subsample.arff");
}
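//A minimal follow-up sketch (hypothetical, not part of the original example): the saved
//subsample can be reloaded into memory with loadDataOnMemoryFromFile, as used elsewhere
//in these examples, and its size inspected.
public static void reloadSubsample() throws Exception {
    DataOnMemory<DataInstance> subsample = DataStreamLoader.loadDataOnMemoryFromFile("datasets/simulated/subsample.arff");
    System.out.println("Instances in subsample: " + subsample.getNumberOfDataInstances());
}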
public static void process2(String[] args) throws IOException {
    DataStream<DataInstance> dataInstances = DataStreamLoader.open("/Users/andresmasegosa/Dropbox/Amidst/datasets/NFSAbstracts/docswords-joint.arff");

    //We track the minimum and maximum values of the "word" attribute over the whole stream
    Attribute wordAttribute = dataInstances.getAttributes().getAttributeByName("word");
    double minWord = Double.MAX_VALUE;
    double maxWord = -Double.MAX_VALUE;
    for (DataInstance dataInstance : dataInstances) {
        double word = dataInstance.getValue(wordAttribute);
        if (minWord > word)
            minWord = word;
        if (maxWord < word)
            maxWord = word;
    }

    System.out.println(minWord);
    System.out.println(maxWord);
}
public static void shuffle(String[] args) throws IOException {
    //Utils.shuffleData("/Users/andresmasegosa/Dropbox/Amidst/datasets/uci-text/docword.nips.arff", "/Users/andresmasegosa/Dropbox/Amidst/datasets/uci-text/docword.nips.shuffled.arff");
    DataStream<DataInstance> dataInstances = DataStreamLoader.open("/Users/andresmasegosa/Dropbox/Amidst/datasets/uci-text/docword.nips.arff");

    //We group the stream into one batch per document and shuffle the batches
    List<DataOnMemory<DataInstance>> batches = BatchSpliteratorByID.streamOverDocuments(dataInstances, 1).collect(Collectors.toList());
    Collections.shuffle(batches);

    //We flatten the shuffled batches into a single in-memory container
    DataOnMemoryListContainer<DataInstance> newData = new DataOnMemoryListContainer<DataInstance>(dataInstances.getAttributes());
    for (DataOnMemory<DataInstance> batch : batches) {
        for (DataInstance dataInstance : batch) {
            newData.add(dataInstance);
        }
    }

    //We write the shuffled data set to a new file
    DataStreamWriter.writeDataToFile(newData, "/Users/andresmasegosa/Dropbox/Amidst/datasets/uci-text/docword.nips.shuffled.arff");
}
public static void main(String[] args) {
    //Load the data set
    DataStream<DataInstance> data = DataStreamLoader.open("./datasets/artificialDataset.arff");

    //Define the model (internally the skeleton is fixed)
    Model model = new FactorAnalysis(data.getAttributes());

    //Print the skeleton of the model
    System.out.println(model.getDAG());

    //Learn the parameters of the model
    model.updateModel(data);

    //Print the learnt model
    System.out.println(model.getModel());
}
public static void main(String[] args) { String filename = "datasets/bnaic2015/BCC/Month0.arff"; DataStream<DataInstance> data = DataStreamLoader.open(filename); //Learn the model Model model = new CustomGaussianMixture(data.getAttributes()); model.updateModel(data); BayesianNetwork bn = model.getModel(); System.out.println(bn); }
public static void main(String[] args) { String filename = "datasets/simulated/cajamar.arff"; DataStream<DataInstance> data = DataStreamLoader.open(filename); //Learn the model Model model = new CustomGaussianMixture(data.getAttributes()); model.updateModel(data); BayesianNetwork bn = model.getModel(); System.out.println(bn); }
public static void main(String[] args) {
    //Load the data set
    DataStream<DataInstance> data = DataStreamLoader.open("./datasets/artificialDataset.arff");

    //Define the model (internally the skeleton is fixed)
    Model model = new GaussianMixture(data.getAttributes());

    //Print the skeleton of the model
    System.out.println(model.getDAG());

    //Learn the parameters of the model
    model.updateModel(data);

    //Print the learnt model
    System.out.println(model.getModel());
}
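//A minimal follow-up sketch (hypothetical): once the mixture has been learnt, new
//instances can be drawn from it with BayesianNetworkSampler, the same utility used in
//the parallel k-means example below. The output path is illustrative.
public static void sampleFromLearntModel(Model model) throws Exception {
    BayesianNetworkSampler sampler = new BayesianNetworkSampler(model.getModel());
    DataStream<DataInstance> sampledData = sampler.sampleToDataStream(1000);
    DataStreamWriter.writeDataToFile(sampledData, "datasets/simulated/sampledMixture.arff");
}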
public static void main(String[] args) {
    //Load the data set
    DataStream<DataInstance> data = DataStreamLoader.open("./datasets/artificialDataset.arff");

    //Define the model (internally the skeleton is fixed)
    Model model = new MixtureOfFactorAnalysers(data.getAttributes());

    //Print the skeleton of the model
    System.out.println(model.getDAG());

    //Learn the parameters of the model
    model.updateModel(data);

    //Print the learnt model
    System.out.println(model.getModel());
}
/**
 * This method contains an example of how to compute the monthly average value of one variable.
 * @throws Exception if an error occurs while reading the file.
 */
public static void computeMonthlyAverage() throws Exception {
    //For each month of the period
    for (int i = 0; i < MONTHS; i++) {
        //We load the data for that month
        DataStream<DataInstance> instances = DataStreamLoader.open("./datasets/bnaic2015/BCC/Month" + i + ".arff");

        //We get the attribute credit
        Attribute credit = instances.getAttributes().getAttributeByName("credit");

        //We compute the average, using a parallel stream
        double creditMonthlyAverage = instances
                .parallelStream(1000)
                .mapToDouble(instance -> instance.getValue(credit))
                .average()
                .getAsDouble();

        //We print the computed average
        System.out.println("Average Monthly Credit " + i + ": " + creditMonthlyAverage);
    }
}
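//A minimal variant sketch (hypothetical): the same parallel stream pipeline supports
//other aggregations from java.util.stream, e.g. the monthly maximum. A data stream is
//consumed by the pipeline, so a fresh one is opened here.
public static void computeMonthlyMaximum() throws Exception {
    DataStream<DataInstance> instances = DataStreamLoader.open("./datasets/bnaic2015/BCC/Month0.arff");
    Attribute credit = instances.getAttributes().getAttributeByName("credit");
    double creditMax = instances
            .parallelStream(1000)
            .mapToDouble(instance -> instance.getValue(credit))
            .max()
            .getAsDouble();
    System.out.println("Maximum Monthly Credit 0: " + creditMax);
}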
public static void main(String[] args) throws ExceptionHugin, IOException {
    //Load the data stream
    String filename = "datasets/simulated/cajamar.arff";
    DataStream<DataInstance> data = DataStreamLoader.open(filename);

    //Learn the model
    Model model = new FactorAnalysis(data.getAttributes());
    //((MixtureOfFactorAnalysers) model).setNumberOfLatentVariables(3);
    model.updateModel(data);
    BayesianNetwork bn = model.getModel();
    System.out.println(bn);

    //Save with the .bn format
    BayesianNetworkWriter.save(bn, "networks/simulated/exampleBN.bn");

    //Save with the Hugin format
    //BayesianNetworkWriterToHugin.save(bn, "networks/simulated/exampleBN.net");
}
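//A minimal follow-up sketch (hypothetical): the network saved above can be read back
//with the static class BayesianNetworkLoader.
public static void reloadNetwork() throws IOException, ClassNotFoundException {
    BayesianNetwork loadedBN = BayesianNetworkLoader.loadFromFile("networks/simulated/exampleBN.bn");
    System.out.println(loadedBN.toString());
}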
public static void main(String[] args) throws IOException, ClassNotFoundException {
    int batchSize = 100;

    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/WasteIncineratorSample.arff");

    //We generate a naive Bayes structure whose class variable is the attribute "B"
    DAG dag = DAGGenerator.getNaiveBayesStructure(data.getAttributes(), "B");
    BayesianNetwork bn = new BayesianNetwork(dag);

    data.getAttributes().forEach(attribute -> System.out.println(attribute.getName()));

    //Now we print the created model
    System.out.println(bn.toString());

    //We compute the sufficient statistics of the model in parallel with a map-reduce over data batches
    EF_BayesianNetwork efbn = new EF_BayesianNetwork(bn);
    SufficientStatistics sumSS = data.parallelStream(batchSize)
            .map(efbn::getSufficientStatistics) //see Program 6
            .reduce(SufficientStatistics::sumVectorNonStateless).get();
            //.reduce((v1, v2) -> {v1.sum(v2); return v1;}).get();

    //We normalize by the total number of instances and print the averaged sufficient statistics
    sumSS.divideBy(data.stream().count());
    for (int i = 0; i < sumSS.size(); i++) {
        System.out.println(sumSS.get(i));
    }
}
public static void main(String[] args) throws IOException {
    //Load the data stream
    String filename = "datasets/simulated/docs.nips.small.arff";
    DataStream<DataInstance> data = DataStreamLoader.open(filename);

    //Learn the model
    Model model = new LDA(data.getAttributes());
    model.updateModel(data);
    BayesianNetwork bn = model.getModel();
    //System.out.println(bn);

    //Save with the .bn format
    BayesianNetworkWriter.save(bn, "networks/simulated/exampleBN.bn");
}
public static void main(String[] args) throws Exception {
    //We can open the data stream using the static class DataStreamLoader
    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/WasteIncineratorSample.arff");

    //We create a ParameterLearningAlgorithm object with the ParallelMaximumLikelihood builder
    ParameterLearningAlgorithm parameterLearningAlgorithm = new ParallelMaximumLikelihood();

    //We fix the DAG structure
    parameterLearningAlgorithm.setDAG(getNaiveBayesStructure(data, 0));

    //We should invoke this method before processing any data
    parameterLearningAlgorithm.initLearning();

    //Then we show how we can perform parameter learning by a sequential updating of data batches
    for (DataOnMemory<DataInstance> batch : data.iterableOverBatches(100)) {
        parameterLearningAlgorithm.updateModel(batch);
    }

    //And we get the model
    BayesianNetwork bnModel = parameterLearningAlgorithm.getLearntBayesianNetwork();

    //We print the model
    System.out.println(bnModel.toString());
}
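//A minimal sketch (hypothetical reconstruction, assuming the class variable is given by
//its attribute index) of the getNaiveBayesStructure helper referenced above: every other
//variable gets the class variable as its single parent.
private static DAG getNaiveBayesStructure(DataStream<DataInstance> data, int classIndex) {
    //Create the model variables from the attributes of the data stream
    Variables variables = new Variables(data.getAttributes());
    Variable classVar = variables.getVariableById(classIndex);

    //Create the DAG and add the class variable as parent of all the other variables
    DAG dag = new DAG(variables);
    dag.getParentSets().stream()
            .filter(parentSet -> parentSet.getMainVar() != classVar)
            .forEach(parentSet -> parentSet.addParent(classVar));
    return dag;
}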
public static void main(String[] args) throws Exception {
    //We can open the data stream using the static class DataStreamLoader
    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/WasteIncineratorSample.arff");

    //We create a SVB object
    SVB parameterLearningAlgorithm = new SVB();

    //We fix the DAG structure
    parameterLearningAlgorithm.setDAG(DAGGenerator.getHiddenNaiveBayesStructure(data.getAttributes(), "H", 2));

    //We fix the size of the window, which must be equal to the size of the data batches we use for learning
    parameterLearningAlgorithm.setWindowsSize(100);

    //We can activate the output
    parameterLearningAlgorithm.setOutput(true);

    //We should invoke this method before processing any data
    parameterLearningAlgorithm.initLearning();

    //Then we show how we can perform parameter learning by a sequential updating of data batches
    for (DataOnMemory<DataInstance> batch : data.iterableOverBatches(100)) {
        double log_likelihood_of_batch = parameterLearningAlgorithm.updateModel(batch);
        System.out.println("Log-Likelihood of Batch: " + log_likelihood_of_batch);
    }

    //And we get the model
    BayesianNetwork bnModel = parameterLearningAlgorithm.getLearntBayesianNetwork();

    //We print the model
    System.out.println(bnModel.toString());
}
public static void main(String[] args) throws Exception {
    //We can open the data stream using the static class DataStreamLoader
    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/WasteIncineratorSample.arff");

    //We create a ParameterLearningAlgorithm object with the MaximumLikelihoodFading builder
    MaximumLikelihoodFading parameterLearningAlgorithm = new MaximumLikelihoodFading();

    //We fix the DAG structure
    parameterLearningAlgorithm.setDAG(MaximimumLikelihoodByBatchExample.getNaiveBayesStructure(data, 0));

    //We fix the fading or forgetting factor
    parameterLearningAlgorithm.setFadingFactor(0.9);

    //We set the batch size which will be employed to learn the model
    parameterLearningAlgorithm.setWindowsSize(100);

    //We set the data which is going to be used for learning the parameters
    parameterLearningAlgorithm.setDataStream(data);

    //We perform the learning
    parameterLearningAlgorithm.runLearning();

    //And we get the model
    BayesianNetwork bnModel = parameterLearningAlgorithm.getLearntBayesianNetwork();

    //We print the model
    System.out.println(bnModel.toString());
}
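//A minimal numeric sketch (illustrative only, not AMIDST API) of what the fading factor
//does: the accumulated statistics are multiplied by the factor before each new batch is
//added, so a batch seen k updates ago contributes with weight 0.9^k and old data is
//progressively forgotten.
public static void fadingFactorSketch() {
    double fadingFactor = 0.9;
    double effectiveSampleSize = 0;
    for (int batch = 0; batch < 10; batch++) {
        effectiveSampleSize = fadingFactor * effectiveSampleSize + 100; //100 instances per batch
    }
    System.out.println("Effective sample size after 10 batches: " + effectiveSampleSize);
}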
public static void runParallelKMeans() throws IOException {
    DataStream<DataInstance> data;

    //If requested, sample a synthetic data set from a randomly generated Bayesian network and save it to disk
    if (isSampleData()) {
        BayesianNetworkGenerator.setNumberOfGaussianVars(getNumGaussVars());
        BayesianNetworkGenerator.setNumberOfMultinomialVars(getNumDiscVars(), getNumStates());
        BayesianNetwork bn = BayesianNetworkGenerator.generateBayesianNetwork();
        data = new BayesianNetworkSampler(bn).sampleToDataStream(getSampleSize());
        DataStreamWriter.writeDataToFile(data, pathToFile);
    }
    data = DataStreamLoader.open(pathToFile);

    //Run parallel k-means and print the learnt centroids
    ParallelKMeans.setBatchSize(batchSize);
    double[][] centroids = ParallelKMeans.learnKMeans(getK(), data);
    for (int clusterID = 0; clusterID < centroids.length; clusterID++) {
        System.out.println("Cluster " + (clusterID + 1) + ": " + Arrays.toString(centroids[clusterID]));
    }
}
public static void main(String[] args) throws Exception {
    //We can open the data stream using the static class DataStreamLoader
    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/WasteIncineratorSample.arff");

    //We create a SVB object
    SVB parameterLearningAlgorithm = new SVB();

    //We fix the DAG structure
    parameterLearningAlgorithm.setDAG(DAGGenerator.getHiddenNaiveBayesStructure(data.getAttributes(), "GlobalHidden", 2));

    //We fix the size of the window
    parameterLearningAlgorithm.setWindowsSize(100);

    //We can activate the output
    parameterLearningAlgorithm.setOutput(true);

    //We set the data which is going to be used for learning the parameters
    parameterLearningAlgorithm.setDataStream(data);

    //We perform the learning
    parameterLearningAlgorithm.runLearning();

    //And we get the model
    BayesianNetwork bnModel = parameterLearningAlgorithm.getLearntBayesianNetwork();

    //We print the model
    System.out.println(bnModel.toString());
}
public static void main(String[] args) throws Exception {
    //We can open the data stream using the static class DataStreamLoader
    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/WasteIncineratorSample.arff");

    //We create a ParallelSVB object
    ParallelSVB parameterLearningAlgorithm = new ParallelSVB();

    //We fix the number of cores we want to exploit
    parameterLearningAlgorithm.setNCores(4);

    //We fix the DAG structure, which is a Naive Bayes with a global latent binary variable
    parameterLearningAlgorithm.setDAG(DAGGenerator.getHiddenNaiveBayesStructure(data.getAttributes(), "H", 2));

    //We fix the size of the window
    parameterLearningAlgorithm.getSVBEngine().setWindowsSize(100);

    //We can activate the output
    parameterLearningAlgorithm.setOutput(true);

    //We set the data which is going to be used for learning the parameters
    parameterLearningAlgorithm.setDataStream(data);

    //We perform the learning
    parameterLearningAlgorithm.runLearning();

    //And we get the model
    BayesianNetwork bnModel = parameterLearningAlgorithm.getLearntBayesianNetwork();

    //We print the model
    System.out.println(bnModel.toString());
}
public static void main(String[] args) {
    DataStream<DataInstance> dataInstances = DataStreamLoader.open("/Users/andresmasegosa/Dropbox/Amidst/datasets/NFSAbstracts/abstractByYear/abstract_90.arff");
    //DataOnMemory<DataInstance> dataInstances = DataStreamLoader.loadDataOnMemoryFromFile("/Users/andresmasegosa/Dropbox/Amidst/datasets/NFSAbstracts/abstractByYear/abstract_90.arff");

    //We set up an SVB engine with an LDA plateau over the "word" and "count" attributes
    SVB svb = new SVB();
    PlateauLDA plateauLDA = new PlateauLDA(dataInstances.getAttributes(), "word", "count");
    plateauLDA.setNTopics(10);

    //We configure the variational message passing (VMP) inference engine
    plateauLDA.getVMP().setTestELBO(true);
    plateauLDA.getVMP().setMaxIter(10);
    plateauLDA.getVMP().setOutput(true);
    plateauLDA.getVMP().setThreshold(0.1);

    svb.setPlateuStructure(plateauLDA);
    svb.setOutput(true);
    svb.initLearning();

    //System.out.println(dataInstances.getNumberOfDataInstances());
    //svb.updateModel(dataInstances);

    //We update the model sequentially, one batch of documents at a time
    BatchSpliteratorByID.streamOverDocuments(dataInstances, 500).sequential().forEach(batch -> {
        System.out.println("Batch: " + batch.getNumberOfDataInstances());
        svb.updateModel(batch);
    });
}
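//A minimal follow-up sketch (hypothetical): at the end of the loop above, the learnt
//topic model can be retrieved from the SVB engine, as in the other SVB examples:
//
//    BayesianNetwork topicModel = svb.getLearntBayesianNetwork();
//    System.out.println(topicModel.toString());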