/**
 * {@inheritDoc}
 */
@Override
public BayesianNetwork getLearntBayesianNetwork() {
    //Normalize the sufficient statistics
    SufficientStatistics normalizedSS = efBayesianNetwork.createZeroSufficientStatistics();
    normalizedSS.copy(sumSS);
    normalizedSS.divideBy(numInstances);
    efBayesianNetwork.setMomentParameters(normalizedSS);
    return efBayesianNetwork.toBayesianNetwork(dag);
}
@Override
public SufficientStatistics reduce(SufficientStatistics value1, SufficientStatistics value2) throws Exception {
    //Accumulate in place: add value1 into value2 and return the combined vector
    value2.sum(value1);
    return value2;
}
public static void main(String[] args) throws IOException, ClassNotFoundException {
    int batchSize = 100;

    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/WasteIncineratorSample.arff");

    //Generate a naive Bayes structure with class variable "B" and build the model
    DAG dag = DAGGenerator.getNaiveBayesStructure(data.getAttributes(), "B");
    BayesianNetwork bn = new BayesianNetwork(dag);

    data.getAttributes().forEach(attribute -> System.out.println(attribute.getName()));

    //Now we print the model
    System.out.println(bn.toString());

    EF_BayesianNetwork efbn = new EF_BayesianNetwork(bn);

    //Sum the per-instance sufficient statistics in parallel (see Program 6)
    SufficientStatistics sumSS = data.parallelStream(batchSize)
            .map(efbn::getSufficientStatistics)
            .reduce(SufficientStatistics::sumVectorNonStateless).get();
            //Equivalent in-place alternative:
            //.reduce((v1, v2) -> {v1.sum(v2); return v1;}).get();

    sumSS.divideBy(data.stream().count());

    for (int i = 0; i < sumSS.size(); i++) {
        System.out.println(sumSS.get(i));
    }
}
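After the division, sumSS already holds the moment parameters of the maximum likelihood estimate. As a minimal sketch (reusing only the calls that appear in getLearntBayesianNetwork above; the learntBN variable name is ours), the learnt model could be recovered directly at the end of main:

//Sketch: turn the normalized sufficient statistics into a learnt model,
//using the same calls as getLearntBayesianNetwork() above.
efbn.setMomentParameters(sumSS);   //sumSS was already divided by the instance count
BayesianNetwork learntBN = efbn.toBayesianNetwork(dag);
System.out.println(learntBN.toString());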
private double computeELBO() {
    //ELBO contribution of the active, non-observed (latent) nodes
    double elbo = this.vmp.getNodes().parallelStream()
            .filter(node -> node.isActive() && !node.isObserved())
            .mapToDouble(node -> this.vmp.computeELBO(node))
            .sum();

    //ELBO contribution of the active, observed (word) nodes
    elbo += this.vmp.getNodes().parallelStream()
            .filter(node -> node.isActive() && node.isObserved())
            .mapToDouble(node -> {
                EF_BaseDistribution_MultinomialParents base =
                        (EF_BaseDistribution_MultinomialParents) node.getPDist();
                Variable topicVariable = (Variable) base.getMultinomialParents().get(0);
                Map<Variable, MomentParameters> momentParents = node.getMomentParents();
                double localELBO = 0;
                MomentParameters topicMoments = momentParents.get(topicVariable);
                //Index of the observed word within the vocabulary of the main variable
                int wordIndex = (int) node.getAssignment().getValue(node.getMainVariable())
                        % node.getMainVariable().getNumberOfStates();
                for (int i = 0; i < topicMoments.size(); i++) {
                    EF_SparseMultinomial_Dirichlet dist =
                            (EF_SparseMultinomial_Dirichlet) base.getBaseEFConditionalDistribution(i);
                    MomentParameters dirichletMoments = momentParents.get(dist.getDirichletVariable());
                    localELBO += node.getSufficientStatistics().get(wordIndex)
                            * dirichletMoments.get(wordIndex)
                            * topicMoments.get(i);
                }
                return localELBO;
            }).sum();
    return elbo;
}
//Set the observed word and scale its sufficient statistics by the word count
nodeWord.setAssignment(data.get(i));
nodeWord.getSufficientStatistics().multiplyBy(data.get(i).getValue(wordCountAtt));
private static SufficientStatistics sufficientStatisticsReduce(SufficientStatistics sta1,
                                                               SufficientStatistics sta2) {
    sta1.sum(sta2);
    return sta1;
}
/**
 * {@inheritDoc}
 */
@Override
public DynamicBayesianNetwork getLearntDBN() {
    //Normalize the sufficient statistics
    SufficientStatistics normalizedSS = efBayesianNetwork.createZeroSufficientStatistics();
    normalizedSS.copy(sumSS);
    normalizedSS.divideBy(dataInstanceCount.get());
    efBayesianNetwork.setMomentParameters(normalizedSS);
    return efBayesianNetwork.toDynamicBayesianNetwork(dag);
}
private static Iterable<SufficientStatistics> sufficientStatisticsMap(Iterator<DataInstance> iter,
                                                                      EF_BayesianNetwork ef_bayesianNetwork) {
    //Fold the sufficient statistics of all instances in this partition into one vector
    SufficientStatistics accumulator = null;
    if (iter.hasNext())
        accumulator = ef_bayesianNetwork.getSufficientStatistics(iter.next());
    while (iter.hasNext()) {
        accumulator.sum(ef_bayesianNetwork.getSufficientStatistics(iter.next()));
    }
    // FIXME: Is this really necessary?
    ArrayList<SufficientStatistics> result = new ArrayList<SufficientStatistics>();
    result.add(accumulator);
    return result;
}
/**
 * {@inheritDoc}
 */
@Override
public double updateModel(DataOnMemory<DynamicDataInstance> batch) {
    this.sumSS.sum(batch.stream()
            .map(efBayesianNetwork::getSufficientStatistics)
            .reduce(SufficientStatistics::sumVectorNonStateless).get());
    dataInstanceCount.addAndGet(batch.getNumberOfDataInstances());
    return Double.NaN;
}
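A possible calling pattern for this batch-wise update, as a sketch: streamOfBatches and getLearntDBN appear in the neighbouring listings, while the learner and dataStream variable names are our assumptions.

//Sketch: drive the batch-wise updateModel shown above and recover the model.
dataStream.streamOfBatches(windowsSize)
        .forEach(batch -> learner.updateModel(batch));
DynamicBayesianNetwork learntDBN = learner.getLearntDBN();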
/**
 * {@inheritDoc}
 */
@Override
public double updateModel(DataStream<DynamicDataInstance> dataStream) {
    Stream<DataOnMemory<DynamicDataInstance>> stream = null;
    if (parallelMode) {
        stream = dataStream.parallelStreamOfBatches(windowsSize);
    } else {
        stream = dataStream.streamOfBatches(windowsSize);
    }

    sumSS.sum(stream
            .peek(batch -> {
                dataInstanceCount.getAndAdd(batch.getNumberOfDataInstances());
                if (debug)
                    System.out.println("Parallel ML processing " + (int) dataInstanceCount.get() + " instances");
            })
            .map(batch -> batch.stream()
                    .map(efBayesianNetwork::getSufficientStatistics)
                    .reduce(SufficientStatistics::sumVectorNonStateless)
                    .get())
            .reduce(SufficientStatistics::sumVectorNonStateless).get());

    return Double.NaN;
}
/**
 * {@inheritDoc}
 */
@Override
public void runLearning() {
    this.initLearning();
    Stream<DataOnMemory<DynamicDataInstance>> stream = null;
    if (parallelMode) {
        stream = dataStream.parallelStreamOfBatches(windowsSize);
    } else {
        stream = dataStream.streamOfBatches(windowsSize);
    }

    sumSS.sum(stream
            .peek(batch -> {
                dataInstanceCount.getAndAdd(batch.getNumberOfDataInstances());
                if (debug)
                    System.out.println("Parallel ML processing " + (int) dataInstanceCount.get() + " instances");
            })
            .map(batch -> batch.stream()
                    .map(efBayesianNetwork::getSufficientStatistics)
                    .reduce(SufficientStatistics::sumVectorNonStateless)
                    .get())
            .reduce(SufficientStatistics::sumVectorNonStateless).get());
}
@Override
public void mapPartition(Iterable<DataInstance> values, Collector<SufficientStatistics> out) throws Exception {
    //Fold this partition's instances into a single sufficient-statistics vector
    SufficientStatistics accumulator = null;
    for (DataInstance value : values) {
        this.counterInstances.add(1.0);
        if (accumulator == null)
            accumulator = this.ef_bayesianNetwork.getSufficientStatistics(value);
        else
            accumulator.sum(this.ef_bayesianNetwork.getSufficientStatistics(value));
    }
    out.collect(accumulator);
}
/**
 * {@inheritDoc}
 */
@Override
public DynamicBayesianNetwork getLearntDBN() {
    //Normalize the sufficient statistics
    DynamicPartialSufficientSatistics partialSufficientSatistics =
            DynamicPartialSufficientSatistics.createZeroPartialSufficientStatistics(efBayesianNetwork);
    partialSufficientSatistics.copy(this.sumSS);
    partialSufficientSatistics.normalize();

    SufficientStatistics finalSS = efBayesianNetwork.createZeroSufficientStatistics();
    finalSS.sum(partialSufficientSatistics.getCompoundVector());

    efBayesianNetwork.setMomentParameters(finalSS);
    return efBayesianNetwork.toDynamicBayesianNetwork(dag);
}
/**
 * {@inheritDoc}
 */
@Override
public double updateModel(DataSpark dataUpdate) {
    //this.sumSS = computeSufficientStatistics(dataUpdate, efBayesianNetwork);
    this.sumSS = dataUpdate.getDataSet()
            .mapPartitions(iter -> sufficientStatisticsMap(iter, this.efBayesianNetwork))
            .reduce(ParallelMaximumLikelihood::sufficientStatisticsReduce);

    //Add the prior
    sumSS.sum(efBayesianNetwork.createInitSufficientStatistics());

    // FIXME: Maybe a generic method from the class, what about caching?
    numInstances = dataUpdate.getDataSet().count();
    numInstances++; //Initial counts

    return this.getLogMarginalProbability();
}
/**
 * {@inheritDoc}
 */
@Override
public double updateModel(DataFlink<DataInstance> dataUpdate) {
    try {
        Configuration config = new Configuration();
        config.setString(BN_NAME, this.dag.getName());
        config.setBytes(EFBN_NAME, Serialization.serializeObject(efBayesianNetwork));

        DataSet<DataInstance> dataset = dataUpdate.getDataSet();

        this.sumSS = dataset.map(new SufficientSatisticsMAP())
                .withParameters(config)
                .reduce(new SufficientSatisticsReduce())
                .collect().get(0);

        //Add the prior
        sumSS.sum(efBayesianNetwork.createInitSufficientStatistics());

        JobExecutionResult result = dataset.getExecutionEnvironment().getLastJobExecutionResult();
        numInstances = result.getAccumulatorResult(ParallelMaximumLikelihood.COUNTER_NAME + "_" + this.dag.getName());
        numInstances++; //Initial counts
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }
    return this.getLogMarginalProbability();
}
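For context, a minimal driver sketch for this Flink-based learner: it assumes AMIDST's Flink module (the DataFlinkLoader call, setDAG/initLearning methods, file path, and variable names are assumptions for illustration, not part of the listing above).

//Sketch under assumptions: loader call and learner setup taken from the
//AMIDST Flink API; the path and variable names are placeholders.
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataFlink<DataInstance> data = DataFlinkLoader.loadDataFromFolder(env, "path/to/data", false);

ParallelMaximumLikelihood learner = new ParallelMaximumLikelihood();
learner.setDAG(dag); //dag defined as in the listings above
learner.initLearning();
learner.updateModel(data);
BayesianNetwork learntBN = learner.getLearntBayesianNetwork();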