public void initLearning() {
    efBayesianNetwork = new EF_BayesianNetwork(dag);
    sumSS = efBayesianNetwork.createInitSufficientStatistics();
}
private static Iterable<SufficientStatistics> sufficientStatisticsMap(Iterator<DataInstance> iter, EF_BayesianNetwork ef_bayesianNetwork) {
    SufficientStatistics accumulator = null;
    if (iter.hasNext())
        accumulator = ef_bayesianNetwork.getSufficientStatistics(iter.next());
    while (iter.hasNext()) {
        accumulator.sum(ef_bayesianNetwork.getSufficientStatistics(iter.next()));
    }
    // FIXME: Is this really necessary?
    // NOTE: an empty partition yields a singleton list containing null.
    ArrayList<SufficientStatistics> result = new ArrayList<>();
    result.add(accumulator);
    return result;
}
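// The reduce companion referenced by updateModel below
// (ParallelMaximumLikelihood::sufficientStatisticsReduce) is not shown in this section.
// A minimal sketch, assuming it simply folds two partial vectors by in-place summation
// (the same pattern as the commented-out lambda in the main example further down):
private static SufficientStatistics sufficientStatisticsReduce(SufficientStatistics a, SufficientStatistics b) {
    a.sum(b); //SufficientStatistics.sum accumulates element-wise, as in sufficientStatisticsMap above
    return a;
}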
/**
 * {@inheritDoc}
 */
@Override
public BayesianNetwork getLearntBayesianNetwork() {
    //Normalize the sufficient statistics
    SufficientStatistics normalizedSS = efBayesianNetwork.createZeroSufficientStatistics();
    normalizedSS.copy(sumSS);
    normalizedSS.divideBy(numInstances);

    efBayesianNetwork.setMomentParameters(normalizedSS);
    return efBayesianNetwork.toBayesianNetwork(dag);
}
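// Usage sketch for the life cycle above: initLearning -> updateModel -> getLearntBayesianNetwork.
// Hypothetical driver; it assumes these methods live on the Spark ParallelMaximumLikelihood
// learner, as the surrounding fragments suggest, and that `learner` is already configured with a DAG.
public static BayesianNetwork learnSketch(ParallelMaximumLikelihood learner, DataSpark data) {
    learner.initLearning();                      //allocate the prior sufficient statistics
    double logProb = learner.updateModel(data);  //one maximum-likelihood pass over the data
    return learner.getLearntBayesianNetwork();   //normalize the statistics and convert back
}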
/**
 * Creates a new EF_DynamicBayesianNetwork object given a {@link DynamicBayesianNetwork} object.
 * @param dbn a {@link DynamicBayesianNetwork} object.
 */
public EF_DynamicBayesianNetwork(DynamicBayesianNetwork dbn) {
    this.bayesianNetworkTime0 = new EF_BayesianNetwork();
    this.bayesianNetworkTimeT = new EF_BayesianNetwork();
    this.bayesianNetworkTime0.setDistributionList(
            dbn.getConditionalDistributionsTime0().stream()
                    .map(dist -> dist.<EF_ConditionalDistribution>toEFConditionalDistribution())
                    .collect(Collectors.toList()));
    this.bayesianNetworkTimeT.setDistributionList(
            dbn.getConditionalDistributionsTimeT().stream()
                    .map(dist -> dist.<EF_ConditionalDistribution>toEFConditionalDistribution())
                    .collect(Collectors.toList()));
}
@Override
public SufficientStatistics createInitSufficientStatistics() {
    DynamiceBNCompoundVector vectorSS = this.createEmtpyCompoundVector();

    vectorSS.setIndicatorTime0(1.0);
    vectorSS.setVectorTime0(this.bayesianNetworkTime0.createInitSufficientStatistics());

    vectorSS.setIndicatorTimeT(1.0);
    vectorSS.setVectorTimeT(this.bayesianNetworkTimeT.createInitSufficientStatistics());

    return vectorSS;
}
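// NOTE: both indicators start at 1.0, so the init vector effectively contributes one
// pseudo-observation to each of the two sub-models; this reading matches the
// "numInstances++; //Initial counts" adjustment in the learners elsewhere in this section.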
/**
 * Creates a new EF_DynamicBayesianNetwork object given a {@link DynamicDAG} object.
 * @param dag a {@link DynamicDAG} object.
 */
public EF_DynamicBayesianNetwork(DynamicDAG dag) {
    this.bayesianNetworkTime0 = new EF_BayesianNetwork(dag.getParentSetsTime0());
    this.bayesianNetworkTimeT = new EF_BayesianNetwork(dag.getParentSetsTimeT());
}
/**
 * {@inheritDoc}
 */
@Override
public void updateNaturalFromMomentParameters() {
    DynamiceBNCompoundVector globalMomentsParam = (DynamiceBNCompoundVector) this.momentParameters;
    DynamiceBNCompoundVector vectorNatural = this.createEmtpyCompoundVector();

    globalMomentsParam.getVectorTime0().divideBy(globalMomentsParam.getIndicatorTime0());
    globalMomentsParam.getVectorTimeT().divideBy(globalMomentsParam.getIndicatorTimeT());

    this.bayesianNetworkTime0.setMomentParameters((MomentParameters) globalMomentsParam.getVectorTime0());
    this.bayesianNetworkTimeT.setMomentParameters((MomentParameters) globalMomentsParam.getVectorTimeT());

    vectorNatural.setVectorTime0(this.bayesianNetworkTime0.getNaturalParameters());
    vectorNatural.setVectorTimeT(this.bayesianNetworkTimeT.getNaturalParameters());

    this.naturalParameters = vectorNatural;
}
@Override
public double computeLogProbabilityOf(DynamicDataInstance dataInstance) {
    if (dataInstance.getTimeID() == 0)
        return this.bayesianNetworkTime0.computeLogProbabilityOf(dataInstance);
    else
        return this.bayesianNetworkTimeT.computeLogProbabilityOf(dataInstance);
}
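// Sketch: total log-likelihood of a batch, built on the per-instance dispatch above.
// Hypothetical helper; `efDBN` and `batch` are assumed names, not part of this section.
public static double totalLogLikelihood(EF_DynamicBayesianNetwork efDBN, List<DynamicDataInstance> batch) {
    double logLik = 0;
    for (DynamicDataInstance instance : batch) {
        //timeID == 0 is scored by the time-0 model, every later time slice by the time-T model
        logLik += efDBN.computeLogProbabilityOf(instance);
    }
    return logLik;
}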
/**
 * {@inheritDoc}
 */
@Override
public double updateModel(DataSpark dataUpdate) {
    //this.sumSS = computeSufficientStatistics(dataUpdate, efBayesianNetwork);
    this.sumSS = dataUpdate.getDataSet()
            .mapPartitions(iter -> sufficientStatisticsMap(iter, this.efBayesianNetwork))
            .reduce(ParallelMaximumLikelihood::sufficientStatisticsReduce);

    //Add the prior
    sumSS.sum(efBayesianNetwork.createInitSufficientStatistics());

    // FIXME: Maybe a generic method from the class, what about caching?
    numInstances = dataUpdate.getDataSet().count();
    numInstances++; //Initial counts

    return this.getLogMarginalProbability();
}
public static void main(String[] args) throws IOException, ClassNotFoundException {
    int batchSize = 100;

    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/WasteIncineratorSample.arff");

    //Generate a naive Bayes structure over the data attributes, with "B" as the class variable
    DAG dag = DAGGenerator.getNaiveBayesStructure(data.getAttributes(), "B");
    BayesianNetwork bn = new BayesianNetwork(dag);

    data.getAttributes().forEach(attribute -> System.out.println(attribute.getName()));

    //Print the model
    System.out.println(bn.toString());

    EF_BayesianNetwork efbn = new EF_BayesianNetwork(bn);

    SufficientStatistics sumSS = data.parallelStream(batchSize)
            .map(efbn::getSufficientStatistics) //see Program 6
            .reduce(SufficientStatistics::sumVectorNonStateless).get();
            //.reduce((v1,v2) -> {v1.sum(v2); return v1;}).get();

    //Average the summed statistics over the number of instances
    sumSS.divideBy(data.stream().count());

    for (int i = 0; i < sumSS.size(); i++) {
        System.out.println(sumSS.get(i));
    }
}
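// Hypothetical continuation of the main method above: the averaged statistics are the
// moment parameters, so a learnt network could be recovered exactly as in
// getLearntBayesianNetwork(), e.g.:
//
//     efbn.setMomentParameters(sumSS);
//     BayesianNetwork learntBN = efbn.toBayesianNetwork(dag);
//     System.out.println(learntBN.toString());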
/**
 * {@inheritDoc}
 */
@Override
public BayesianNetwork getLearntBayesianNetwork() {
    //Normalize the sufficient statistics
    SufficientStatistics normalizedSS = efBayesianNetwork.createZeroSufficientStatistics();
    normalizedSS.copy(sumSS);
    normalizedSS.divideBy(numInstances);

    efBayesianNetwork.setMomentParameters(normalizedSS);
    return efBayesianNetwork.toBayesianNetwork(dag);
}
public void initLearning() {
    efBayesianNetwork = new EF_BayesianNetwork(dag);
    sumSS = efBayesianNetwork.createInitSufficientStatistics();
}
/**
 * {@inheritDoc}
 */
@Override
public double updateModel(DataFlink<DataInstance> dataUpdate) {
    try {
        Configuration config = new Configuration();
        config.setString(BN_NAME, this.dag.getName());
        config.setBytes(EFBN_NAME, Serialization.serializeObject(efBayesianNetwork));

        DataSet<DataInstance> dataset = dataUpdate.getDataSet();

        this.sumSS = dataset.map(new SufficientSatisticsMAP())
                .withParameters(config)
                .reduce(new SufficientSatisticsReduce())
                .collect().get(0);

        //Add the prior
        sumSS.sum(efBayesianNetwork.createInitSufficientStatistics());

        //numInstances is read back from the Flink accumulator incremented in the map function
        JobExecutionResult result = dataset.getExecutionEnvironment().getLastJobExecutionResult();
        numInstances = result.getAccumulatorResult(ParallelMaximumLikelihood.COUNTER_NAME + "_" + this.dag.getName());
        numInstances++; //Initial counts
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }

    return this.getLogMarginalProbability();
}
@Override
public SufficientStatistics map(DataInstance dataInstance) throws Exception {
    //Count the instance in the Flink accumulator and emit its sufficient statistics vector
    this.counterInstances.add(1.0);
    return this.ef_bayesianNetwork.getSufficientStatistics(dataInstance);
}
/**
 * {@inheritDoc}
 */
@Override
public BayesianNetwork getLearntBayesianNetwork() {
    //Normalize the sufficient statistics
    SufficientStatistics normalizedSS = efBayesianNetwork.createZeroSufficientStatistics();
    normalizedSS.copy(sumSS);
    normalizedSS.divideBy(numInstances);

    efBayesianNetwork.setMomentParameters(normalizedSS);
    return efBayesianNetwork.toBayesianNetwork(dag);
}
public void initLearning() {
    efBayesianNetwork = new EF_BayesianNetwork(dag);
    sumSS = efBayesianNetwork.createInitSufficientStatistics();
}
/**
 * {@inheritDoc}
 */
@Override
public double updateModel(DataFlink<DataInstance> dataUpdate) {
    try {
        this.initLearning();

        Configuration config = new Configuration();
        config.setString(ParameterLearningAlgorithm.BN_NAME, this.dag.getName());
        config.setBytes(EFBN_NAME, Serialization.serializeObject(efBayesianNetwork));

        DataSet<DataInstance> dataset = dataUpdate.getDataSet();

        this.sumSS = dataset.mapPartition(new SufficientSatisticsMAP())
                .withParameters(config)
                .reduce(new SufficientSatisticsReduce())
                .collect().get(0);

        //Add the prior
        sumSS.sum(efBayesianNetwork.createInitSufficientStatistics());

        JobExecutionResult result = dataset.getExecutionEnvironment().getLastJobExecutionResult();
        numInstances = result.getAccumulatorResult(ParallelMaximumLikelihood2.COUNTER_NAME + "_" + this.dag.getName());
        numInstances++; //Initial counts
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }

    return this.getLogMarginalProbability();
}
@Override
public void mapPartition(Iterable<DataInstance> values, Collector<SufficientStatistics> out) throws Exception {
    SufficientStatistics accumulator = null;
    for (DataInstance value : values) {
        this.counterInstances.add(1.0);
        if (accumulator == null)
            accumulator = this.ef_bayesianNetwork.getSufficientStatistics(value);
        else
            accumulator.sum(this.ef_bayesianNetwork.getSufficientStatistics(value));
    }
    //Guard against empty partitions: collecting a null record would fail downstream
    if (accumulator != null)
        out.collect(accumulator);
}
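// Design note: unlike the per-instance map() variant above, mapPartition() pre-aggregates
// within each partition, so the reduce step combines one vector per partition rather than
// one per instance, cutting the volume shuffled between workers.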
/**
 * {@inheritDoc}
 */
@Override
public SufficientStatistics getSufficientStatistics(DynamicDataInstance data) {
    DynamiceBNCompoundVector vectorSS = this.createEmtpyCompoundVector();

    if (data.getTimeID() == 0) {
        vectorSS.setIndicatorTime0(1.0);
        vectorSS.setVectorTime0(this.bayesianNetworkTime0.getSufficientStatistics(data));
    } else {
        vectorSS.setIndicatorTimeT(1.0);
        vectorSS.setVectorTimeT(this.bayesianNetworkTimeT.getSufficientStatistics(data));
    }

    return vectorSS;
}
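// Sketch: accumulating dynamic sufficient statistics over a sequence. Hypothetical helper;
// the indicator fields end up counting how many instances fell into each block, which is
// exactly what updateNaturalFromMomentParameters divides by.
public static SufficientStatistics accumulateSketch(EF_DynamicBayesianNetwork efDBN, List<DynamicDataInstance> sequence) {
    SufficientStatistics sum = efDBN.createInitSufficientStatistics();
    for (DynamicDataInstance instance : sequence) {
        //each instance contributes to either the time-0 or the time-T block of the compound vector
        sum.sum(efDBN.getSufficientStatistics(instance));
    }
    return sum;
}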