/**
 * Combines two partial sufficient-statistics vectors into one.
 * The second operand is used as the accumulator: the first operand is
 * added into it and it is returned as the merged result.
 */
@Override
public SufficientStatistics reduce(SufficientStatistics value1, SufficientStatistics value2) throws Exception {
    value2.sum(value1);
    return value2;
}
/**
 * Merges two partial sufficient statistics by accumulating the first
 * into the second, which is then reused as the combined output.
 */
@Override
public SufficientStatistics reduce(SufficientStatistics value1, SufficientStatistics value2) throws Exception {
    value2.sum(value1);
    return value2;
}
/**
 * Reduce function for partial sufficient statistics: adds {@code sta2}
 * into {@code sta1} (mutating it) and returns {@code sta1} as the merged
 * accumulator.
 */
private static SufficientStatistics sufficientStatisticsReduce(SufficientStatistics sta1, SufficientStatistics sta2) {
    sta1.sum(sta2);
    return sta1;
}
private static Iterable<SufficientStatistics> sufficientStatisticsMap(Iterator<DataInstance> iter, EF_BayesianNetwork ef_bayesianNetwork) { SufficientStatistics accumulator = null; if (iter.hasNext()) accumulator = ef_bayesianNetwork.getSufficientStatistics(iter.next());; while (iter.hasNext()) { accumulator.sum(ef_bayesianNetwork.getSufficientStatistics(iter.next())); } // FIXME: Is this really necessary? ArrayList<SufficientStatistics> result = new ArrayList<SufficientStatistics>(); result.add(accumulator); return result; }
/**
 * {@inheritDoc}
 *
 * Accumulates the batch's sufficient statistics into {@code sumSS} and
 * updates the running instance count.
 *
 * Fix: the original called {@code Optional.get()} unchecked, which throws
 * {@code NoSuchElementException} on an empty batch; an empty batch is now
 * simply a no-op for the statistics accumulator.
 *
 * @return {@code Double.NaN} — this algorithm does not report a
 *         log-probability per batch.
 */
@Override
public double updateModel(DataOnMemory<DynamicDataInstance> batch) {
    batch.stream()
            .map(efBayesianNetwork::getSufficientStatistics)
            .reduce(SufficientStatistics::sumVectorNonStateless)
            .ifPresent(this.sumSS::sum);
    dataInstanceCount.addAndGet(batch.getNumberOfDataInstances());
    return Double.NaN;
}
/**
 * {@inheritDoc}
 *
 * Streams the data in batches (optionally in parallel), reduces each
 * batch to a partial sufficient-statistics vector, and folds all partials
 * into {@code sumSS}.
 *
 * Fix: the original called {@code Optional.get()} unchecked on the final
 * reduce, which throws {@code NoSuchElementException} when the stream is
 * empty; an empty stream is now a no-op. The inner {@code get()} is kept:
 * batches produced by {@code streamOfBatches} are assumed non-empty
 * (NOTE(review): confirm against the DataStream implementation).
 *
 * @return {@code Double.NaN} — no per-update log-probability is computed.
 */
@Override
public double updateModel(DataStream<DynamicDataInstance> dataStream) {
    Stream<DataOnMemory<DynamicDataInstance>> stream = null;
    if (parallelMode) {
        stream = dataStream.parallelStreamOfBatches(windowsSize);
    } else {
        stream = dataStream.streamOfBatches(windowsSize);
    }
    stream
            .peek(batch -> {
                // Side effect: count processed instances (debug trace optional).
                dataInstanceCount.getAndAdd(batch.getNumberOfDataInstances());
                if (debug)
                    System.out.println("Parallel ML procesando " + (int) dataInstanceCount.get() + " instances");
            })
            .map(batch -> batch.stream()
                    .map(efBayesianNetwork::getSufficientStatistics)
                    .reduce(SufficientStatistics::sumVectorNonStateless)
                    .get())
            .reduce(SufficientStatistics::sumVectorNonStateless)
            .ifPresent(this.sumSS::sum);
    return Double.NaN;
}
/**
 * {@inheritDoc}
 *
 * Initializes the learner, then streams the configured data source in
 * batches (optionally in parallel), reducing each batch to a partial
 * sufficient-statistics vector and folding all partials into
 * {@code sumSS}.
 *
 * Fix: the original called {@code Optional.get()} unchecked on the final
 * reduce, which throws {@code NoSuchElementException} when the data
 * stream is empty; an empty stream is now a no-op. The inner
 * {@code get()} is kept: batches from {@code streamOfBatches} are assumed
 * non-empty (NOTE(review): confirm against the DataStream implementation).
 */
@Override
public void runLearning() {
    this.initLearning();
    Stream<DataOnMemory<DynamicDataInstance>> stream = null;
    if (parallelMode) {
        stream = dataStream.parallelStreamOfBatches(windowsSize);
    } else {
        stream = dataStream.streamOfBatches(windowsSize);
    }
    stream
            .peek(batch -> {
                // Side effect: count processed instances (debug trace optional).
                dataInstanceCount.getAndAdd(batch.getNumberOfDataInstances());
                if (debug)
                    System.out.println("Parallel ML procesando " + (int) dataInstanceCount.get() + " instances");
            })
            .map(batch -> batch.stream()
                    .map(efBayesianNetwork::getSufficientStatistics)
                    .reduce(SufficientStatistics::sumVectorNonStateless)
                    .get())
            .reduce(SufficientStatistics::sumVectorNonStateless)
            .ifPresent(this.sumSS::sum);
}
/**
 * Flink partition mapper: accumulates the sufficient statistics of every
 * instance in the partition into a single vector and emits it. Each
 * processed instance is also counted in the {@code counterInstances}
 * accumulator.
 *
 * Fix: an empty partition previously emitted {@code null}, which would
 * NPE in the downstream reduce; it now emits nothing.
 */
@Override
public void mapPartition(Iterable<DataInstance> values, Collector<SufficientStatistics> out) throws Exception {
    SufficientStatistics accumulator = null;
    for (DataInstance value : values) {
        this.counterInstances.add(1.0);
        if (accumulator == null) {
            // First instance seeds the accumulator.
            accumulator = this.ef_bayesianNetwork.getSufficientStatistics(value);
        } else {
            accumulator.sum(this.ef_bayesianNetwork.getSufficientStatistics(value));
        }
    }
    if (accumulator != null) {
        out.collect(accumulator);
    }
}
}
/** * {@inheritDoc} */ @Override public DynamicBayesianNetwork getLearntDBN() { //Normalize the sufficient statistics DynamicPartialSufficientSatistics partialSufficientSatistics = DynamicPartialSufficientSatistics.createZeroPartialSufficientStatistics(efBayesianNetwork); partialSufficientSatistics.copy(this.sumSS); partialSufficientSatistics.normalize(); SufficientStatistics finalSS = efBayesianNetwork.createZeroSufficientStatistics(); finalSS.sum(partialSufficientSatistics.getCompoundVector()); efBayesianNetwork.setMomentParameters(finalSS); return efBayesianNetwork.toDynamicBayesianNetwork(dag); }
/** * {@inheritDoc} */ @Override public double updateModel(DataSpark dataUpdate) { //this.sumSS = computeSufficientStatistics(dataUpdate, efBayesianNetwork); this.sumSS = dataUpdate.getDataSet() .mapPartitions( iter -> sufficientStatisticsMap(iter, this.efBayesianNetwork)) .reduce(ParallelMaximumLikelihood::sufficientStatisticsReduce); //Add the prior sumSS.sum(efBayesianNetwork.createInitSufficientStatistics()); // FIXME: Maybe a generic method from the class, what about caching? numInstances = dataSpark.getDataSet().count(); numInstances++;//Initial counts return this.getLogMarginalProbability(); }
/**
 * {@inheritDoc}
 *
 * Flink variant (per-element map): serializes the exponential-family
 * model into the job configuration, maps every instance to its
 * sufficient statistics, reduces them to a single vector, adds the
 * prior counts, and reads the processed-instance count back from the
 * job's named accumulator.
 *
 * NOTE(review): {@code collect().get(0)} assumes the reduce produced at
 * least one element — an empty dataset would throw
 * IndexOutOfBoundsException here; confirm whether empty input is possible.
 * Any checked exception from the Flink job is rethrown wrapped in
 * {@link UndeclaredThrowableException}.
 *
 * @param dataUpdate the Flink data to learn from
 * @return the log marginal probability of the data under the updated model
 */
@Override public double updateModel(DataFlink<DataInstance> dataUpdate) { try { Configuration config = new Configuration(); config.setString(BN_NAME, this.dag.getName()); config.setBytes(EFBN_NAME, Serialization.serializeObject(efBayesianNetwork)); DataSet<DataInstance> dataset = dataUpdate.getDataSet(); this.sumSS = dataset.map(new SufficientSatisticsMAP()) .withParameters(config) .reduce(new SufficientSatisticsReduce()) .collect().get(0); //Add the prior sumSS.sum(efBayesianNetwork.createInitSufficientStatistics()); JobExecutionResult result = dataset.getExecutionEnvironment().getLastJobExecutionResult(); numInstances = result.getAccumulatorResult(ParallelMaximumLikelihood.COUNTER_NAME+"_"+this.dag.getName()); numInstances++;//Initial counts }catch(Exception ex){ throw new UndeclaredThrowableException(ex); } return this.getLogMarginalProbability(); }
/**
 * {@inheritDoc}
 *
 * Flink variant (per-partition map): initializes the learner, serializes
 * the exponential-family model into the job configuration, reduces each
 * partition's sufficient statistics via {@code mapPartition}, merges the
 * partials, adds the prior counts, and reads the processed-instance
 * count back from the job's named accumulator.
 *
 * NOTE(review): {@code collect().get(0)} assumes the reduce produced at
 * least one element — an empty dataset would throw
 * IndexOutOfBoundsException here; confirm whether empty input is possible.
 * Any checked exception from the Flink job is rethrown wrapped in
 * {@link UndeclaredThrowableException}.
 *
 * @param dataUpdate the Flink data to learn from
 * @return the log marginal probability of the data under the updated model
 */
@Override public double updateModel(DataFlink<DataInstance> dataUpdate) { try { this.initLearning(); Configuration config = new Configuration(); config.setString(ParameterLearningAlgorithm.BN_NAME, this.dag.getName()); config.setBytes(EFBN_NAME, Serialization.serializeObject(efBayesianNetwork)); DataSet<DataInstance> dataset = dataUpdate.getDataSet(); this.sumSS = dataset.mapPartition(new SufficientSatisticsMAP()) .withParameters(config) .reduce(new SufficientSatisticsReduce()) .collect().get(0); //Add the prior sumSS.sum(efBayesianNetwork.createInitSufficientStatistics()); JobExecutionResult result = dataset.getExecutionEnvironment().getLastJobExecutionResult(); numInstances = result.getAccumulatorResult(ParallelMaximumLikelihood2.COUNTER_NAME+"_"+this.dag.getName()); numInstances++;//Initial counts }catch(Exception ex){ throw new UndeclaredThrowableException(ex); } return this.getLogMarginalProbability(); }