/**
 * {@inheritDoc}
 */
@Override
public BayesianNetwork getLearntBayesianNetwork() {
    //Normalize the sufficient statistics
    SufficientStatistics normalizedSS = efBayesianNetwork.createZeroSufficientStatistics();
    normalizedSS.copy(sumSS);
    normalizedSS.divideBy(numInstances);
    efBayesianNetwork.setMomentParameters(normalizedSS);
    return efBayesianNetwork.toBayesianNetwork(dag);
}
@Override
public SufficientStatistics reduce(SufficientStatistics value1, SufficientStatistics value2) throws Exception {
    //Accumulate in place: add value1 into value2 and return the combined vector
    value2.sum(value1);
    return value2;
}
public static void main(String[] args) throws IOException, ClassNotFoundException {
    int batchSize = 100;

    DataStream<DataInstance> data = DataStreamLoader.open("datasets/simulated/WasteIncineratorSample.arff");

    //Generate a naive Bayes structure with class variable "B" and build the model
    DAG dag = DAGGenerator.getNaiveBayesStructure(data.getAttributes(), "B");
    BayesianNetwork bn = new BayesianNetwork(dag);

    data.getAttributes().forEach(attribute -> System.out.println(attribute.getName()));

    //Now we print the model
    System.out.println(bn.toString());

    EF_BayesianNetwork efbn = new EF_BayesianNetwork(bn);

    //Sum the per-instance sufficient statistics in parallel (see Program 6)
    SufficientStatistics sumSS = data.parallelStream(batchSize)
            .map(efbn::getSufficientStatistics)
            .reduce(SufficientStatistics::sumVectorNonStateless).get();
            //Equivalent in-place alternative:
            //.reduce((v1, v2) -> {v1.sum(v2); return v1;}).get();

    sumSS.divideBy(data.stream().count());

    for (int i = 0; i < sumSS.size(); i++) {
        System.out.println(sumSS.get(i));
    }
}
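After the division, sumSS already holds the moment parameters of the maximum likelihood estimate. As a minimal sketch (reusing only the calls that appear in getLearntBayesianNetwork above; the learntBN variable name is ours), the learnt model could be recovered directly at the end of main:

//Sketch: turn the normalized sufficient statistics into a learnt model,
//using the same calls as getLearntBayesianNetwork() above.
efbn.setMomentParameters(sumSS);   //sumSS was already divided by the instance count
BayesianNetwork learntBN = efbn.toBayesianNetwork(dag);
System.out.println(learntBN.toString());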
private double computeELBO() {
    //ELBO contribution of the active, non-observed (latent) nodes
    double elbo = this.vmp.getNodes().parallelStream()
            .filter(node -> node.isActive() && !node.isObserved())
            .mapToDouble(node -> this.vmp.computeELBO(node))
            .sum();

    //ELBO contribution of the active, observed (word) nodes
    elbo += this.vmp.getNodes().parallelStream()
            .filter(node -> node.isActive() && node.isObserved())
            .mapToDouble(node -> {
                EF_BaseDistribution_MultinomialParents base =
                        (EF_BaseDistribution_MultinomialParents) node.getPDist();
                Variable topicVariable = (Variable) base.getMultinomialParents().get(0);
                Map<Variable, MomentParameters> momentParents = node.getMomentParents();
                double localELBO = 0;
                MomentParameters topicMoments = momentParents.get(topicVariable);
                //Index of the observed word within the vocabulary of the main variable
                int wordIndex = (int) node.getAssignment().getValue(node.getMainVariable())
                        % node.getMainVariable().getNumberOfStates();
                for (int i = 0; i < topicMoments.size(); i++) {
                    EF_SparseMultinomial_Dirichlet dist =
                            (EF_SparseMultinomial_Dirichlet) base.getBaseEFConditionalDistribution(i);
                    MomentParameters dirichletMoments = momentParents.get(dist.getDirichletVariable());
                    localELBO += node.getSufficientStatistics().get(wordIndex)
                            * dirichletMoments.get(wordIndex)
                            * topicMoments.get(i);
                }
                return localELBO;
            }).sum();
    return elbo;
}
//Set the observed word and scale its sufficient statistics by the word count
nodeWord.setAssignment(data.get(i));
nodeWord.getSufficientStatistics().multiplyBy(data.get(i).getValue(wordCountAtt));
private static SufficientStatistics sufficientStatisticsReduce(SufficientStatistics sta1,
                                                               SufficientStatistics sta2) {
    sta1.sum(sta2);
    return sta1;
}
/**
 * {@inheritDoc}
 */
@Override
public DynamicBayesianNetwork getLearntDBN() {
    //Normalize the sufficient statistics
    SufficientStatistics normalizedSS = efBayesianNetwork.createZeroSufficientStatistics();
    normalizedSS.copy(sumSS);
    normalizedSS.divideBy(dataInstanceCount.get());
    efBayesianNetwork.setMomentParameters(normalizedSS);
    return efBayesianNetwork.toDynamicBayesianNetwork(dag);
}
private static Iterable<SufficientStatistics> sufficientStatisticsMap(Iterator<DataInstance> iter,
                                                                      EF_BayesianNetwork ef_bayesianNetwork) {
    //Fold the sufficient statistics of all instances in this partition into one vector
    SufficientStatistics accumulator = null;
    if (iter.hasNext())
        accumulator = ef_bayesianNetwork.getSufficientStatistics(iter.next());
    while (iter.hasNext()) {
        accumulator.sum(ef_bayesianNetwork.getSufficientStatistics(iter.next()));
    }
    // FIXME: Is this really necessary?
    ArrayList<SufficientStatistics> result = new ArrayList<SufficientStatistics>();
    result.add(accumulator);
    return result;
}
/**
 * {@inheritDoc}
 */
@Override
public double updateModel(DataOnMemory<DynamicDataInstance> batch) {
    this.sumSS.sum(batch.stream()
            .map(efBayesianNetwork::getSufficientStatistics)
            .reduce(SufficientStatistics::sumVectorNonStateless).get());
    dataInstanceCount.addAndGet(batch.getNumberOfDataInstances());
    return Double.NaN;
}
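A possible calling pattern for this batch-wise update, as a sketch: streamOfBatches and getLearntDBN appear in the neighbouring listings, while the learner and dataStream variable names are our assumptions.

//Sketch: drive the batch-wise updateModel shown above and recover the model.
dataStream.streamOfBatches(windowsSize)
        .forEach(batch -> learner.updateModel(batch));
DynamicBayesianNetwork learntDBN = learner.getLearntDBN();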
/**
 * {@inheritDoc}
 */
@Override
public double updateModel(DataStream<DynamicDataInstance> dataStream) {
    Stream<DataOnMemory<DynamicDataInstance>> stream = null;
    if (parallelMode) {
        stream = dataStream.parallelStreamOfBatches(windowsSize);
    } else {
        stream = dataStream.streamOfBatches(windowsSize);
    }

    sumSS.sum(stream
            .peek(batch -> {
                dataInstanceCount.getAndAdd(batch.getNumberOfDataInstances());
                if (debug)
                    System.out.println("Parallel ML processing " + (int) dataInstanceCount.get() + " instances");
            })
            .map(batch -> batch.stream()
                    .map(efBayesianNetwork::getSufficientStatistics)
                    .reduce(SufficientStatistics::sumVectorNonStateless)
                    .get())
            .reduce(SufficientStatistics::sumVectorNonStateless).get());

    return Double.NaN;
}
/**
 * {@inheritDoc}
 */
@Override
public void runLearning() {
    this.initLearning();
    Stream<DataOnMemory<DynamicDataInstance>> stream = null;
    if (parallelMode) {
        stream = dataStream.parallelStreamOfBatches(windowsSize);
    } else {
        stream = dataStream.streamOfBatches(windowsSize);
    }

    sumSS.sum(stream
            .peek(batch -> {
                dataInstanceCount.getAndAdd(batch.getNumberOfDataInstances());
                if (debug)
                    System.out.println("Parallel ML processing " + (int) dataInstanceCount.get() + " instances");
            })
            .map(batch -> batch.stream()
                    .map(efBayesianNetwork::getSufficientStatistics)
                    .reduce(SufficientStatistics::sumVectorNonStateless)
                    .get())
            .reduce(SufficientStatistics::sumVectorNonStateless).get());
}
@Override
public void mapPartition(Iterable<DataInstance> values, Collector<SufficientStatistics> out) throws Exception {
    //Fold this partition's instances into a single sufficient-statistics vector
    SufficientStatistics accumulator = null;
    for (DataInstance value : values) {
        this.counterInstances.add(1.0);
        if (accumulator == null)
            accumulator = this.ef_bayesianNetwork.getSufficientStatistics(value);
        else
            accumulator.sum(this.ef_bayesianNetwork.getSufficientStatistics(value));
    }
    out.collect(accumulator);
}
/**
 * {@inheritDoc}
 */
@Override
public DynamicBayesianNetwork getLearntDBN() {
    //Normalize the sufficient statistics
    DynamicPartialSufficientSatistics partialSufficientSatistics =
            DynamicPartialSufficientSatistics.createZeroPartialSufficientStatistics(efBayesianNetwork);
    partialSufficientSatistics.copy(this.sumSS);
    partialSufficientSatistics.normalize();

    SufficientStatistics finalSS = efBayesianNetwork.createZeroSufficientStatistics();
    finalSS.sum(partialSufficientSatistics.getCompoundVector());

    efBayesianNetwork.setMomentParameters(finalSS);
    return efBayesianNetwork.toDynamicBayesianNetwork(dag);
}
/**
 * {@inheritDoc}
 */
@Override
public double updateModel(DataSpark dataUpdate) {
    //this.sumSS = computeSufficientStatistics(dataUpdate, efBayesianNetwork);
    this.sumSS = dataUpdate.getDataSet()
            .mapPartitions(iter -> sufficientStatisticsMap(iter, this.efBayesianNetwork))
            .reduce(ParallelMaximumLikelihood::sufficientStatisticsReduce);

    //Add the prior
    sumSS.sum(efBayesianNetwork.createInitSufficientStatistics());

    // FIXME: Maybe a generic method from the class, what about caching?
    numInstances = dataUpdate.getDataSet().count();
    numInstances++; //Initial counts

    return this.getLogMarginalProbability();
}
/**
 * {@inheritDoc}
 */
@Override
public double updateModel(DataFlink<DataInstance> dataUpdate) {
    try {
        Configuration config = new Configuration();
        config.setString(BN_NAME, this.dag.getName());
        config.setBytes(EFBN_NAME, Serialization.serializeObject(efBayesianNetwork));

        DataSet<DataInstance> dataset = dataUpdate.getDataSet();

        this.sumSS = dataset.map(new SufficientSatisticsMAP())
                .withParameters(config)
                .reduce(new SufficientSatisticsReduce())
                .collect().get(0);

        //Add the prior
        sumSS.sum(efBayesianNetwork.createInitSufficientStatistics());

        JobExecutionResult result = dataset.getExecutionEnvironment().getLastJobExecutionResult();
        numInstances = result.getAccumulatorResult(ParallelMaximumLikelihood.COUNTER_NAME + "_" + this.dag.getName());
        numInstances++; //Initial counts
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }
    return this.getLogMarginalProbability();
}
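For context, a minimal driver sketch for this Flink-based learner: it assumes AMIDST's Flink module (the DataFlinkLoader call, setDAG/initLearning methods, file path, and variable names are assumptions for illustration, not part of the listing above).

//Sketch under assumptions: loader call and learner setup taken from the
//AMIDST Flink API; the path and variable names are placeholders.
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataFlink<DataInstance> data = DataFlinkLoader.loadDataFromFolder(env, "path/to/data", false);

ParallelMaximumLikelihood learner = new ParallelMaximumLikelihood();
learner.setDAG(dag); //dag defined as in the listings above
learner.initLearning();
learner.updateModel(data);
BayesianNetwork learntBN = learner.getLearntBayesianNetwork();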