@Override public byte[] map(T value) throws Exception { return Serialization.serializeObject(value); } });
/**
 * Computes, for every instance of the data set attached to this object, the
 * posterior over the given latent variables, returned as a distributed
 * DataSet of DataPosteriorAssignment elements.
 *
 * @param latentVariables latent variables whose posteriors are requested
 * @return a Flink DataSet with one DataPosteriorAssignment per data instance
 * @throws IllegalArgumentException if the data set lacks a seq_id attribute
 */
public DataSet<DataPosteriorAssignment> computePosteriorAssignment(List<Variable> latentVariables){
    // Instances are identified by their seq_id attribute; without it the
    // per-instance posteriors cannot be matched back to the data.
    Attribute seqIdAttribute = this.dataFlink.getAttributes().getSeq_id();
    if (seqIdAttribute == null) {
        throw new IllegalArgumentException("Functionality only available for data sets with a seq_id attribute");
    }
    try {
        // Serialize everything the distributed mappers need into the job configuration.
        Configuration parameters = new Configuration();
        parameters.setString(ParameterLearningAlgorithm.BN_NAME, this.dag.getName());
        parameters.setBytes(SVB, Serialization.serializeObject(svb));
        parameters.setBytes(LATENT_VARS, Serialization.serializeObject(latentVariables));

        return this.dataFlink
                .getBatchedDataSet(this.batchSize)
                .flatMap(new ParallelVBMapInferenceAssignment())
                .withParameters(parameters);
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }
}
/**
 * Computes, for every instance of the supplied data set, the posterior over
 * the given latent variables, returned as a distributed DataSet of
 * DataPosteriorAssignment elements.
 *
 * @param dataFlink the data over which the posteriors are computed
 * @param latentVariables latent variables whose posteriors are requested
 * @return a Flink DataSet with one DataPosteriorAssignment per data instance
 * @throws IllegalArgumentException if the data set lacks a seq_id attribute
 */
public DataSet<DataPosteriorAssignment> computePosteriorAssignment(DataFlink<DataInstance> dataFlink, List<Variable> latentVariables){
    // seq_id is required to identify each instance in the distributed result.
    if (dataFlink.getAttributes().getSeq_id() == null) {
        throw new IllegalArgumentException("Functionality only available for data sets with a seq_id attribute");
    }
    try {
        Configuration cfg = new Configuration();
        cfg.setString(ParameterLearningAlgorithm.BN_NAME, this.dag.getName());
        cfg.setBytes(SVB, Serialization.serializeObject(svb));
        cfg.setBytes(LATENT_VARS, Serialization.serializeObject(latentVariables));

        return dataFlink
                .getBatchedDataSet(this.batchSize)
                .flatMap(new ParallelVBMapInferenceAssignment())
                .withParameters(cfg);
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }
}
/**
 * Computes, for every instance of the supplied data set, the posterior over
 * the given latent variables, returned as a distributed DataSet of
 * DataPosterior elements.
 *
 * @param dataFlink the data over which the posteriors are computed
 * @param latentVariables latent variables whose posteriors are requested
 * @return a Flink DataSet with one DataPosterior per data instance
 * @throws IllegalArgumentException if the data set lacks a seq_id attribute
 */
public DataSet<DataPosterior> computePosterior(DataFlink<DataInstance> dataFlink, List<Variable> latentVariables){
    // The seq_id attribute identifies each instance in the result.
    Attribute seqIdAttribute = dataFlink.getAttributes().getSeq_id();
    if (seqIdAttribute == null) {
        throw new IllegalArgumentException("Functionality only available for data sets with a seq_id attribute");
    }
    try {
        // Ship the model name, the SVB engine and the latent-variable list to the workers.
        Configuration jobConfig = new Configuration();
        jobConfig.setString(ParameterLearningAlgorithm.BN_NAME, this.dag.getName());
        jobConfig.setBytes(SVB, Serialization.serializeObject(svb));
        jobConfig.setBytes(LATENT_VARS, Serialization.serializeObject(latentVariables));

        return dataFlink
                .getBatchedDataSet(this.batchSize)
                .flatMap(new ParallelVBMapInference())
                .withParameters(jobConfig);
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }
}
/**
 * Computes the posterior over the given latent variables for every instance
 * of the supplied data set, as a distributed DataSet of
 * DataPosteriorAssignment elements.
 *
 * @param dataFlink the data over which the posteriors are computed
 * @param latentVariables latent variables whose posteriors are requested
 * @return a Flink DataSet with one DataPosteriorAssignment per data instance
 * @throws IllegalArgumentException if the data set lacks a seq_id attribute
 */
public DataSet<DataPosteriorAssignment> computePosteriorAssignment(DataFlink<DataInstance> dataFlink, List<Variable> latentVariables){
    // Each instance must carry a seq_id so the posterior can be tied back to it.
    Attribute sequenceId = dataFlink.getAttributes().getSeq_id();
    if (sequenceId == null) {
        throw new IllegalArgumentException("Functionality only available for data sets with a seq_id attribute");
    }
    try {
        Configuration parameters = new Configuration();
        parameters.setString(ParameterLearningAlgorithm.BN_NAME, this.dag.getName());
        parameters.setBytes(SVB, Serialization.serializeObject(svb));
        parameters.setBytes(LATENT_VARS, Serialization.serializeObject(latentVariables));

        return dataFlink
                .getBatchedDataSet(this.batchSize)
                .flatMap(new ParallelVBMapInferenceAssignment())
                .withParameters(parameters);
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }
}
/**
 * Computes the posterior over the given latent variables for every instance
 * of the supplied data set. Batches are produced through the configured
 * batch converter.
 *
 * @param dataFlink the data over which the posteriors are computed
 * @param latentVariables latent variables whose posteriors are requested
 * @return a Flink DataSet with one DataPosteriorAssignment per data instance
 * @throws IllegalArgumentException if the data set lacks a seq_id attribute
 */
public DataSet<DataPosteriorAssignment> computePosteriorAssignment(DataFlink<DataInstance> dataFlink, List<Variable> latentVariables){
    // seq_id is required to identify each instance in the distributed result.
    if (dataFlink.getAttributes().getSeq_id() == null) {
        throw new IllegalArgumentException("Functionality only available for data sets with a seq_id attribute");
    }
    try {
        Configuration cfg = new Configuration();
        cfg.setString(ParameterLearningAlgorithm.BN_NAME, this.getName());
        cfg.setBytes(SVB, Serialization.serializeObject(svb));
        cfg.setBytes(LATENT_VARS, Serialization.serializeObject(latentVariables));

        // Note: batching is delegated to this object's batchConverter.
        return dataFlink
                .getBatchedDataSet(this.batchSize, batchConverter)
                .flatMap(new ParallelVBMapInferenceAssignment())
                .withParameters(cfg);
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }
}
/**
 * Computes the posterior over the given latent variables for every instance
 * of the data set attached to this object.
 *
 * @param latentVariables latent variables whose posteriors are requested
 * @return a Flink DataSet with one DataPosterior per data instance
 * @throws IllegalArgumentException if the data set lacks a seq_id attribute
 */
public DataSet<DataPosterior> computePosterior(List<Variable> latentVariables){
    // Instances are matched to their posteriors through the seq_id attribute.
    Attribute seqIdAttribute = this.dataFlink.getAttributes().getSeq_id();
    if (seqIdAttribute == null) {
        throw new IllegalArgumentException("Functionality only available for data sets with a seq_id attribute");
    }
    try {
        Configuration jobConfig = new Configuration();
        jobConfig.setString(ParameterLearningAlgorithm.BN_NAME, this.dag.getName());
        jobConfig.setBytes(SVB, Serialization.serializeObject(svb));
        jobConfig.setBytes(LATENT_VARS, Serialization.serializeObject(latentVariables));

        return this.dataFlink
                .getBatchedDataSet(this.batchSize)
                .flatMap(new ParallelVBMapInference())
                .withParameters(jobConfig);
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }
}
/**
 * Computes the posterior over the given latent variables for every instance
 * of the supplied data set. Batches are produced through the configured
 * batch converter.
 *
 * @param dataFlink the data over which the posteriors are computed
 * @param latentVariables latent variables whose posteriors are requested
 * @return a Flink DataSet with one DataPosterior per data instance
 * @throws IllegalArgumentException if the data set lacks a seq_id attribute
 */
public DataSet<DataPosterior> computePosterior(DataFlink<DataInstance> dataFlink, List<Variable> latentVariables){
    // seq_id is required to identify each instance in the distributed result.
    if (dataFlink.getAttributes().getSeq_id() == null) {
        throw new IllegalArgumentException("Functionality only available for data sets with a seq_id attribute");
    }
    try {
        Configuration cfg = new Configuration();
        cfg.setString(ParameterLearningAlgorithm.BN_NAME, this.getName());
        cfg.setBytes(SVB, Serialization.serializeObject(svb));
        cfg.setBytes(LATENT_VARS, Serialization.serializeObject(latentVariables));

        // Note: batching is delegated to this object's batchConverter.
        return dataFlink
                .getBatchedDataSet(this.batchSize, batchConverter)
                .flatMap(new ParallelVBMapInference())
                .withParameters(cfg);
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }
}
/**
 * Computes, for every instance of the supplied data set, the posterior over
 * the given latent variables.
 *
 * @param dataFlink the data over which the posteriors are computed
 * @param latentVariables latent variables whose posteriors are requested
 * @return a Flink DataSet with one DataPosterior per data instance
 * @throws IllegalArgumentException if the data set lacks a seq_id attribute
 */
public DataSet<DataPosterior> computePosterior(DataFlink<DataInstance> dataFlink, List<Variable> latentVariables){
    // Each instance must carry a seq_id so the posterior can be tied back to it.
    Attribute sequenceId = dataFlink.getAttributes().getSeq_id();
    if (sequenceId == null) {
        throw new IllegalArgumentException("Functionality only available for data sets with a seq_id attribute");
    }
    try {
        // Everything the distributed mappers need goes into the job configuration.
        Configuration parameters = new Configuration();
        parameters.setString(ParameterLearningAlgorithm.BN_NAME, this.dag.getName());
        parameters.setBytes(SVB, Serialization.serializeObject(svb));
        parameters.setBytes(LATENT_VARS, Serialization.serializeObject(latentVariables));

        return dataFlink
                .getBatchedDataSet(this.batchSize)
                .flatMap(new ParallelVBMapInference())
                .withParameters(parameters);
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }
}
/**
 * Groups the given distributed data into batches of (at most) the given size,
 * each batch materialized as a DataOnMemory object. Grouping is performed
 * per partition via a mapPartition operator.
 *
 * @param data the distributed data to batch
 * @param batchSize the number of instances per batch
 * @param <T> the concrete data-instance type
 * @return a DataSet whose elements are in-memory batches of the input
 */
public static <T extends DataInstance> DataSet<DataOnMemory<T>> toBatches(DataFlink<T> data, int batchSize){
    try {
        // The batching operator reads the batch size and the attribute set
        // from the job configuration.
        Configuration cfg = new Configuration();
        cfg.setInteger(BATCH_SIZE, batchSize);
        cfg.setBytes(ATTRIBUTES, Serialization.serializeObject(data.getAttributes()));

        return data.getDataSet()
                .mapPartition(new DataBatch<T>())
                .withParameters(cfg);
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }
}
/**
 * Groups the given distributed data into batches, each materialized as a
 * DataOnMemory object, using the seq_id-aware partition batching operator.
 *
 * @param data the distributed data to batch
 * @param batchSize the requested batch size
 * @param <T> the concrete data-instance type
 * @return a DataSet whose elements are in-memory batches of the input
 */
public static <T extends DataInstance> DataSet<DataOnMemory<T>> toBatchesBySeqID(DataFlink<T> data, int batchSize){
    try {
        // Batch size and attributes travel to the workers via the job configuration.
        Configuration cfg = new Configuration();
        cfg.setInteger(BATCH_SIZE, batchSize);
        cfg.setBytes(ATTRIBUTES, Serialization.serializeObject(data.getAttributes()));

        return data.getDataSet()
                .mapPartition(new DataBatchBySeqID<T>())
                .withParameters(cfg);
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }
}
/**
 * Computes the posterior for every instance of the supplied data set,
 * returned as a distributed DataSet of DataPosterior elements. Batches are
 * produced through the configured batch converter.
 *
 * @param dataFlink the data over which the posteriors are computed
 * @return a Flink DataSet with one DataPosterior per data instance
 * @throws IllegalArgumentException if the data set lacks a seq_id attribute
 */
public DataSet<DataPosterior> computePosterior(DataFlink<DataInstance> dataFlink){
    // seq_id is required to identify each instance in the distributed result.
    if (dataFlink.getAttributes().getSeq_id() == null) {
        throw new IllegalArgumentException("Functionality only available for data sets with a seq_id attribute");
    }
    try {
        Configuration cfg = new Configuration();
        cfg.setString(ParameterLearningAlgorithm.BN_NAME, this.getName());
        cfg.setBytes(SVB, Serialization.serializeObject(svb));

        // Note: batching is delegated to this object's batchConverter.
        return dataFlink
                .getBatchedDataSet(this.batchSize, batchConverter)
                .flatMap(new ParallelVBMapInference())
                .withParameters(cfg);
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }
}
/**
 * Computes the posterior for every instance of the data set attached to this
 * object, returned as a distributed DataSet of DataPosterior elements.
 *
 * @return a Flink DataSet with one DataPosterior per data instance
 * @throws IllegalArgumentException if the data set lacks a seq_id attribute
 */
public DataSet<DataPosterior> computePosterior(){
    // Instances are matched to their posteriors through the seq_id attribute.
    Attribute seqIdAttribute = this.dataFlink.getAttributes().getSeq_id();
    if (seqIdAttribute == null) {
        throw new IllegalArgumentException("Functionality only available for data sets with a seq_id attribute");
    }
    try {
        Configuration jobConfig = new Configuration();
        jobConfig.setString(ParameterLearningAlgorithm.BN_NAME, this.dag.getName());
        jobConfig.setBytes(SVB, Serialization.serializeObject(svb));

        return this.dataFlink
                .getBatchedDataSet(this.batchSize)
                .flatMap(new ParallelVBMapInference())
                .withParameters(jobConfig);
    } catch (Exception ex) {
        throw new UndeclaredThrowableException(ex);
    }
}
/**
 * Computes the posterior for every instance of the supplied data set, returned as a
 * distributed DataSet of DataPosterior elements. Requires a seq_id attribute to
 * identify each instance; failures are wrapped in UndeclaredThrowableException.
 * (The updateModel method opened at the end of this line continues below.)
 */
public DataSet<DataPosterior> computePosterior(DataFlink<DataInstance> dataFlink){ Attribute seq_id = dataFlink.getAttributes().getSeq_id(); if (seq_id==null) throw new IllegalArgumentException("Functionality only available for data sets with a seq_id attribute"); try{ Configuration config = new Configuration(); config.setString(ParameterLearningAlgorithm.BN_NAME, this.dag.getName()); config.setBytes(SVB, Serialization.serializeObject(svb)); return dataFlink .getBatchedDataSet(this.batchSize) .flatMap(new ParallelVBMapInference()) .withParameters(config); }catch(Exception ex){ throw new UndeclaredThrowableException(ex); } } public double updateModel(DataFlink<DataInstance> dataUpdate){
/**
 * Computes the posterior for every instance of the supplied data set, returned as a
 * distributed DataSet of DataPosterior elements. Requires a seq_id attribute to
 * identify each instance; failures are wrapped in UndeclaredThrowableException.
 * (The updateModel method opened at the end of this line continues below.)
 */
public DataSet<DataPosterior> computePosterior(DataFlink<DataInstance> dataFlink){ Attribute seq_id = dataFlink.getAttributes().getSeq_id(); if (seq_id==null) throw new IllegalArgumentException("Functionality only available for data sets with a seq_id attribute"); try{ Configuration config = new Configuration(); config.setString(ParameterLearningAlgorithm.BN_NAME, this.dag.getName()); config.setBytes(SVB, Serialization.serializeObject(svb)); return dataFlink .getBatchedDataSet(this.batchSize) .flatMap(new ParallelVBMapInference()) .withParameters(config); }catch(Exception ex){ throw new UndeclaredThrowableException(ex); } } public double updateModel(DataFlink<DataInstance> dataUpdate){
/**
 * Computes the ELBO of the given SVB model over the supplied data set: the
 * contribution of the non-replicated (global) nodes is computed locally, and
 * the per-batch contributions are computed distributedly and summed through
 * a Flink reduce.
 *
 * Fix: the original caught every exception, called printStackTrace() and
 * returned a partial ELBO, and could leave the SVB output flag disabled.
 * Failures are now rethrown as UndeclaredThrowableException (consistent with
 * the other methods in this file) and the output flag is always restored.
 *
 * @param dataFlink the data over which the ELBO is computed
 * @param batchConverter optional batching strategy; if null, plain fixed-size
 *                       batching of svb.getWindowsSize() instances is used
 * @return the ELBO value (local global-node term plus distributed batch terms)
 */
public static double computeELBO(DataFlink<DataInstance> dataFlink, SVB svb, Function2<DataFlink<DataInstance>,Integer,DataSet<DataOnMemory<DataInstance>>> batchConverter){
    svb.setOutput(false); // silence per-batch logging during the distributed pass
    try {
        // Global (non-replicated) nodes contribute once; computed locally.
        double elbo = svb.getPlateuStructure().getNonReplictedNodes()
                .mapToDouble(node -> svb.getPlateuStructure().getVMP().computeELBO(node))
                .sum();

        // The mappers need the SVB engine and the current posterior over the
        // plateau's natural parameters.
        Configuration config = new Configuration();
        config.setBytes(SVB, Serialization.serializeObject(svb));
        config.setBytes(PRIOR, Serialization.serializeObject(svb.getPlateuStructure().getPlateauNaturalParameterPosterior()));

        DataSet<DataOnMemory<DataInstance>> batches;
        if (batchConverter != null)
            batches = dataFlink.getBatchedDataSet(svb.getWindowsSize(), batchConverter);
        else
            batches = dataFlink.getBatchedDataSet(svb.getWindowsSize());

        elbo += batches.map(new ParallelVBMapELBO())
                .withParameters(config)
                .reduce(new ReduceFunction<Double>() {
                    @Override
                    public Double reduce(Double aDouble, Double t1) throws Exception {
                        return aDouble + t1;
                    }
                }).collect().get(0);
        return elbo;
    } catch (Exception e) {
        // Fail loudly instead of printing the trace and returning a partial ELBO.
        throw new UndeclaredThrowableException(e);
    } finally {
        svb.setOutput(true); // restore verbosity even when the job fails
    }
}
svb.initLearning(); Serialization.serializeObject(svb);
config.setBytes(SVB, Serialization.serializeObject(svb));
/** * {@inheritDoc} */ @Override public double updateModel(DataFlink<DataInstance> dataUpdate) { try { Configuration config = new Configuration(); config.setString(BN_NAME, this.dag.getName()); config.setBytes(EFBN_NAME, Serialization.serializeObject(efBayesianNetwork)); DataSet<DataInstance> dataset = dataUpdate.getDataSet(); this.sumSS = dataset.map(new SufficientSatisticsMAP()) .withParameters(config) .reduce(new SufficientSatisticsReduce()) .collect().get(0); //Add the prior sumSS.sum(efBayesianNetwork.createInitSufficientStatistics()); JobExecutionResult result = dataset.getExecutionEnvironment().getLastJobExecutionResult(); numInstances = result.getAccumulatorResult(ParallelMaximumLikelihood.COUNTER_NAME+"_"+this.dag.getName()); numInstances++;//Initial counts }catch(Exception ex){ throw new UndeclaredThrowableException(ex); } return this.getLogMarginalProbability(); }
/** * {@inheritDoc} */ @Override public double updateModel(DataFlink<DataInstance> dataUpdate) { try { this.initLearning(); Configuration config = new Configuration(); config.setString(ParameterLearningAlgorithm.BN_NAME, this.dag.getName()); config.setBytes(EFBN_NAME, Serialization.serializeObject(efBayesianNetwork)); DataSet<DataInstance> dataset = dataUpdate.getDataSet(); this.sumSS = dataset.mapPartition(new SufficientSatisticsMAP()) .withParameters(config) .reduce(new SufficientSatisticsReduce()) .collect().get(0); //Add the prior sumSS.sum(efBayesianNetwork.createInitSufficientStatistics()); JobExecutionResult result = dataset.getExecutionEnvironment().getLastJobExecutionResult(); numInstances = result.getAccumulatorResult(ParallelMaximumLikelihood2.COUNTER_NAME+"_"+this.dag.getName()); numInstances++;//Initial counts }catch(Exception ex){ throw new UndeclaredThrowableException(ex); } return this.getLogMarginalProbability(); }