@Override
public void setCurStateTo(State s) {
    if(this.stateGenerator == null){
        this.stateGenerator = new ConstantStateGenerator(s);
    }
    this.curState = s;
}
/**
 * Initializes with an identity state mapping (no state abstraction).
 * @param domain the learning domain
 * @param gamma the discount factor
 * @param vfa the value function approximation to use
 */
public ApproximateQLearning(SADomain domain, double gamma, ParametricFunction.ParametricStateActionFunction vfa) {
    this(domain, gamma, vfa, new ShallowIdentityStateMapping());
}
/**
 * Initializes the world.
 * @param domain the SGDomain the world will use
 * @param jr the joint reward function
 * @param tf the terminal function
 * @param initialState the initial state of the world every time a new game starts
 */
public World(SGDomain domain, JointRewardFunction jr, TerminalFunction tf, State initialState){
    this.init(domain, domain.getJointActionModel(), jr, tf, new ConstantStateGenerator(initialState), new IdentityStateMapping());
}
final TerminalFunction tf = new SinglePFTF(
        PropositionalFunction.findPF(gw.generatePfs(), GridWorldDomain.PF_AT_LOCATION));
final ConstantStateGenerator sg = new ConstantStateGenerator(s);
@Before
public void setup() {
    this.gw = new GridWorldDomain(11, 11);
    this.gw.setMapToFourRooms();
    this.gw.setRf(new UniformCostRF());
    TerminalFunction tf = new SinglePFTF(PropositionalFunction.findPF(gw.generatePfs(), PF_AT_LOCATION));
    this.gw.setTf(tf);
    this.domain = this.gw.generateDomain();
    this.goalCondition = new TFGoalCondition(tf);
    this.hashingFactory = new SimpleHashableStateFactory();
}
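// A minimal sketch of how a test might use the members initialized above: plan
// to the goal with BFS. The start state, test method, and planner choice are
// illustrative assumptions, not part of the setup method itself.
@Test
public void testBFS() {
    State s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));
    DeterministicPlanner planner = new BFS(this.domain, this.goalCondition, this.hashingFactory);
    Policy p = planner.planFromState(s);
}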
/**
 * Creates an SGWorldShell with a new world using the domain, reading from standard input and writing to standard output.
 * @param domain the SGDomain
 * @param s the state in which the world will be rooted
 */
public SGWorldShell(SGDomain domain, State s){
    this(domain, System.in, System.out, new World(domain, new NullJointRewardFunction(), new NullTermination(), s));
}
/**
 * Initializes the WorldGenerator.
 * @param domain the SGDomain the world will use
 * @param jr the joint reward function
 * @param tf the terminal function
 * @param sg a state generator for generating initial states of a game
 */
public ConstantWorldGenerator(SGDomain domain, JointRewardFunction jr, TerminalFunction tf, StateGenerator sg){
    this.CWGInit(domain, jr, tf, sg, new IdentityStateMapping());
}
public SimulatedEnvironment(SampleModel model, State initialState) {
    this.stateGenerator = new ConstantStateGenerator(initialState);
    this.curState = initialState;
    this.model = model;
}
@Override
public OOSADomain generateDomain() {

    OOSADomain domain = new OOSADomain();

    domain.addStateClass(CLASS_AGENT, ExGridAgent.class)
            .addStateClass(CLASS_LOCATION, EXGridLocation.class);

    domain.addActionTypes(
            new UniversalActionType(ACTION_NORTH),
            new UniversalActionType(ACTION_SOUTH),
            new UniversalActionType(ACTION_EAST),
            new UniversalActionType(ACTION_WEST));

    OODomain.Helper.addPfsToDomain(domain, this.generatePfs());

    OOGridWorldStateModel smodel = new OOGridWorldStateModel();
    RewardFunction rf = new SingleGoalPFRF(domain.propFunction(PF_AT), 100, -1);
    TerminalFunction tf = new SinglePFTF(domain.propFunction(PF_AT));

    domain.setModel(new FactoredModel(smodel, rf, tf));

    return domain;
}
/**
 * Initializes the data members for the visual explorer.
 * @param domain the stochastic game domain to be explored
 * @param painter the 2D visualizer for states
 * @param baseState the initial state from which to explore
 * @param w the width of the state visualizer
 * @param h the height of the state visualizer
 */
public SGVisualExplorer(SGDomain domain, Visualizer painter, State baseState, int w, int h){
    this.init(domain, new World(domain, new NullJointRewardFunction(), new NullTermination(), baseState), painter, w, h);
}
/**
 * Initializes with a default 0.1 epsilon greedy policy/strategy
 * @param d the domain in which the agent will act
 * @param discount the discount factor
 * @param learningRate the learning rate
 * @param qInitializer the Q-value initialization method
 * @param hashFactory the state hashing factory
 */
public SGNaiveQLAgent(SGDomain d, double discount, double learningRate, QFunction qInitializer, HashableStateFactory hashFactory) {
    this.init(d);
    this.discount = discount;
    this.learningRate = new ConstantLR(learningRate);
    this.hashFactory = hashFactory;
    this.qInit = qInitializer;
    this.qMap = new HashMap<HashableState, List<QValue>>();
    stateRepresentations = new HashMap<HashableState, State>();
    this.policy = new EpsilonGreedy(this, 0.1);
    this.storedMapAbstraction = new ShallowIdentityStateMapping();
}
/**
 * This constructor is deprecated, because {@link burlap.mdp.stochasticgames.SGDomain} objects are now expected
 * to have a {@link JointModel} associated with them, making the constructor parameter for it
 * unnecessary. Instead use the constructor {@link #ConstantWorldGenerator(burlap.mdp.stochasticgames.SGDomain, JointRewardFunction, burlap.mdp.core.TerminalFunction, StateGenerator)}
 * @param domain the SGDomain the world will use
 * @param jam the joint action model that specifies the transition dynamics
 * @param jr the joint reward function
 * @param tf the terminal function
 * @param sg a state generator for generating initial states of a game
 */
@Deprecated
public ConstantWorldGenerator(SGDomain domain, JointModel jam, JointRewardFunction jr, TerminalFunction tf, StateGenerator sg){
    this.CWGInit(domain, jr, tf, sg, new IdentityStateMapping());
}
public SimulatedEnvironment(SADomain domain, State initialState) {
    this.stateGenerator = new ConstantStateGenerator(initialState);
    this.curState = initialState;

    if(domain.getModel() == null){
        throw new RuntimeException("SimulatedEnvironment requires a Domain with a model, but the input domain does not have one.");
    }
    this.model = domain.getModel();
}
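// A minimal usage sketch (assumed, not part of the class above): a grid world
// domain drives the SimulatedEnvironment and a Q-learning agent learns from it.
// The map, start position, and learning parameters are illustrative.
GridWorldDomain gw = new GridWorldDomain(11, 11);
gw.setMapToFourRooms();
SADomain domain = gw.generateDomain();
State initial = new GridWorldState(new GridAgent(0, 0));
SimulatedEnvironment env = new SimulatedEnvironment(domain, initial);
QLearning agent = new QLearning(domain, 0.99, new SimpleHashableStateFactory(), 0., 1.);
Episode e = agent.runLearningEpisode(env);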
/**
 * A main method showing example code that would be used to create an instance of Prisoner's Dilemma and begin playing it with an
 * {@link SGWorldShell}.
 * @param args command line args
 */
public static void main(String [] args){
    SingleStageNormalFormGame game = SingleStageNormalFormGame.getPrisonersDilemma();
    SGDomain domain = (SGDomain)game.generateDomain();
    JointRewardFunction r = game.getJointRewardFunction();

    World w = new World(domain, r, new NullTermination(), (State)new NFGameState(2));

    SGWorldShell shell = new SGWorldShell(domain, w);
    shell.start();
}
/**
 * Initializes with a default Q-value of 0 and a 0.1 epsilon greedy policy/strategy
 * @param d the domain in which the agent will act
 * @param discount the discount factor
 * @param learningRate the learning rate
 * @param hashFactory the state hashing factory
 */
public SGNaiveQLAgent(SGDomain d, double discount, double learningRate, HashableStateFactory hashFactory) {
    this.init(d);
    this.discount = discount;
    this.learningRate = new ConstantLR(learningRate);
    this.hashFactory = hashFactory;
    this.qInit = new ConstantValueFunction(0.);
    this.qMap = new HashMap<HashableState, List<QValue>>();
    stateRepresentations = new HashMap<HashableState, State>();
    this.policy = new EpsilonGreedy(this, 0.1);
    this.storedMapAbstraction = new ShallowIdentityStateMapping();
}
/**
 * Initializes the world.
 * @param domain the SGDomain the world will use
 * @param jr the joint reward function
 * @param tf the terminal function
 * @param sg a state generator for generating initial states of a game
 */
public World(SGDomain domain, JointRewardFunction jr, TerminalFunction tf, StateGenerator sg){
    this.init(domain, domain.getJointActionModel(), jr, tf, sg, new IdentityStateMapping());
}
/**
 * Plans from the input state and then returns a {@link burlap.behavior.policy.GreedyQPolicy} that greedily
 * selects the action with the highest Q-value and breaks ties uniformly randomly.
 * @param initialState the initial state of the planning problem
 * @return a {@link burlap.behavior.policy.GreedyQPolicy}.
 */
@Override
public GreedyQPolicy planFromState(State initialState) {

    if(this.model == null){
        throw new RuntimeException("LSPI cannot execute planFromState because the reward function and/or terminal function for planning have not been set. Use the initializeForPlanning method to set them.");
    }

    if(planningCollector == null){
        this.planningCollector = new SARSCollector.UniformRandomSARSCollector(this.actionTypes);
    }
    this.dataset = this.planningCollector.collectNInstances(new ConstantStateGenerator(initialState), this.model, this.numSamplesForPlanning, Integer.MAX_VALUE, this.dataset);
    return this.runPolicyIteration(this.maxNumPlanningIterations, this.maxChange);
}
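// Assumed usage sketch: once a model has been set (e.g., via initializeForPlanning),
// planning reduces to a single call, and the returned greedy policy can be rolled
// out against the model. The lspi instance and initialState are placeholders.
GreedyQPolicy policy = lspi.planFromState(initialState);
Episode e = PolicyUtils.rollout(policy, initialState, domain.getModel());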
/**
 * Creates a world instance for this game in which the provided agents join in the order they are passed. This object
 * uses the provided domain instance generated from this object instead of generating a new one.
 * @param domain the SGDomain instance
 * @param agents the agents to join the created world.
 * @return a world instance with the provided agents having already joined.
 */
public World createRepeatedGameWorld(SGDomain domain, SGAgent...agents){

    //grab the joint reward function from our bimatrix game in the more general BURLAP joint reward function interface
    JointRewardFunction jr = this.getJointRewardFunction();

    //the game repeats forever unless the caller manually stops it after some number of stages
    TerminalFunction tf = new NullTermination();

    //set up the initial state generator for the world, which for a bimatrix game is trivial
    StateGenerator sg = new ConstantStateGenerator(new NFGameState(agents.length));

    //create a world to synchronize the actions of agents in this domain and record results
    World w = new World(domain, jr, tf, sg);

    for(SGAgent a : agents){
        w.join(a);
    }

    return w;
}
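// Assumed usage sketch: create the repeated game world for two hypothetical
// agents (agent0, agent1) and run a fixed number of stages; the stage count is
// illustrative.
World w = game.createRepeatedGameWorld(domain, agent0, agent1);
w.runGame(100);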
/**
 * Initializes with a default 0.1 epsilon greedy policy/strategy
 * @param d the domain in which the agent will act
 * @param discount the discount factor
 * @param learningRate the learning rate
 * @param defaultQ the default to which all Q-values will be initialized
 * @param hashFactory the state hashing factory
 */
public SGNaiveQLAgent(SGDomain d, double discount, double learningRate, double defaultQ, HashableStateFactory hashFactory) {
    this.init(d);
    this.discount = discount;
    this.learningRate = new ConstantLR(learningRate);
    this.hashFactory = hashFactory;
    this.qInit = new ConstantValueFunction(defaultQ);
    this.qMap = new HashMap<HashableState, List<QValue>>();
    stateRepresentations = new HashMap<HashableState, State>();
    this.policy = new EpsilonGreedy(this, 0.1);
    this.storedMapAbstraction = new ShallowIdentityStateMapping();
}
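// Assumed usage sketch: two naive Q-learning agents join a world w (created
// elsewhere) and play; the discount, learning rate, default Q-value, and stage
// count are illustrative, and depending on the BURLAP version the agents may
// also need names/types configured before joining.
SGNaiveQLAgent ql0 = new SGNaiveQLAgent(domain, 0.99, 0.1, 0., new SimpleHashableStateFactory());
SGNaiveQLAgent ql1 = new SGNaiveQLAgent(domain, 0.99, 0.1, 0., new SimpleHashableStateFactory());
w.join(ql0);
w.join(ql1);
w.runGame(100);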
@Override
public OOSADomain generateDomain() {

    OOSADomain domain = new OOSADomain();

    int [][] cmap = this.getMap();

    domain.addStateClass(CLASS_AGENT, GridAgent.class)
            .addStateClass(CLASS_LOCATION, GridLocation.class);

    GridWorldModel smodel = new GridWorldModel(cmap, getTransitionDynamics());

    RewardFunction rf = this.rf;
    TerminalFunction tf = this.tf;

    if(rf == null){
        rf = new UniformCostRF();
    }
    if(tf == null){
        tf = new NullTermination();
    }

    FactoredModel model = new FactoredModel(smodel, rf, tf);
    domain.setModel(model);

    domain.addActionTypes(
            new UniversalActionType(ACTION_NORTH),
            new UniversalActionType(ACTION_SOUTH),
            new UniversalActionType(ACTION_EAST),
            new UniversalActionType(ACTION_WEST));

    OODomain.Helper.addPfsToDomain(domain, this.generatePfs());

    return domain;
}
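// Assumed usage sketch: when the rf/tf fields are left unset, generateDomain
// falls back to UniformCostRF and NullTermination; here a goal-based pair is
// set first instead. The reward values are illustrative.
GridWorldDomain gwd = new GridWorldDomain(11, 11);
gwd.setMapToFourRooms();
TerminalFunction tf = new SinglePFTF(PropositionalFunction.findPF(gwd.generatePfs(), GridWorldDomain.PF_AT_LOCATION));
gwd.setTf(tf);
gwd.setRf(new GoalBasedRF(new TFGoalCondition(tf), 5., -0.1));
OOSADomain domain = gwd.generateDomain();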