@Override
public void setCurStateTo(State s) {
    if(this.stateGenerator == null){
        this.stateGenerator = new ConstantStateGenerator(s);
    }
    this.curState = s;
}
/**
 * Initializes with an identity state mapping (no state abstraction).
 * @param domain the learning domain
 * @param gamma the discount factor
 * @param vfa the value function approximation to use
 */
public ApproximateQLearning(SADomain domain, double gamma, ParametricFunction.ParametricStateActionFunction vfa) {
    this(domain, gamma, vfa, new ShallowIdentityStateMapping());
}
/**
 * Initializes the world.
 * @param domain the SGDomain the world will use
 * @param jr the joint reward function
 * @param tf the terminal function
 * @param initialState the initial state of the world every time a new game starts
 */
public World(SGDomain domain, JointRewardFunction jr, TerminalFunction tf, State initialState){
    this.init(domain, domain.getJointActionModel(), jr, tf, new ConstantStateGenerator(initialState), new IdentityStateMapping());
}
final TerminalFunction tf = new SinglePFTF(
        PropositionalFunction.findPF(gw.generatePfs(), GridWorldDomain.PF_AT_LOCATION));
final ConstantStateGenerator sg = new ConstantStateGenerator(s);
@Before
public void setup() {
    this.gw = new GridWorldDomain(11, 11);
    this.gw.setMapToFourRooms();
    this.gw.setRf(new UniformCostRF());
    TerminalFunction tf = new SinglePFTF(PropositionalFunction.findPF(gw.generatePfs(), PF_AT_LOCATION));
    this.gw.setTf(tf);
    this.domain = this.gw.generateDomain();
    this.goalCondition = new TFGoalCondition(tf);
    this.hashingFactory = new SimpleHashableStateFactory();
}
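// A minimal sketch of how a test might use the members initialized above: plan
// to the goal with BFS. The start state, test method, and planner choice are
// illustrative assumptions, not part of the setup method itself.
@Test
public void testBFS() {
    State s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));
    DeterministicPlanner planner = new BFS(this.domain, this.goalCondition, this.hashingFactory);
    Policy p = planner.planFromState(s);
}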
/**
 * Creates an SGWorldShell with a new world using the domain, reading from standard input and writing to standard output.
 * @param domain the SGDomain
 * @param s the state in which the world will be rooted
 */
public SGWorldShell(SGDomain domain, State s){
    this(domain, System.in, System.out, new World(domain, new NullJointRewardFunction(), new NullTermination(), s));
}
/**
 * Initializes the WorldGenerator.
 * @param domain the SGDomain the world will use
 * @param jr the joint reward function
 * @param tf the terminal function
 * @param sg a state generator for generating initial states of a game
 */
public ConstantWorldGenerator(SGDomain domain, JointRewardFunction jr, TerminalFunction tf, StateGenerator sg){
    this.CWGInit(domain, jr, tf, sg, new IdentityStateMapping());
}
public SimulatedEnvironment(SampleModel model, State initialState) {
    this.stateGenerator = new ConstantStateGenerator(initialState);
    this.curState = initialState;
    this.model = model;
}
@Override
public OOSADomain generateDomain() {

    OOSADomain domain = new OOSADomain();

    domain.addStateClass(CLASS_AGENT, ExGridAgent.class)
            .addStateClass(CLASS_LOCATION, EXGridLocation.class);

    domain.addActionTypes(
            new UniversalActionType(ACTION_NORTH),
            new UniversalActionType(ACTION_SOUTH),
            new UniversalActionType(ACTION_EAST),
            new UniversalActionType(ACTION_WEST));

    OODomain.Helper.addPfsToDomain(domain, this.generatePfs());

    OOGridWorldStateModel smodel = new OOGridWorldStateModel();
    RewardFunction rf = new SingleGoalPFRF(domain.propFunction(PF_AT), 100, -1);
    TerminalFunction tf = new SinglePFTF(domain.propFunction(PF_AT));

    domain.setModel(new FactoredModel(smodel, rf, tf));

    return domain;
}
/**
 * Initializes the data members for the visual explorer.
 * @param domain the stochastic game domain to be explored
 * @param painter the 2D visualizer for states
 * @param baseState the initial state from which to explore
 * @param w the width of the state visualizer
 * @param h the height of the state visualizer
 */
public SGVisualExplorer(SGDomain domain, Visualizer painter, State baseState, int w, int h){
    this.init(domain, new World(domain, new NullJointRewardFunction(), new NullTermination(), baseState), painter, w, h);
}
/**
 * Initializes with a default 0.1 epsilon greedy policy/strategy
 * @param d the domain in which the agent will act
 * @param discount the discount factor
 * @param learningRate the learning rate
 * @param qInitializer the Q-value initialization method
 * @param hashFactory the state hashing factory
 */
public SGNaiveQLAgent(SGDomain d, double discount, double learningRate, QFunction qInitializer, HashableStateFactory hashFactory) {
    this.init(d);
    this.discount = discount;
    this.learningRate = new ConstantLR(learningRate);
    this.hashFactory = hashFactory;
    this.qInit = qInitializer;
    this.qMap = new HashMap<HashableState, List<QValue>>();
    stateRepresentations = new HashMap<HashableState, State>();
    this.policy = new EpsilonGreedy(this, 0.1);
    this.storedMapAbstraction = new ShallowIdentityStateMapping();
}
/**
 * This constructor is deprecated, because {@link burlap.mdp.stochasticgames.SGDomain} objects are now expected
 * to have a {@link JointModel} associated with them, making the constructor parameter for it
 * unnecessary. Instead use the constructor {@link #ConstantWorldGenerator(burlap.mdp.stochasticgames.SGDomain, JointRewardFunction, burlap.mdp.core.TerminalFunction, StateGenerator)}
 * @param domain the SGDomain the world will use
 * @param jam the joint action model that specifies the transition dynamics
 * @param jr the joint reward function
 * @param tf the terminal function
 * @param sg a state generator for generating initial states of a game
 */
@Deprecated
public ConstantWorldGenerator(SGDomain domain, JointModel jam, JointRewardFunction jr, TerminalFunction tf, StateGenerator sg){
    this.CWGInit(domain, jr, tf, sg, new IdentityStateMapping());
}
public SimulatedEnvironment(SADomain domain, State initialState) {
    this.stateGenerator = new ConstantStateGenerator(initialState);
    this.curState = initialState;

    if(domain.getModel() == null){
        throw new RuntimeException("SimulatedEnvironment requires a Domain with a model, but the input domain does not have one.");
    }
    this.model = domain.getModel();
}
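// A minimal usage sketch (assumed, not part of the class above): a grid world
// domain drives the SimulatedEnvironment and a Q-learning agent learns from it.
// The map, start position, and learning parameters are illustrative.
GridWorldDomain gw = new GridWorldDomain(11, 11);
gw.setMapToFourRooms();
SADomain domain = gw.generateDomain();
State initial = new GridWorldState(new GridAgent(0, 0));
SimulatedEnvironment env = new SimulatedEnvironment(domain, initial);
QLearning agent = new QLearning(domain, 0.99, new SimpleHashableStateFactory(), 0., 1.);
Episode e = agent.runLearningEpisode(env);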
/**
 * A main method showing example code that would be used to create an instance of Prisoner's Dilemma and begin playing it with an
 * {@link SGWorldShell}.
 * @param args command line args
 */
public static void main(String [] args){
    SingleStageNormalFormGame game = SingleStageNormalFormGame.getPrisonersDilemma();
    SGDomain domain = (SGDomain)game.generateDomain();
    JointRewardFunction r = game.getJointRewardFunction();

    World w = new World(domain, r, new NullTermination(), (State)new NFGameState(2));

    SGWorldShell shell = new SGWorldShell(domain, w);
    shell.start();
}
/**
 * Initializes with a default Q-value of 0 and a 0.1 epsilon greedy policy/strategy
 * @param d the domain in which the agent will act
 * @param discount the discount factor
 * @param learningRate the learning rate
 * @param hashFactory the state hashing factory
 */
public SGNaiveQLAgent(SGDomain d, double discount, double learningRate, HashableStateFactory hashFactory) {
    this.init(d);
    this.discount = discount;
    this.learningRate = new ConstantLR(learningRate);
    this.hashFactory = hashFactory;
    this.qInit = new ConstantValueFunction(0.);
    this.qMap = new HashMap<HashableState, List<QValue>>();
    stateRepresentations = new HashMap<HashableState, State>();
    this.policy = new EpsilonGreedy(this, 0.1);
    this.storedMapAbstraction = new ShallowIdentityStateMapping();
}
/**
 * Initializes the world.
 * @param domain the SGDomain the world will use
 * @param jr the joint reward function
 * @param tf the terminal function
 * @param sg a state generator for generating initial states of a game
 */
public World(SGDomain domain, JointRewardFunction jr, TerminalFunction tf, StateGenerator sg){
    this.init(domain, domain.getJointActionModel(), jr, tf, sg, new IdentityStateMapping());
}
/**
 * Plans from the input state and then returns a {@link burlap.behavior.policy.GreedyQPolicy} that greedily
 * selects the action with the highest Q-value and breaks ties uniformly randomly.
 * @param initialState the initial state of the planning problem
 * @return a {@link burlap.behavior.policy.GreedyQPolicy}.
 */
@Override
public GreedyQPolicy planFromState(State initialState) {

    if(this.model == null){
        throw new RuntimeException("LSPI cannot execute planFromState because the reward function and/or terminal function for planning have not been set. Use the initializeForPlanning method to set them.");
    }

    if(planningCollector == null){
        this.planningCollector = new SARSCollector.UniformRandomSARSCollector(this.actionTypes);
    }
    this.dataset = this.planningCollector.collectNInstances(new ConstantStateGenerator(initialState), this.model, this.numSamplesForPlanning, Integer.MAX_VALUE, this.dataset);
    return this.runPolicyIteration(this.maxNumPlanningIterations, this.maxChange);
}
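// Assumed usage sketch: once a model has been set (e.g., via initializeForPlanning),
// planning reduces to a single call, and the returned greedy policy can be rolled
// out against the model. The lspi instance and initialState are placeholders.
GreedyQPolicy policy = lspi.planFromState(initialState);
Episode e = PolicyUtils.rollout(policy, initialState, domain.getModel());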
/**
 * Creates a world instance for this game in which the provided agents join in the order they are passed. This object
 * uses the provided domain instance generated from this object instead of generating a new one.
 * @param domain the SGDomain instance
 * @param agents the agents to join the created world.
 * @return a world instance with the provided agents having already joined.
 */
public World createRepeatedGameWorld(SGDomain domain, SGAgent...agents){

    //grab the joint reward function from our bimatrix game in the more general BURLAP joint reward function interface
    JointRewardFunction jr = this.getJointRewardFunction();

    //the game repeats forever unless the caller manually stops it after some number of stages
    TerminalFunction tf = new NullTermination();

    //set up the initial state generator for the world, which for a bimatrix game is trivial
    StateGenerator sg = new ConstantStateGenerator(new NFGameState(agents.length));

    //create a world to synchronize the actions of agents in this domain and record results
    World w = new World(domain, jr, tf, sg);

    for(SGAgent a : agents){
        w.join(a);
    }

    return w;
}
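// Assumed usage sketch: create the repeated game world for two hypothetical
// agents (agent0, agent1) and run a fixed number of stages; the stage count is
// illustrative.
World w = game.createRepeatedGameWorld(domain, agent0, agent1);
w.runGame(100);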
/**
 * Initializes with a default 0.1 epsilon greedy policy/strategy
 * @param d the domain in which the agent will act
 * @param discount the discount factor
 * @param learningRate the learning rate
 * @param defaultQ the default to which all Q-values will be initialized
 * @param hashFactory the state hashing factory
 */
public SGNaiveQLAgent(SGDomain d, double discount, double learningRate, double defaultQ, HashableStateFactory hashFactory) {
    this.init(d);
    this.discount = discount;
    this.learningRate = new ConstantLR(learningRate);
    this.hashFactory = hashFactory;
    this.qInit = new ConstantValueFunction(defaultQ);
    this.qMap = new HashMap<HashableState, List<QValue>>();
    stateRepresentations = new HashMap<HashableState, State>();
    this.policy = new EpsilonGreedy(this, 0.1);
    this.storedMapAbstraction = new ShallowIdentityStateMapping();
}
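// Assumed usage sketch: two naive Q-learning agents join a world w (created
// elsewhere) and play; the discount, learning rate, default Q-value, and stage
// count are illustrative, and depending on the BURLAP version the agents may
// also need names/types configured before joining.
SGNaiveQLAgent ql0 = new SGNaiveQLAgent(domain, 0.99, 0.1, 0., new SimpleHashableStateFactory());
SGNaiveQLAgent ql1 = new SGNaiveQLAgent(domain, 0.99, 0.1, 0., new SimpleHashableStateFactory());
w.join(ql0);
w.join(ql1);
w.runGame(100);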
@Override
public OOSADomain generateDomain() {

    OOSADomain domain = new OOSADomain();

    int [][] cmap = this.getMap();

    domain.addStateClass(CLASS_AGENT, GridAgent.class)
            .addStateClass(CLASS_LOCATION, GridLocation.class);

    GridWorldModel smodel = new GridWorldModel(cmap, getTransitionDynamics());

    RewardFunction rf = this.rf;
    TerminalFunction tf = this.tf;

    if(rf == null){
        rf = new UniformCostRF();
    }
    if(tf == null){
        tf = new NullTermination();
    }

    FactoredModel model = new FactoredModel(smodel, rf, tf);
    domain.setModel(model);

    domain.addActionTypes(
            new UniversalActionType(ACTION_NORTH),
            new UniversalActionType(ACTION_SOUTH),
            new UniversalActionType(ACTION_EAST),
            new UniversalActionType(ACTION_WEST));

    OODomain.Helper.addPfsToDomain(domain, this.generatePfs());

    return domain;
}
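// Assumed usage sketch: when the rf/tf fields are left unset, generateDomain
// falls back to UniformCostRF and NullTermination; here a goal-based pair is
// set first instead. The reward values are illustrative.
GridWorldDomain gwd = new GridWorldDomain(11, 11);
gwd.setMapToFourRooms();
TerminalFunction tf = new SinglePFTF(PropositionalFunction.findPF(gwd.generatePfs(), GridWorldDomain.PF_AT_LOCATION));
gwd.setTf(tf);
gwd.setRf(new GoalBasedRF(new TFGoalCondition(tf), 5., -0.1));
OOSADomain domain = gwd.generateDomain();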