/**
 * Fixture setup (annotated with {@code @Before}): builds an 11x11 grid world
 * using the four rooms layout, sets the transition success probability to 1.0,
 * and stores the generated domain in {@code this.domain}.
 */
@Before
public void setup() {
    this.gw = new GridWorldDomain(11, 11);
    this.gw.setMapToFourRooms();
    this.gw.setProbSucceedTransitionDynamics(1.0);
    // generate the grid world domain
    this.domain = this.gw.generateDomain();
}

public State generateState() {
public static void main(String[] args) { GridWorldDomain gwd = new GridWorldDomain(11, 11); gwd.setMapToFourRooms(); gwd.setProbSucceedTransitionDynamics(0.8); gwd.setTf(new GridWorldTerminalFunction(10, 10)); SADomain domain = gwd.generateDomain(); //get initial state with agent in 0,0 State s = new GridWorldState(new GridAgent(0, 0)); //create environment SimulatedEnvironment env = new SimulatedEnvironment(domain, s); //create Q-learning QLTutorial agent = new QLTutorial(domain, 0.99, new SimpleHashableStateFactory(), new ConstantValueFunction(), 0.1, 0.1); //run Q-learning and store results in a list List<Episode> episodes = new ArrayList<Episode>(1000); for(int i = 0; i < 1000; i++){ episodes.add(agent.runLearningEpisode(env)); env.resetEnvironment(); } Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap()); new EpisodeSequenceVisualizer(v, domain, episodes); }
public static void main(String [] args){ GridWorldDomain gwd = new GridWorldDomain(11, 11); gwd.setTf(new GridWorldTerminalFunction(10, 10)); gwd.setMapToFourRooms(); //only go in intended directon 80% of the time gwd.setProbSucceedTransitionDynamics(0.8); SADomain domain = gwd.generateDomain(); //get initial state with agent in 0,0 State s = new GridWorldState(new GridAgent(0, 0)); //setup vi with 0.99 discount factor, a value //function initialization that initializes all states to value 0, and which will //run for 30 iterations over the state space VITutorial vi = new VITutorial(domain, 0.99, new SimpleHashableStateFactory(), new ConstantValueFunction(0.0), 30); //run planning from our initial state Policy p = vi.planFromState(s); //evaluate the policy with one roll out visualize the trajectory Episode ea = PolicyUtils.rollout(p, s, domain.getModel()); Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap()); new EpisodeSequenceVisualizer(v, domain, Arrays.asList(ea)); }
public static void main(String[] args) { GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world gw.setMapToFourRooms(); //four rooms layout gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate SADomain domain = gw.generateDomain(); //generate the grid world domain //setup initial state State s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0")); //create visualizer and explorer Visualizer v = GridWorldVisualizer.getVisualizer(gw.getMap()); VisualExplorer exp = new VisualExplorer(domain, v, s); //set control keys to use w-s-a-d exp.addKeyAction("w", GridWorldDomain.ACTION_NORTH, ""); exp.addKeyAction("s", GridWorldDomain.ACTION_SOUTH, ""); exp.addKeyAction("a", GridWorldDomain.ACTION_WEST, ""); exp.addKeyAction("d", GridWorldDomain.ACTION_EAST, ""); exp.initGUI(); }
gw.setProbSucceedTransitionDynamics(0.8); // stochastic transitions: success rate of 0.8