/**
 * Hashes a batch of randomly generated grid-world states with the given factory
 * and reports how many distinct hash codes result (a rough collision gauge).
 *
 * @param factory         the hashable-state factory under test
 * @param width           side length of the square grid world
 * @param numRandomStates number of random states to generate and hash
 * @param moveLocObjects  whether location objects are moved when randomizing states
 */
public void testSimpleHashFactoryLargeState(HashableStateFactory factory, int width, int numRandomStates, boolean moveLocObjects) {
    GridWorldDomain gridWorld = new GridWorldDomain(width, width);
    SADomain domain = (SADomain) gridWorld.generateDomain();
    State initial = this.generateLargeGW(domain, width);
    Set<HashableState> hashedStates = this.generateRandomStates(domain, initial, factory, width, numRandomStates, moveLocObjects);

    // Collect the distinct hash codes so collisions show up as hashes < states.
    Set<Integer> distinctHashes = new HashSet<Integer>();
    for (HashableState hashed : hashedStates) {
        distinctHashes.add(hashed.hashCode());
    }
    System.out.println("Hashed states: " + hashedStates.size() + ", hashes: " + distinctHashes.size());
}
/**
 * Demo: rolls out a random policy in an 11x11 grid world, serializes the
 * resulting episode to YAML, parses it back, and prints both forms so the
 * round-trip can be inspected by eye.
 */
public static void main(String[] args) {
    GridWorldDomain gridWorld = new GridWorldDomain(11, 11);
    SADomain domain = gridWorld.generateDomain();
    State initialState = new GridWorldState(new GridAgent(1, 3));

    // 30-step rollout under a uniformly random policy.
    Policy randomPolicy = new RandomPolicy(domain);
    Episode episode = PolicyUtils.rollout(randomPolicy, initialState, domain.getModel(), 30);

    String serialized = episode.serialize();
    System.out.println(serialized);
    System.out.println("\n\n");

    // Parse the serialized form back and print its contents for comparison.
    Episode deserialized = Episode.parseEpisode(serialized);
    System.out.println(deserialized.actionString());
    System.out.println(deserialized.state(0).toString());
    System.out.println(deserialized.actionSequence.size());
    System.out.println(deserialized.stateSequence.size());
}
// JUnit fixture: builds a deterministic four-rooms grid world before each test
// so test outcomes are repeatable.
@Before
public void setup() {
    this.gw = new GridWorldDomain(11,11);
    gw.setMapToFourRooms();
    // Probability 1.0 makes transitions deterministic — no stochastic moves in tests.
    gw.setProbSucceedTransitionDynamics(1.0);
    this.domain = gw.generateDomain(); //generate the grid world domain
}

// NOTE(review): this definition continues beyond the visible chunk.
public State generateState() {
/**
 * Sets up the IRL example: a 5x5 empty grid world with five location types,
 * a left-side state generator seeded from the basic state, and a visualizer
 * for the world's map.
 */
public IRLExample(){
    this.gwd = new GridWorldDomain(5, 5);
    this.gwd.setNumberOfLocationTypes(5);
    this.gwd.makeEmptyMap();
    this.domain = this.gwd.generateDomain();

    // Seed the state generator with the example's basic starting state.
    State initial = this.basicState();
    this.sg = new LeftSideGen(5, initial);

    this.v = GridWorldVisualizer.getVisualizer(this.gwd.getMap());
}
/**
 * Demo: builds the four-rooms grid world, creates a doorway option from each
 * room toward each of its two exits, executes every option once from a corner
 * of its room, and visualizes the resulting episodes.
 */
public static void testOptions(){
    GridWorldDomain gwd = new GridWorldDomain(11, 11);
    gwd.setMapToFourRooms();
    SADomain domain = gwd.generateDomain();

    // One option per (room, doorway) pair.
    // NOTE(review): argument meaning inferred from coordinates — appears to be
    // (name, domain, goalX, goalY, roomMinX, roomMinY, roomMaxX, roomMaxY); confirm against createRoomOption.
    Option swToNorth = createRoomOption("swToNorth", domain, 1, 5, 0, 0, 4, 4);
    Option swToEast = createRoomOption("swToEast", domain, 5, 1, 0, 0, 4, 4);
    Option seToWest = createRoomOption("seToWest", domain, 5, 1, 6, 0, 10, 3);
    Option seToNorth = createRoomOption("seToNorth", domain, 8, 4, 6, 0, 10, 3);
    Option neToSouth = createRoomOption("neToSouth", domain, 8, 4, 6, 5, 10, 10);
    Option neToWest = createRoomOption("neToWest", domain, 5, 8, 6, 5, 10, 10);
    Option nwToEast = createRoomOption("nwToEast", domain, 5, 8, 0, 6, 4, 10);
    Option nwToSouth = createRoomOption("nwToSouth", domain, 1, 5, 0, 6, 4, 10);

    // Execute each option from a corner of its source room and record the episode.
    List<Episode> episodes = new ArrayList<Episode>();
    episodes.add(optionExecuteResult(domain, swToNorth, new GridWorldState(0, 0)));
    episodes.add(optionExecuteResult(domain, swToEast, new GridWorldState(0, 0)));
    episodes.add(optionExecuteResult(domain, seToWest, new GridWorldState(10, 0)));
    episodes.add(optionExecuteResult(domain, seToNorth, new GridWorldState(10, 0)));
    episodes.add(optionExecuteResult(domain, neToSouth, new GridWorldState(10, 10)));
    episodes.add(optionExecuteResult(domain, neToWest, new GridWorldState(10, 10)));
    episodes.add(optionExecuteResult(domain, nwToEast, new GridWorldState(0, 10)));
    episodes.add(optionExecuteResult(domain, nwToSouth, new GridWorldState(0, 10)));

    Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap());
    // Constructed for its GUI side effect; the reference was previously stored in
    // an unused local (evis). Dropping it matches the other demos in this project.
    new EpisodeSequenceVisualizer(v, domain, episodes);
}
/**
 * JUnit fixture: four-rooms grid world with a uniform -1 step cost. Episodes
 * terminate when the at-location propositional function becomes true, and the
 * same terminal function doubles as the goal condition for planners.
 */
@Before
public void setup() {
    this.gw = new GridWorldDomain(11, 11);
    this.gw.setMapToFourRooms();
    this.gw.setRf(new UniformCostRF());

    // Terminate when the agent is at a location object.
    TerminalFunction terminal = new SinglePFTF(PropositionalFunction.findPF(this.gw.generatePfs(), PF_AT_LOCATION));
    this.gw.setTf(terminal);

    this.domain = this.gw.generateDomain();
    this.goalCondition = new TFGoalCondition(terminal);
    this.hashingFactory = new SimpleHashableStateFactory();
}
public static void main(String[] args) { GridWorldDomain gwd = new GridWorldDomain(11, 11); gwd.setMapToFourRooms(); gwd.setProbSucceedTransitionDynamics(0.8); gwd.setTf(new GridWorldTerminalFunction(10, 10)); SADomain domain = gwd.generateDomain(); //get initial state with agent in 0,0 State s = new GridWorldState(new GridAgent(0, 0)); //create environment SimulatedEnvironment env = new SimulatedEnvironment(domain, s); //create Q-learning QLTutorial agent = new QLTutorial(domain, 0.99, new SimpleHashableStateFactory(), new ConstantValueFunction(), 0.1, 0.1); //run Q-learning and store results in a list List<Episode> episodes = new ArrayList<Episode>(1000); for(int i = 0; i < 1000; i++){ episodes.add(agent.runLearningEpisode(env)); env.resetEnvironment(); } Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap()); new EpisodeSequenceVisualizer(v, domain, episodes); }
/**
 * Sets up the shared fixtures for the behavior examples: a four-rooms grid
 * world whose cell (10,10) is both the terminal state and the planners' goal
 * condition, an initial state with the agent at (0,0), a hashing factory, and
 * a simulated environment.
 */
public BasicBehavior(){
    this.gwdg = new GridWorldDomain(11, 11);
    this.gwdg.setMapToFourRooms();

    // Reaching (10,10) ends the episode and defines the planning goal.
    this.tf = new GridWorldTerminalFunction(10, 10);
    this.gwdg.setTf(this.tf);
    this.goalCondition = new TFGoalCondition(this.tf);

    this.domain = this.gwdg.generateDomain();

    // Agent at (0,0); a single location object marks the goal cell.
    this.initialState = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0"));
    this.hashingFactory = new SimpleHashableStateFactory();
    this.env = new SimulatedEnvironment(this.domain, this.initialState);

    // Uncomment to watch actions render live in a GUI observer.
    // VisualActionObserver observer = new VisualActionObserver(domain, GridWorldVisualizer.getVisualizer(gwdg.getMap()));
    // observer.initGUI();
    // env.addObservers(observer);
}
// Generate the single-agent domain from the configured grid world builder.
// NOTE(review): fragment — the enclosing method and the declaration of gwdg
// are outside this chunk.
SADomain d = gwdg.generateDomain();
public static void main(String [] args){ GridWorldDomain gwd = new GridWorldDomain(11, 11); gwd.setTf(new GridWorldTerminalFunction(10, 10)); gwd.setMapToFourRooms(); //only go in intended directon 80% of the time gwd.setProbSucceedTransitionDynamics(0.8); SADomain domain = gwd.generateDomain(); //get initial state with agent in 0,0 State s = new GridWorldState(new GridAgent(0, 0)); //setup vi with 0.99 discount factor, a value //function initialization that initializes all states to value 0, and which will //run for 30 iterations over the state space VITutorial vi = new VITutorial(domain, 0.99, new SimpleHashableStateFactory(), new ConstantValueFunction(0.0), 30); //run planning from our initial state Policy p = vi.planFromState(s); //evaluate the policy with one roll out visualize the trajectory Episode ea = PolicyUtils.rollout(p, s, domain.getModel()); Visualizer v = GridWorldVisualizer.getVisualizer(gwd.getMap()); new EpisodeSequenceVisualizer(v, domain, Arrays.asList(ea)); }
public static void main(String[] args) { GridWorldDomain gw = new GridWorldDomain(11,11); //11x11 grid world gw.setMapToFourRooms(); //four rooms layout gw.setProbSucceedTransitionDynamics(0.8); //stochastic transitions with 0.8 success rate SADomain domain = gw.generateDomain(); //generate the grid world domain //setup initial state State s = new GridWorldState(new GridAgent(0, 0), new GridLocation(10, 10, "loc0")); //create visualizer and explorer Visualizer v = GridWorldVisualizer.getVisualizer(gw.getMap()); VisualExplorer exp = new VisualExplorer(domain, v, s); //set control keys to use w-s-a-d exp.addKeyAction("w", GridWorldDomain.ACTION_NORTH, ""); exp.addKeyAction("s", GridWorldDomain.ACTION_SOUTH, ""); exp.addKeyAction("a", GridWorldDomain.ACTION_WEST, ""); exp.addKeyAction("d", GridWorldDomain.ACTION_EAST, ""); exp.initGUI(); }
// Generate the object-oriented single-agent domain from the grid world builder.
// NOTE(review): fragment — the enclosing scope and the declaration of gw are
// outside this chunk.
final OOSADomain domain = gw.generateDomain(); //generate the grid world domain
// Attach the terminal function, build the domain, and place the agent at (0,0).
// NOTE(review): fragment — the declarations of gwd and tf are outside this chunk.
gwd.setTf(tf);
final SADomain domain = gwd.generateDomain();
State s = new GridWorldState(0, 0);