/**
 * Builds a {@link burlap.visualizer.Visualizer} for a {@link burlap.domain.singleagent.mountaincar.MountainCar}
 * {@link burlap.mdp.core.Domain}, rendering with the physics parameters held by the supplied generator.
 * @param mcGen the mountain car domain generator whose {@code physParams} are handed to the state render layer.
 * @return a new {@link burlap.visualizer.Visualizer} wrapping that state render layer.
 */
public static Visualizer getVisualizer(MountainCar mcGen){
	// No intermediate local is needed; the visualizer is returned as soon as it is constructed.
	return new Visualizer(getStateRenderLayer(mcGen.physParams));
}
/**
 * Launches a visual explorer for a default mountain car domain, starting from the generator's valley state.
 * The "d", "s" and "a" keys are bound to the forward, coast and backwards actions, respectively.
 * @param args command line arguments; they are not read.
 */
public static void main(String [] args){

	MountainCar generator = new MountainCar();
	SADomain mcDomain = generator.generateDomain();
	State startState = generator.valleyState();

	VisualExplorer explorer = new VisualExplorer(
			mcDomain,
			MountainCarVisualizer.getVisualizer(generator),
			startState);

	// Key bindings: each key triggers one action, passed with an empty parameter string.
	explorer.addKeyAction("d", ACTION_FORWARD, "");
	explorer.addKeyAction("s", ACTION_COAST, "");
	explorer.addKeyAction("a", ACTION_BACKWARDS, "");

	explorer.initGUI();
}
/**
 * Runs LSPI on the mountain car domain using Fourier basis features and then shows the resulting policy.
 * The steps are: collect SARS data with a uniform random collector from randomly generated states,
 * run policy iteration on that data, and finally roll the policy out five times from the valley
 * position in a simulated environment that is observed by a visualizer. Prints "Finished" when done.
 */
public static void MCLSPIFB(){

	MountainCar mcGen = new MountainCar();
	SADomain domain = mcGen.generateDomain();

	// Gather the training data from randomly generated start states.
	StateGenerator randomStarts = new MCRandomStateGenerator(mcGen.physParams);
	SARSCollector sarsCollector = new SARSCollector.UniformRandomSARSCollector(domain);
	SARSData samples = sarsCollector.collectNInstances(randomStarts, domain.getModel(), 5000, 20, null);

	// Normalize the position ("x") and velocity ("v") variables over the ranges given by the physics parameters.
	VariableDomain xRange = new VariableDomain(mcGen.physParams.xmin, mcGen.physParams.xmax);
	VariableDomain vRange = new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax);
	NormalizedVariableFeatures inputFeatures = new NormalizedVariableFeatures()
			.variableDomain("x", xRange)
			.variableDomain("v", vRange);

	DenseCrossProductFeatures qFeatures = new DenseCrossProductFeatures(new FourierBasis(inputFeatures, 4), 3);
	LSPI lspi = new LSPI(domain, 0.99, qFeatures, samples);
	Policy learnedPolicy = lspi.runPolicyIteration(30, 1e-6);

	// Open the visualization window before any rollout begins.
	VisualActionObserver observer = new VisualActionObserver(MountainCarVisualizer.getVisualizer(mcGen));
	observer.initGUI();

	MCState valleyStart = new MCState(mcGen.physParams.valleyPos(), 0.);
	SimulatedEnvironment env = new SimulatedEnvironment(domain, valleyStart);
	env.addObservers(observer);

	final int numRollouts = 5;
	for(int episode = 0; episode < numRollouts; episode++){
		PolicyUtils.rollout(learnedPolicy, env);
		env.resetEnvironment();
	}

	System.out.println("Finished");
}
/**
 * Runs LSPI on the mountain car domain using Fourier basis features and then shows the resulting policy.
 * SARS data is collected with a uniform random collector from randomly generated states, policy
 * iteration is run on that data, and the policy is rolled out 100 times from the valley position
 * through an {@code EnvironmentServer} that wraps the simulated environment together with a visual
 * observer. Prints "Finished" when done.
 * @param args command line arguments; they are not read.
 */
public static void main(String[] args) {

	MountainCar mcGen = new MountainCar();
	SADomain domain = mcGen.generateDomain();

	// Gather the training data from randomly generated start states.
	StateGenerator randomStarts = new MCRandomStateGenerator(mcGen.physParams);
	SARSCollector sarsCollector = new SARSCollector.UniformRandomSARSCollector(domain);
	SARSData samples = sarsCollector.collectNInstances(randomStarts, domain.getModel(), 5000, 20, null);

	// Normalize the position ("x") and velocity ("v") variables over the ranges given by the physics parameters.
	VariableDomain xRange = new VariableDomain(mcGen.physParams.xmin, mcGen.physParams.xmax);
	VariableDomain vRange = new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax);
	NormalizedVariableFeatures normalized = new NormalizedVariableFeatures()
			.variableDomain("x", xRange)
			.variableDomain("v", vRange);

	DenseCrossProductFeatures qFeatures = new DenseCrossProductFeatures(new FourierBasis(normalized, 4), 3);
	LSPI lspi = new LSPI(domain, 0.99, qFeatures, samples);
	Policy learnedPolicy = lspi.runPolicyIteration(30, 1e-6);

	// Open the visualization window before any rollout begins.
	VisualActionObserver observer = new VisualActionObserver(MountainCarVisualizer.getVisualizer(mcGen));
	observer.initGUI();

	SimulatedEnvironment simulated = new SimulatedEnvironment(domain, new MCState(mcGen.physParams.valleyPos(), 0));
	EnvironmentServer server = new EnvironmentServer(simulated, observer);

	final int numRollouts = 100;
	for(int episode = 0; episode < numRollouts; episode++){
		PolicyUtils.rollout(learnedPolicy, server);
		server.resetEnvironment();
	}

	System.out.println("Finished");
}
/**
 * Builds a {@link burlap.visualizer.Visualizer} for a {@link burlap.domain.singleagent.mountaincar.MountainCar}
 * {@link burlap.mdp.core.Domain}, rendering with the supplied
 * {@link burlap.domain.singleagent.mountaincar.MountainCar.MCPhysicsParams}.
 * @param physParams the physics/hill design handed to the state render layer.
 * @return a new {@link burlap.visualizer.Visualizer} wrapping that state render layer.
 */
public static Visualizer getVisualizer(MountainCar.MCPhysicsParams physParams){
	// No intermediate local is needed; the visualizer is returned as soon as it is constructed.
	return new Visualizer(getStateRenderLayer(physParams));
}
// Fragment: obtains a policy from an already-constructed LSPI instance and opens a visual observer for it.
// NOTE(review): `lspi` and `mcGen` are defined outside this excerpt.

// Presumably the arguments are an iteration cap (30) and a convergence threshold (1e-6) -- TODO confirm against LSPI.runPolicyIteration.
Policy p = lspi.runPolicyIteration(30, 1e-6);

// Build the mountain car visualizer from the generator and wrap it in an action observer.
Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
VisualActionObserver vob = new VisualActionObserver(v);

// Initialize the observer's GUI; the observer is not attached to any environment within this excerpt.
vob.initGUI();