/**
 * Creates a copy of these physics parameters.
 * @return a new {@link MCPhysicsParams} whose fields all match this one's.
 */
public MCPhysicsParams copy(){
	MCPhysicsParams clone = new MCPhysicsParams();
	clone.xmin = this.xmin;
	clone.xmax = this.xmax;
	clone.cosScale = this.cosScale;
	clone.vmin = this.vmin;
	clone.vmax = this.vmax;
	clone.acceleration = this.acceleration;
	clone.gravity = this.gravity;
	clone.timeDelta = this.timeDelta;
	return clone;
}
/**
 * Returns a new {@link MCState} with the same position and velocity as this state.
 */
@Override
public State copy() {
	return new MCState(this.x, this.v);
}
/**
 * Example: learns a mountain car policy with LSPI over Fourier basis features
 * built from normalized state variables, then visualizes five rollouts of the
 * learned policy starting from the valley.
 */
public static void MCLSPIFB(){

	MountainCar mcar = new MountainCar();
	SADomain dom = mcar.generateDomain();

	// collect a uniformly random SARS dataset for the batch learner
	StateGenerator stateGen = new MCRandomStateGenerator(mcar.physParams);
	SARSCollector dataCollector = new SARSCollector.UniformRandomSARSCollector(dom);
	SARSData data = dataCollector.collectNInstances(stateGen, dom.getModel(), 5000, 20, null);

	// normalize x and v into their variable ranges before the Fourier expansion
	NormalizedVariableFeatures normFeatures = new NormalizedVariableFeatures()
			.variableDomain("x", new VariableDomain(mcar.physParams.xmin, mcar.physParams.xmax))
			.variableDomain("v", new VariableDomain(mcar.physParams.vmin, mcar.physParams.vmax));
	FourierBasis basis = new FourierBasis(normFeatures, 4);

	// LSPI with per-action cross-product features (3 actions in this domain)
	LSPI learner = new LSPI(dom, 0.99, new DenseCrossProductFeatures(basis, 3), data);
	Policy learned = learner.runPolicyIteration(30, 1e-6);

	// visualize rollouts of the learned policy from the valley bottom
	Visualizer vis = MountainCarVisualizer.getVisualizer(mcar);
	VisualActionObserver observer = new VisualActionObserver(vis);
	observer.initGUI();
	SimulatedEnvironment simEnv = new SimulatedEnvironment(dom, new MCState(mcar.physParams.valleyPos(), 0.));
	simEnv.addObservers(observer);
	for(int episode = 0; episode < 5; episode++){
		PolicyUtils.rollout(learned, simEnv);
		simEnv.resetEnvironment();
	}

	System.out.println("Finished");
}
/**
 * Will launch a visual explorer for the mountain car domain that is controlled with the a-s-d keys.
 * @param args empty arguments.
 */
public static void main(String [] args){

	MountainCar gen = new MountainCar();
	SADomain dom = gen.generateDomain();
	State start = gen.valleyState();

	Visualizer painter = MountainCarVisualizer.getVisualizer(gen);
	VisualExplorer explorer = new VisualExplorer(dom, painter, start);

	// keyboard bindings: a = backwards, s = coast, d = forward
	explorer.addKeyAction("a", ACTION_BACKWARDS, "");
	explorer.addKeyAction("s", ACTION_COAST, "");
	explorer.addKeyAction("d", ACTION_FORWARD, "");

	explorer.initGUI();
}
/**
 * Builds the mountain car {@link SADomain}: the classic physics model wrapped
 * in a {@link FactoredModel}, plus the forward/backwards/coast actions. If no
 * terminal or reward function has been set, defaults are installed: terminate
 * at the right hill top ({@code physParams.xmax}) with goal reward 100, 0 otherwise.
 */
@Override
public SADomain generateDomain() {

	SADomain domain = new SADomain();

	// the model copies the physics so later parameter edits don't affect it
	MCModel stateModel = new MCModel(this.physParams.copy());

	if(this.tf == null){
		this.tf = new ClassicMCTF(this.physParams.xmax);
	}
	if(this.rf == null){
		this.rf = new GoalBasedRF(this.tf, 100, 0);
	}

	domain.setModel(new FactoredModel(stateModel, this.rf, this.tf));

	// keep registration order (forward, backwards, coast) — action iteration
	// order follows it
	domain.addActionType(new UniversalActionType(ACTION_FORWARD))
			.addActionType(new UniversalActionType(ACTION_BACKWARDS))
			.addActionType(new UniversalActionType(ACTION_COAST));

	return domain;
}
/**
 * Returns a {@link burlap.visualizer.StateRenderLayer} for a {@link burlap.domain.singleagent.mountaincar.MountainCar}
 * {@link burlap.mdp.core.Domain}, drawing the hill described by the given
 * {@link burlap.domain.singleagent.mountaincar.MountainCar.MCPhysicsParams} with the agent on top of it.
 * @param physParams the physics/hill design to be visualized
 * @return a {@link burlap.visualizer.StateRenderLayer} for the mountain car domain
 */
public static StateRenderLayer getStateRenderLayer(MountainCar.MCPhysicsParams physParams){
	StateRenderLayer layer = new StateRenderLayer();
	// hill first so the agent is painted over it
	layer.addStatePainter(new HillPainter(physParams));
	layer.addStatePainter(new AgentPainter(physParams));
	return layer;
}
/**
 * Returns a {@link burlap.visualizer.Visualizer} for a {@link burlap.domain.singleagent.mountaincar.MountainCar}
 * {@link burlap.mdp.core.Domain}, using the hill design/physics held by the given generator.
 * @param mcGen the generator for the mountain car domain to be visualized
 * @return a {@link burlap.visualizer.Visualizer} for the mountain car domain
 */
public static Visualizer getVisualizer(MountainCar mcGen){
	return new Visualizer(getStateRenderLayer(mcGen.physParams));
}
/**
 * Returns the valley state for the current physics parameters
 * (delegates to {@code MCPhysicsParams.valleyState()}).
 * @return the valley {@link MCState}
 */
public MCState valleyState(){
	MCState bottom = this.physParams.valleyState();
	return bottom;
}
// sample point i along the hill: x advances from xmin in steps of inc
double x = this.physParams.xmin + (i * inc);
// hill height at x is sin(cosScale * x) — the curve the physics params define
double y = Math.sin(this.physParams.cosScale*x);
worldPoints.add(new MyPoint(x, y));
/**
 * Example main: learns a mountain car policy with LSPI over 4th-order Fourier
 * basis features built from normalized state variables, then visualizes 100
 * rollouts of the learned policy starting from the valley.
 * @param args unused
 */
public static void main(String[] args) {

	MountainCar mcGen = new MountainCar();
	SADomain domain = mcGen.generateDomain();

	// collect a uniformly random SARS dataset for the batch learner
	StateGenerator rStateGen = new MCRandomStateGenerator(mcGen.physParams);
	SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain);
	SARSData dataset = collector.collectNInstances(rStateGen, domain.getModel(), 5000, 20, null);

	// normalize x and v into their variable ranges before the Fourier expansion
	NormalizedVariableFeatures features = new NormalizedVariableFeatures()
			.variableDomain("x", new VariableDomain(mcGen.physParams.xmin, mcGen.physParams.xmax))
			.variableDomain("v", new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax));
	FourierBasis fb = new FourierBasis(features, 4);

	// LSPI with per-action cross-product features (3 actions in this domain)
	LSPI lspi = new LSPI(domain, 0.99, new DenseCrossProductFeatures(fb, 3), dataset);
	Policy p = lspi.runPolicyIteration(30, 1e-6);

	Visualizer v = MountainCarVisualizer.getVisualizer(mcGen);
	VisualActionObserver vob = new VisualActionObserver(v);
	vob.initGUI();

	// attach the observer directly to the environment (matches MCLSPIFB)
	// rather than wrapping the environment in a redundant EnvironmentServer
	SimulatedEnvironment env = new SimulatedEnvironment(domain, new MCState(mcGen.physParams.valleyPos(), 0));
	env.addObservers(vob);

	for(int i = 0; i < 100; i++){
		PolicyUtils.rollout(p, env);
		env.resetEnvironment();
	}

	System.out.println("Finished");
}
/**
 * Samples a state uniformly at random: position drawn from [xmin, xmax] and
 * velocity drawn from [vmin, vmax].
 * @return a randomly generated {@link MCState}
 */
@Override
public State generateState() {
	double xSpan = this.xmax - this.xmin;
	double vSpan = this.vmax - this.vmin;
	// position is drawn first, then velocity, from the same RNG
	double sampledX = this.rand.nextDouble() * xSpan + this.xmin;
	double sampledV = this.rand.nextDouble() * vSpan + this.vmin;
	return new MCState(sampledX, sampledV);
}
/**
 * Returns a {@link burlap.visualizer.Visualizer} for a {@link burlap.domain.singleagent.mountaincar.MountainCar}
 * {@link burlap.mdp.core.Domain}, using the hill design/physics defined by the given
 * {@link burlap.domain.singleagent.mountaincar.MountainCar.MCPhysicsParams}.
 * @param physParams the physics/hill design to be visualized
 * @return a {@link burlap.visualizer.Visualizer} for the mountain car domain
 */
public static Visualizer getVisualizer(MountainCar.MCPhysicsParams physParams){
	return new Visualizer(getStateRenderLayer(physParams));
}
public static void MCLSPIRBF(){ MountainCar mcGen = new MountainCar(); SADomain domain = mcGen.generateDomain(); MCState s = new MCState(mcGen.physParams.valleyPos(), 0.); .variableDomain("v", new VariableDomain(mcGen.physParams.vmin, mcGen.physParams.vmax)); StateGenerator rStateGen = new MCRandomStateGenerator(mcGen.physParams); SARSCollector collector = new SARSCollector.UniformRandomSARSCollector(domain); SARSData dataset = collector.collectNInstances(rStateGen, domain.getModel(), 5000, 20, null); Policy p = lspi.runPolicyIteration(30, 1e-6); Visualizer v = MountainCarVisualizer.getVisualizer(mcGen); VisualActionObserver vob = new VisualActionObserver(v); vob.initGUI();