/**
 * Action for which {@code currentFeatures} (and {@code currentGradient}, when set) were
 * computed. Tracked alongside {@code lastState} because the feature vector depends on the
 * state-action pair, not on the state alone.
 */
private Action cachedAction;

/**
 * Evaluates the linear function for the given state-action pair as the dot product of the
 * pair's feature vector and the weight vector.
 * <p>
 * On the first call the weight vector is allocated with one entry per feature, each set to
 * {@code defaultWeight}. The computed features and value are cached, any previously cached
 * gradient is cleared, and the state-action pair is remembered so that a following
 * {@link #gradient(State, Action)} call for the same pair can reuse the features.
 *
 * @param s the state to evaluate
 * @param a the action to evaluate
 * @return the weighted sum of the features of {@code (s, a)}
 */
@Override
public double evaluate(State s, Action a) {
    this.currentFeatures = this.features.features(s, a);

    // Lazily size the weight vector from the first feature vector we see.
    if (this.stateActionWeights == null) {
        this.stateActionWeights = new double[this.currentFeatures.length];
        for (int i = 0; i < this.stateActionWeights.length; i++) {
            this.stateActionWeights[i] = defaultWeight;
        }
    }

    double val = 0;
    for (int i = 0; i < this.currentFeatures.length; i++) {
        val += this.currentFeatures[i] * this.stateActionWeights[i];
    }

    this.currentValue = val;
    // The cached gradient (if any) belonged to an earlier query; force it to be rebuilt.
    this.currentGradient = null;
    this.lastState = s;
    this.cachedAction = a;
    return this.currentValue;
}

/**
 * Returns the gradient of the function for the given state-action pair. Entry {@code i} of
 * the gradient is feature {@code i} of {@code (s, a)}.
 * <p>
 * Cached features (and a cached gradient, when present) are reused only if both the state
 * and the action are the very same object references as in the most recent query. Checking
 * the state alone is not sufficient: two different actions in the same state generally
 * produce different feature vectors, and reusing the cache would return the gradient of the
 * wrong action.
 *
 * @param s the state to compute the gradient for
 * @param a the action to compute the gradient for
 * @return the gradient for {@code (s, a)}
 */
@Override
public FunctionGradient gradient(State s, Action a) {
    // Reference comparison is intentional: it is a cheap, conservative cache check that can
    // only produce misses (recomputation), never stale hits.
    boolean sameQuery = this.lastState == s
            && this.cachedAction == a
            && this.currentFeatures != null;

    double[] phi;
    if (sameQuery) {
        if (this.currentGradient != null) {
            return this.currentGradient;
        }
        phi = this.currentFeatures;
    } else {
        phi = this.features.features(s, a);
    }

    FunctionGradient gradient = new FunctionGradient.SparseGradient(phi.length);
    for (int i = 0; i < phi.length; i++) {
        gradient.put(i, phi[i]);
    }

    this.currentGradient = gradient;
    this.currentFeatures = phi;
    this.lastState = s;
    this.cachedAction = a;
    return gradient;
}