/**
 * Computes the linear value estimate for a state-action pair: the dot product of the
 * state's feature vector with the slice of {@code stateActionWeights} that starts at
 * {@code actionOffset * featureCount}.
 *
 * <p>As a side effect this refreshes the cached feature vector, action offset, value and
 * last-seen state, and discards any cached gradient.
 *
 * @param s the state whose features are queried from {@code stateFeatures}
 * @param a the action whose offset selects the weight slice
 * @return the weighted sum of the state features for the given action
 */
@Override
public double evaluate(State s, Action a) {
    // Cache fields are written in the same order as before, ahead of the weight lookup.
    this.currentStateFeatures = this.stateFeatures.features(s);
    this.currentActionOffset = this.getActionOffset(a);

    final double[] feats = this.currentStateFeatures;
    final double[] weights = this.stateActionWeights;

    // Index of the first weight belonging to this action's slice.
    int weightIndex = this.currentActionOffset * feats.length;

    double sum = 0;
    for (double feature : feats) {
        sum += feature * weights[weightIndex];
        weightIndex++;
    }

    this.currentValue = sum;
    this.currentGradient = null; // any previously cached gradient no longer applies
    this.lastState = s;
    return sum;
}
/**
 * Returns the gradient of the linear state-action function for the given pair. The
 * gradient has one entry per state feature, stored at index
 * {@code actionOffset * featureCount + i} with value {@code features[i]}.
 *
 * <p>Results are cached. The cached gradient is reused only when the query is for the same
 * state instance (reference comparison against {@code lastState}) <em>and</em> the same
 * action offset as the call that produced it. Previously the action was not part of the
 * check, so asking for the gradient of a second action on the same state returned the
 * first action's gradient.
 *
 * <p>As a side effect this updates {@code currentGradient}, {@code currentStateFeatures},
 * {@code currentActionOffset} and {@code lastState}.
 *
 * @param s the state to differentiate at
 * @param a the action whose weight slice the gradient refers to
 * @return the gradient for {@code (s, a)}; may be the cached instance from a previous call
 */
@Override
public FunctionGradient gradient(State s, Action a) {
    double[] features;
    int actionOffset;

    if (this.lastState == s) {
        actionOffset = this.getActionOffset(a);
        // The cache is valid only if it was built for this action as well as this state.
        if (this.currentGradient != null && this.currentActionOffset == actionOffset) {
            return this.currentGradient;
        }
        // Same state instance: the features computed earlier can be reused.
        features = this.currentStateFeatures;
    } else {
        features = this.stateFeatures.features(s);
        actionOffset = this.getActionOffset(a);
    }

    FunctionGradient gradient = new FunctionGradient.SparseGradient(features.length);
    int sIndOffset = actionOffset * features.length;
    for (int i = 0; i < features.length; i++) {
        gradient.put(i + sIndOffset, features[i]);
    }

    this.currentGradient = gradient;
    this.currentStateFeatures = features;
    // Record which action the cached gradient belongs to so the check above can detect a change.
    this.currentActionOffset = actionOffset;
    this.lastState = s;
    return gradient;
}