/**
 * Computes the dense state-action feature vector for a state and an action.
 * The returned array is {@code numActions} times as long as the state feature vector.
 * The state features are written into the segment that begins at the action's offset
 * multiplied by the number of state features; all other entries keep the zero value
 * that a newly allocated double array holds.
 * @param s the state whose state features are computed
 * @param a the action whose offset selects the segment to fill
 * @return a newly allocated state-action feature array
 */
@Override
public double[] features(State s, Action a) {
    double[] stateVec = stateFeatures.features(s);
    double[] crossVec = new double[stateVec.length * numActions];

    // Running write position inside the segment that belongs to the action.
    int dst = this.getActionOffset(a) * stateVec.length;
    for(double f : stateVec){
        crossVec[dst++] = f;
    }

    return crossVec;
}
/**
 * Evaluates a state-action pair as the dot product of the state features with the
 * block of weights that starts at (action offset * number of state features).
 * Side effects: stores the computed state features, the action offset and the resulting
 * value in this object, sets the cached gradient to null, and records the state as the last state.
 * @param s the state to evaluate
 * @param a the action to evaluate
 * @return the value stored in {@code currentValue} after the evaluation
 */
@Override
public double evaluate(State s, Action a) {
    // Keep the feature caching ahead of the offset lookup: the lookup may depend on the cached features.
    this.currentStateFeatures = this.stateFeatures.features(s);
    this.currentActionOffset = this.getActionOffset(a);

    // Read the arrays only after the lookup has finished.
    final double[] feats = this.currentStateFeatures;
    final double[] weights = this.stateActionWeights;
    final int base = this.currentActionOffset * feats.length;

    double total = 0;
    for(int k = 0; k < feats.length; k++){
        total += feats[k] * weights[base + k];
    }

    this.currentValue = total;
    this.currentGradient = null;
    this.lastState = s;
    return this.currentValue;
}
/**
 * Builds a linear VFA object that uses this RBF feature database as its feature source.
 * @param defaultWeightValue the default feature weight value to use for all features
 * @return a new linear VFA object over this RBF feature database.
 */
public DenseLinearVFA generateVFA(double defaultWeightValue) {
    DenseLinearVFA linearVfa = new DenseLinearVFA(this, defaultWeightValue);
    return linearVfa;
}
/**
 * Creates a new DenseCrossProductFeatures from this object's state features,
 * number of actions and action offset map.
 * @return the newly constructed DenseCrossProductFeatures
 */
@Override
public DenseStateActionFeatures copy() {
    DenseCrossProductFeatures duplicate =
            new DenseCrossProductFeatures(stateFeatures, numActions, actionOffset);
    return duplicate;
}
/**
 * Creates a new NormalizedVariableFeatures from a new HashMap that holds the same
 * key-to-domain entries as this object's {@code domains} map.
 * @return the newly constructed NormalizedVariableFeatures
 */
@Override
public DenseStateFeatures copy() {
    // A new map instance with the same entries; the entries themselves are not duplicated.
    HashMap<Object, VariableDomain> domainsCopy = new HashMap<Object, VariableDomain>(domains);
    return new NormalizedVariableFeatures(domainsCopy);
}
}
/**
 * Computes the value of a state as the dot product of the features produced by
 * {@code vinitFvGen} with the parameters that start at index {@code rfDim}.
 * @param s the state to evaluate
 * @return the weighted sum of the state's features
 */
@Override
public double value(State s) {
    double[] phi = this.vinitFvGen.features(s);

    double total = 0.;
    // Feature k is paired with parameter (k + rfDim).
    int paramIdx = this.rfDim;
    for(double f : phi){
        total += f * this.parameters[paramIdx++];
    }

    return total;
}
/**
 * Creates a new NumericVariableFeatures from a new ArrayList that holds the same
 * elements as this object's {@code whiteList}.
 * @return the newly constructed NumericVariableFeatures
 */
@Override
public NumericVariableFeatures copy() {
    // A new list instance with the same elements; the elements themselves are not duplicated.
    ArrayList<Object> whiteListCopy = new ArrayList<Object>(this.whiteList);
    return new NumericVariableFeatures(whiteListCopy);
}
}
/**
 * Creates a new SparseToDenseFeatures constructed from this object's
 * {@code sparseStateFeatures} reference.
 * @return the newly constructed SparseToDenseFeatures
 */
@Override
public SparseToDenseFeatures copy() {
    SparseToDenseFeatures duplicate = new SparseToDenseFeatures(sparseStateFeatures);
    return duplicate;
}
}
/**
 * Returns the Q-value of a state-action pair by delegating to the {@code vfa} object's evaluate method.
 * @param s the state to evaluate
 * @param a the action to evaluate
 * @return the result of evaluating the pair with {@code vfa}
 */
@Override
public double qValue(State s, Action a) {
    double q = this.vfa.evaluate(s, a);
    return q;
}
/**
 * Evaluates a state-action pair as the dot product of its state-action features with
 * the weight array.
 * If the weight array has not been created yet, it is allocated with one entry per
 * feature and every entry is set to {@code defaultWeight}.
 * Side effects: stores the computed features and the resulting value in this object,
 * sets the cached gradient to null, and records the state as the last state.
 * @param s the state to evaluate
 * @param a the action to evaluate
 * @return the value stored in {@code currentValue} after the evaluation
 */
@Override
public double evaluate(State s, Action a) {
    this.currentFeatures = this.features.features(s, a);

    // Lazily create the weights the first time features are available to size them.
    if(this.stateActionWeights == null){
        double[] fresh = new double[this.currentFeatures.length];
        this.stateActionWeights = fresh;
        for(int k = 0; k < fresh.length; k++){
            fresh[k] = defaultWeight;
        }
    }

    final double[] feats = this.currentFeatures;
    final double[] weights = this.stateActionWeights;
    double total = 0;
    for(int k = 0; k < feats.length; k++){
        total += feats[k] * weights[k];
    }

    this.currentValue = total;
    this.currentGradient = null;
    this.lastState = s;
    return this.currentValue;
}
/**
 * Returns the offset stored for an action in the {@code actionOffset} map.
 * When the action has no entry yet, it is assigned the map's current size as its offset,
 * the entry is stored, and {@code expandStateActionWeights} is called with the length of
 * the cached state features.
 * NOTE(review): the unseen-action path dereferences {@code currentStateFeatures}, so it
 * presumably needs to be set before a new action is looked up - confirm against callers.
 * @param a the action to look up
 * @return the offset associated with the action
 */
public int getActionOffset(Action a){
    Integer known = this.actionOffset.get(a);
    if(known != null){
        return known;
    }

    // First time this action is seen: the next free offset equals the number of known actions.
    int assigned = this.actionOffset.size();
    this.actionOffset.put(a, assigned);
    this.expandStateActionWeights(this.currentStateFeatures.length);
    return assigned;
}
/**
 * Creates a new PFFeatures constructed from a clone of this object's {@code pfsToUse}.
 * @return the newly constructed PFFeatures
 */
@Override
public DenseStateFeatures copy() {
    return new PFFeatures(
            this.pfsToUse.clone());
}
}
/**
 * Resets this solver by clearing {@code dataset} and then calling
 * {@code resetParameters} on {@code vfa}.
 */
@Override
public void resetSolver() {
    // Drop the stored data first, then reset the function approximator's parameters.
    this.dataset.clear();
    this.vfa.resetParameters();
}
/**
 * Creates a new DenseStateActionLinearVFA that uses the same feature generator and
 * default weight as this object and receives its own clone of the weight array.
 * The weight array can still be null when it has not been created yet; in that case
 * null is passed on instead of dereferencing it, so copying no longer throws a
 * NullPointerException.
 * NOTE(review): assumes the three-argument constructor accepts a null weight array - confirm.
 * @return the newly constructed DenseStateActionLinearVFA
 */
@Override
public DenseStateActionLinearVFA copy() {
    // Clone only when there is something to clone; calling clone() on a null array reference throws.
    double[] weightsCopy = this.stateActionWeights == null
            ? null
            : this.stateActionWeights.clone();
    return new DenseStateActionLinearVFA(features, weightsCopy, this.defaultWeight);
}
}
/**
 * Computes the value of a state as the dot product of the features produced by
 * {@code fvgen} with the leading entries of the parameter array.
 * @param s the state to evaluate
 * @return the weighted sum of the state's features
 */
@Override
public double value(State s) {
    double[] phi = this.fvgen.features(s);

    double total = 0.;
    int k = 0;
    while(k < phi.length){
        total += phi[k] * this.parameters[k];
        k++;
    }

    return total;
}
/**
 * Builds a linear VFA object that uses this Fourier basis feature database as its feature source.
 * @param defaultWeightValue the default feature weight value to use for all features
 * @return a new linear VFA object over this Fourier basis feature database.
 */
public DenseLinearVFA generateVFA(double defaultWeightValue) {
    DenseLinearVFA linearVfa = new DenseLinearVFA(this, defaultWeightValue);
    return linearVfa;
}
/**
 * Computes the reward as the dot product of a state's features with the parameter array.
 * The features are taken from {@code sprime} when {@code featuresAreForNextState} is set
 * and from {@code s} otherwise. The action is not used.
 * @param s the state before the transition
 * @param a the action taken (unused)
 * @param sprime the state after the transition
 * @return the weighted sum of the selected state's features
 */
@Override
public double reward(State s, Action a, State sprime){
    // Choose which end of the transition supplies the features.
    State source = this.featuresAreForNextState ? sprime : s;
    double[] phi = fvGen.features(source);

    double total = 0.;
    int k = 0;
    while(k < phi.length){
        total += phi[k] * this.parameters[k];
        k++;
    }

    return total;
}
/**
 * Computes the reward as the dot product of a state's features, produced by
 * {@code rfFvGen}, with the leading entries of the parameter array.
 * The features are taken from {@code sprime} when {@code rfFeaturesAreForNextState} is
 * set and from {@code s} otherwise. The action is not used.
 * @param s the state before the transition
 * @param a the action taken (unused)
 * @param sprime the state after the transition
 * @return the weighted sum of the selected state's features
 */
@Override
public double reward(State s, Action a, State sprime) {
    double[] phi = this.rfFeaturesAreForNextState
            ? this.rfFvGen.features(sprime)
            : this.rfFvGen.features(s);

    double total = 0.;
    for(int k = 0, n = phi.length; k < n; k++){
        total += phi[k] * this.parameters[k];
    }

    return total;
}
/**
 * Computes the reward as the dot product of the features of {@code s} with the block of
 * parameters that belongs to the action. The block starts at the action's index in
 * {@code actionMap} multiplied by {@code numStateFeatures} and spans
 * {@code numStateFeatures} entries. The next state is not used.
 * NOTE(review): an action without an entry in {@code actionMap} presumably fails when
 * the looked-up index is used in the multiplication - confirm how callers guarantee coverage.
 * @param s the state whose features are used
 * @param a the action that selects the parameter block
 * @param sprime the state after the transition (unused)
 * @return the weighted sum of the state's features for the action
 */
@Override
public double reward(State s, Action a, State sprime) {
    double[] stateVec = this.fvGen.features(s);

    int blockStart = this.actionMap.get(a) * this.numStateFeatures;
    int blockEnd = blockStart + this.numStateFeatures;

    double total = 0.;
    // p walks the action's parameter block while k walks the state features in step.
    for(int p = blockStart, k = 0; p < blockEnd; p++, k++){
        total += this.parameters[p] * stateVec[k];
    }

    return total;
}
/**
 * Computes the reward as the dot product of the weights returned by
 * {@code newFeatureWeights.getWeights()} with the features that
 * {@code newFeatureFunctions} produces for {@code state}.
 * The action and the next state are not used.
 * @param state the state whose features are used
 * @param a the action taken (unused)
 * @param sprime the state after the transition (unused)
 * @return the weighted sum of the state's features
 */
@Override
public double reward(State state, Action a, State sprime) {
    double[] weights = newFeatureWeights.getWeights();
    double[] phi = newFeatureFunctions.features(state);

    double total = 0;
    int k = 0;
    while (k < phi.length) {
        total += weights[k] * phi[k];
        k++;
    }

    return total;
}