/**
 * Computes a linear value for the given state: the dot product of the features produced by
 * {@code vinitFvGen} with the slice of {@code parameters} that begins at index {@code rfDim}.
 * @param s the state to evaluate
 * @return the weighted sum of the state's features
 */
@Override
public double value(State s) {
    final double[] fv = this.vinitFvGen.features(s);
    double total = 0.;
    // the parameters used here start rfDim entries into the parameter array
    int paramIndex = this.rfDim;
    for(double f : fv){
        total += f * this.parameters[paramIndex];
        paramIndex++;
    }
    return total;
}
/**
 * Computes a linear value for the given state: the dot product of the features produced by
 * {@code fvgen} with the leading entries of {@code parameters}.
 * @param s the state to evaluate
 * @return the weighted sum of the state's features
 */
@Override
public double value(State s) {
    final double[] fv = this.fvgen.features(s);
    double total = 0.;
    int idx = 0;
    while(idx < fv.length){
        total += fv[idx] * this.parameters[idx];
        idx++;
    }
    return total;
}
/**
 * Computes a linear reward as the dot product of state features and {@code parameters}.
 * The features come from {@code sprime} when {@code featuresAreForNextState} is set and
 * from {@code s} otherwise; the action is not used.
 * @param s the state in which the action was taken
 * @param a the action taken (unused)
 * @param sprime the resulting state
 * @return the weighted sum of the selected state's features
 */
@Override
public double reward(State s, Action a, State sprime){
    final State featureSource = this.featuresAreForNextState ? sprime : s;
    final double[] fv = fvGen.features(featureSource);

    double total = 0.;
    int idx = 0;
    for(double f : fv){
        total += f * this.parameters[idx++];
    }
    return total;
}
/**
 * Computes a linear reward as the dot product of the features from {@code rfFvGen} and the
 * leading entries of {@code parameters}. The features come from {@code sprime} when
 * {@code rfFeaturesAreForNextState} is set and from {@code s} otherwise; the action is not used.
 * @param s the state in which the action was taken
 * @param a the action taken (unused)
 * @param sprime the resulting state
 * @return the weighted sum of the selected state's features
 */
@Override
public double reward(State s, Action a, State sprime) {
    final State featureSource = this.rfFeaturesAreForNextState ? sprime : s;
    final double[] fv = this.rfFvGen.features(featureSource);

    double total = 0.;
    for(int k = 0; k != fv.length; ++k){
        total += fv[k] * this.parameters[k];
    }
    return total;
}
/**
 * Evaluates the linear state-value function for the given state and caches the features,
 * value and state on this object. If the state weights have not been allocated yet, they
 * are created with one entry per feature, each set to {@code defaultWeight}.
 * @param s the state to evaluate
 * @return the dot product of the state's features and the state weights
 */
@Override
public double evaluate(State s) {
    final double[] fv = this.stateFeatures.features(s);
    this.currentStateFeatures = fv;
    this.currentActionOffset = 0;

    // lazily allocate the weights on first use, sized from the feature vector
    if(this.stateWeights == null){
        this.stateWeights = new double[fv.length];
        int w = 0;
        while(w < this.stateWeights.length){
            this.stateWeights[w] = this.defaultWeight;
            w++;
        }
    }

    double total = 0;
    for(int k = 0; k < fv.length; k++){
        total += fv[k] * this.stateWeights[k];
    }

    // record the result and drop any gradient cached for a previous evaluation
    this.currentValue = total;
    this.currentGradient = null;
    this.lastState = s;
    return this.currentValue;
}
/**
 * Computes a linear state-action reward. The features of {@code s} are multiplied with the
 * block of {@code parameters} that starts at {@code actionMap.get(a) * numStateFeatures}
 * and spans {@code numStateFeatures} entries. The next state is not used.
 * @param s the state in which the action was taken
 * @param a the action taken; it selects the parameter block
 * @param sprime the resulting state (unused)
 * @return the weighted sum of the state's features under the action's parameters
 */
@Override
public double reward(State s, Action a, State sprime) {
    final double[] stateFv = this.fvGen.features(s);
    final int base = this.actionMap.get(a) * this.numStateFeatures;

    double total = 0.;
    for(int j = 0; j < this.numStateFeatures; j++){
        total += this.parameters[base + j] * stateFv[j];
    }
    return total;
}
/**
 * Builds a state-action feature vector of length {@code (number of state features) * numActions}.
 * The state features are copied into the segment that starts at
 * {@code getActionOffset(a) * (number of state features)}; all other entries remain zero.
 * @param s the state whose features are computed
 * @param a the action that selects the segment
 * @return the state-action feature vector
 */
@Override
public double[] features(State s, Action a) {
    final double[] stateFv = stateFeatures.features(s);
    final int width = stateFv.length;
    final double[] out = new double[width * numActions];

    int dst = this.getActionOffset(a) * width;
    for(int src = 0; src < width; src++, dst++){
        out[dst] = stateFv[src];
    }
    return out;
}
/**
 * Computes a linear reward as the dot product of the weights returned by
 * {@code newFeatureWeights.getWeights()} and the features of {@code state}.
 * The action and next state are not used.
 * @param state the state whose features are weighted
 * @param a the action taken (unused)
 * @param sprime the resulting state (unused)
 * @return the weighted sum of the state's features
 */
@Override
public double reward(State state, Action a, State sprime) {
    final double[] weights = newFeatureWeights.getWeights();
    final double[] stateFv = newFeatureFunctions.features(state);

    double total = 0;
    int k = 0;
    while(k < stateFv.length){
        total += weights[k] * stateFv[k];
        k++;
    }
    return total;
}
/**
 * Returns a sparse gradient whose entry {@code i} is feature {@code i} from {@code rfFvGen}.
 * The features come from {@code sp} when {@code rfFeaturesAreForNextState} is set and from
 * {@code s} otherwise; the action is not used.
 * @param s the state in which the action was taken
 * @param a the action taken (unused)
 * @param sp the resulting state
 * @return the gradient holding one entry per feature
 */
public FunctionGradient gradient(State s, Action a, State sp){
    final State featureSource = rfFeaturesAreForNextState ? sp : s;
    final double[] fv = rfFvGen.features(featureSource);

    final FunctionGradient result = new FunctionGradient.SparseGradient(fv.length);
    int idx = 0;
    for(double f : fv){
        result.put(idx, f);
        idx++;
    }
    return result;
}
/**
 * Evaluates the linear state-action value function and caches the features, action offset,
 * value and state on this object. The state's features are multiplied with the block of
 * {@code stateActionWeights} that starts at
 * {@code getActionOffset(a) * (number of state features)}.
 * @param s the state to evaluate
 * @param a the action to evaluate
 * @return the dot product of the state's features and the action's weight block
 */
@Override
public double evaluate(State s, Action a) {
    final double[] fv = this.stateFeatures.features(s);
    this.currentStateFeatures = fv;
    this.currentActionOffset = this.getActionOffset(a);

    final int base = this.currentActionOffset * fv.length;
    double total = 0;
    int k = 0;
    while(k < fv.length){
        total += fv[k] * this.stateActionWeights[base + k];
        k++;
    }

    // record the result and drop any gradient cached for a previous evaluation
    this.currentValue = total;
    this.currentGradient = null;
    this.lastState = s;
    return this.currentValue;
}
/**
 * Builds one {@link GaussianRBF} unit per supplied {@link State}. Each unit is constructed from
 * the state's feature vector (as produced by the given {@link DenseStateFeatures}), the given
 * distance metric, and the given epsilon value.
 * @param states the {@link State} objects for which units are created
 * @param fvGen the {@link DenseStateFeatures} used to convert each state to a double array
 * @param metric the {@link burlap.behavior.functionapproximation.dense.rbf.DistanceMetric} passed to every unit
 * @param epsilon the bandwidth parameter passed to every unit
 * @return a {@link java.util.List} with one {@link GaussianRBF} per input state, in iteration order
 */
public static List<RBF> generateGaussianRBFsForStates(List<State> states, DenseStateFeatures fvGen, DistanceMetric metric, double epsilon){
    final List<RBF> rbfs = new ArrayList<RBF>(states.size());
    for(State center : states){
        final double[] centerFv = fvGen.features(center);
        final RBF unit = new GaussianRBF(centerFv, metric, epsilon);
        rbfs.add(unit);
    }
    return rbfs;
}
/**
 * Returns a sparse gradient whose entry {@code i} is feature {@code i} from {@code fvGen}.
 * The features come from {@code sprime} when {@code featuresAreForNextState} is set and from
 * {@code s} otherwise; the action is not used.
 * @param s the state in which the action was taken
 * @param a the action taken (unused)
 * @param sprime the resulting state
 * @return the gradient holding one entry per feature
 */
@Override
public FunctionGradient gradient(State s, Action a, State sprime) {
    final State featureSource = featuresAreForNextState ? sprime : s;
    final double[] fv = fvGen.features(featureSource);

    final FunctionGradient result = new FunctionGradient.SparseGradient(fv.length);
    for(int k = 0; k != fv.length; ++k){
        result.put(k, fv[k]);
    }
    return result;
}
/**
 * Builds one {@link GaussianRBF} unit per supplied {@link State}. Each unit is constructed from
 * the state's feature vector (as produced by the given {@link DenseStateFeatures}) and the given
 * epsilon value, using the {@link GaussianRBF} constructor that takes no explicit metric
 * (documented by the original author as defaulting to {@link EuclideanDistance}).
 * @param states the {@link State} objects for which units are created
 * @param fvGen the {@link DenseStateFeatures} used to convert each state to a double array
 * @param epsilon the bandwidth parameter passed to every unit
 * @return a {@link java.util.List} with one {@link GaussianRBF} per input state, in iteration order
 */
public static List<RBF> generateGaussianRBFsForStates(List<State> states, DenseStateFeatures fvGen, double epsilon){
    final List<RBF> rbfs = new ArrayList<RBF>(states.size());
    for(State center : states){
        final double[] centerFv = fvGen.features(center);
        final RBF unit = new GaussianRBF(centerFv, epsilon);
        rbfs.add(unit);
    }
    return rbfs;
}
/**
 * Returns the gradient of a linear state-action reward with respect to its parameters.
 * <p>
 * The gradient entry for parameter {@code actionMap.get(a) * numStateFeatures + i} is state
 * feature {@code i}; every other entry is left unset (zero in a sparse gradient). This is the
 * parameter block a linear reward reads for action {@code a} when parameters are laid out as
 * one block of {@code numStateFeatures} weights per action.
 * <p>
 * Earlier code offset the entries by {@code numStateFeatures * numActions}, which lies past
 * every per-action block, and ignored the action index it had already computed.
 * @param s the state in which the action was taken; its features form the gradient values
 * @param a the action taken; it selects which parameter block receives the gradient
 * @param sprime the resulting state (unused)
 * @return the sparse gradient over the parameters of the selected action
 */
@Override
public FunctionGradient gradient(State s, Action a, State sprime) {
    double [] sFeatures = this.fvGen.features(s);
    // first parameter index of the block belonging to action a
    int sIndex = this.actionMap.get(a) * this.numStateFeatures;

    FunctionGradient gradient = new FunctionGradient.SparseGradient(sFeatures.length);
    for(int i = 0; i < sFeatures.length; i++){
        gradient.put(sIndex + i, sFeatures[i]);
    }
    return gradient;
}
/**
 * Produces one {@link StateFeature} per tiling for the given state. For each tiling, the
 * tile containing the state's input vector is looked up (or registered) in that tiling's
 * tile-to-feature map via {@code getOrGenerateFeature}, and the resulting feature id is
 * emitted with value 1.
 * @param s the state to convert
 * @return the list of active features, one per tiling and in tiling order
 */
@Override
public List<StateFeature> features(State s) {
    final double[] input = this.featureVectorGenerator.features(s);
    final List<StateFeature> active = new ArrayList<StateFeature>();

    int t = 0;
    while(t < this.tilings.size()){
        final Tiling tiling = this.tilings.get(t);
        final Map<Tiling.FVTile, Integer> featureIds = this.stateFeatures.get(t);
        final int featureId = this.getOrGenerateFeature(featureIds, tiling.getFVTile(input));
        active.add(new StateFeature(featureId, 1.));
        t++;
    }
    return active;
}
/**
 * Computes the basis-function feature vector for the given state: one value per coefficient
 * vector, obtained from {@code basisValue(input, i)}.
 * <p>
 * On the first call (when no coefficient vectors exist yet) the number of state variables is
 * taken from the length of the input feature vector, {@code maxNonZeroCoefficients} is set to
 * that number if it still holds the sentinel {@code -1}, and the coefficient vectors are
 * generated.
 * @param s the state to convert
 * @return an array with one basis value per coefficient vector
 */
@Override
public double [] features(State s) {
    double [] input = this.inputFeatures.features(s);

    // lazy initialization: the input dimensionality is only known once a state has been seen
    if(this.coefficientVectors == null){
        this.numStateVariables = input.length;
        if(this.maxNonZeroCoefficients == -1){
            this.maxNonZeroCoefficients = this.numStateVariables;
        }
        this.generateCoefficientVectors();
    }

    int numBasisFunctions = this.coefficientVectors.size();
    double [] features = new double[numBasisFunctions];
    for(int i = 0; i < numBasisFunctions; i++){
        features[i] = this.basisValue(input, i);
    }
    return features;
}
/**
 * Returns a sparse gradient of the linear value function for the given state. Feature
 * {@code i} from {@code vinitFvGen} is stored at parameter index {@code i + rfDim}.
 * @param s the state whose value gradient is requested
 * @return the gradient holding one entry per value feature
 */
@Override
public FunctionGradient valueGradient(State s) {
    final double[] fv = this.vinitFvGen.features(s);
    final FunctionGradient result = new FunctionGradient.SparseGradient(fv.length);

    int k = 0;
    while(k < fv.length){
        result.put(this.rfDim + k, fv[k]);
        k++;
    }
    return result;
}
/**
 * Returns a sparse gradient of the linear value function for the given state. Feature
 * {@code i} from {@code fvgen} is stored at parameter index {@code i}.
 * @param s the state whose value gradient is requested
 * @return the gradient holding one entry per feature
 */
@Override
public FunctionGradient valueGradient(State s) {
    final double[] stateFv = this.fvgen.features(s);
    final FunctionGradient result = new FunctionGradient.SparseGradient();

    int idx = 0;
    for(double f : stateFv){
        result.put(idx, f);
        idx++;
    }
    return result;
}
/**
 * Converts a state into an RLGlue observation. The observation is created with zero ints and
 * {@code valueRanges.length} doubles, and each entry of the state's flattened representation
 * (from {@code stateFlattener}) is written to the double slot of the same index.
 * @param s the state to convert
 * @return an RLGlue Observation holding the flattened state values
 */
protected Observation convertIntoObservation(State s){
    final Observation obs = new Observation(0, this.valueRanges.length);
    final double[] flat = this.stateFlattener.features(s);

    int slot = 0;
    for(double v : flat){
        obs.setDouble(slot, v);
        slot++;
    }
    return obs;
}
/**
 * Returns the gradient of the linear state-value function for the given state, where entry
 * {@code i} is state feature {@code i}.
 * <p>
 * If {@code s} is the same object as the last evaluated state, a cached gradient is returned
 * when one exists; otherwise the cached features are reused to build it. For any other state
 * the features are recomputed. The new gradient, features and state are then cached.
 * @param s the state whose gradient is requested
 * @return the gradient holding one entry per state feature
 */
@Override
public FunctionGradient gradient(State s) {
    // identity comparison: the cache only applies to the very same state object
    final boolean sameAsLast = this.lastState == s;
    if(sameAsLast && this.currentGradient != null){
        return this.currentGradient;
    }

    final double[] fv = sameAsLast ? this.currentStateFeatures : this.stateFeatures.features(s);

    final FunctionGradient result = new FunctionGradient.SparseGradient(fv.length);
    int k = 0;
    while(k < fv.length){
        result.put(k, fv[k]);
        k++;
    }

    this.currentGradient = result;
    this.currentStateFeatures = fv;
    this.lastState = s;
    return result;
}