/**
 * Computes the coefficient of determination (R-squared) of the fit:
 * <pre>
 * R<sup>2</sup> = 1 - SSR / SSTO
 * </pre>
 * Here SSR is the {@link #calculateResidualSumOfSquares() sum of squared residuals}
 * and SSTO is the {@link #calculateTotalSumOfSquares() total sum of squares}.
 *
 * <p>When y has no variance, i.e., SSTO = 0, NaN is returned.</p>
 *
 * @return R-square statistic
 * @throws NullPointerException if the sample has not been set
 * @throws org.apache.commons.math3.linear.SingularMatrixException if the design matrix is singular
 * @since 2.2
 */
public double calculateRSquared() {
    // SSR is evaluated before SSTO, matching the left-to-right order of the formula.
    final double residualSumOfSquares = calculateResidualSumOfSquares();
    final double totalSumOfSquares = calculateTotalSumOfSquares();
    return 1 - residualSumOfSquares / totalSumOfSquares;
}
/**
 * <p>Computes the adjusted R-squared statistic:
 * <pre>
 * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)]
 * </pre>
 * Here SSR is the {@link #calculateResidualSumOfSquares() sum of squared residuals},
 * SSTO is the {@link #calculateTotalSumOfSquares() total sum of squares}, n is the
 * number of observations and p is the number of parameters estimated (including
 * the intercept).</p>
 *
 * <p>For a regression estimated without an intercept term the value returned is <pre>
 * <code> 1 - (1 - {@link #calculateRSquared()}) * (n / (n - p)) </code>
 * </pre></p>
 *
 * <p>When y has no variance, i.e., SSTO = 0, NaN is returned.</p>
 *
 * @return adjusted R-Squared statistic
 * @throws NullPointerException if the sample has not been set
 * @throws org.apache.commons.math3.linear.SingularMatrixException if the design matrix is singular
 * @see #isNoIntercept()
 * @since 2.2
 */
public double calculateAdjustedRSquared() {
    // Held as a double so every ratio below is computed in floating point.
    final double observations = getX().getRowDimension();

    if (!isNoIntercept()) {
        // Intercept model: scale SSR by (n - 1) and SSTO by (n - p).
        final double scaledResidual = calculateResidualSumOfSquares() * (observations - 1);
        final double scaledTotal =
                calculateTotalSumOfSquares() * (observations - getX().getColumnDimension());
        return 1 - scaledResidual / scaledTotal;
    }

    // No-intercept model: inflate the unexplained share by n / (n - p).
    final double unexplainedShare = 1 - calculateRSquared();
    final double correction = observations / (observations - getX().getColumnDimension());
    return 1 - unexplainedShare * correction;
}
/**
 * Evaluates the R-Squared statistic of the regression, given by
 * <pre>
 * R<sup>2</sup> = 1 - SSR / SSTO
 * </pre>
 * with SSR the {@link #calculateResidualSumOfSquares() sum of squared residuals}
 * and SSTO the {@link #calculateTotalSumOfSquares() total sum of squares}.
 *
 * <p>If there is no variance in y, i.e., SSTO = 0, the result is NaN.</p>
 *
 * @return R-square statistic
 * @throws NullPointerException if the sample has not been set
 * @throws org.apache.commons.math3.linear.SingularMatrixException if the design matrix is singular
 * @since 2.2
 */
public double calculateRSquared() {
    final double ssr = calculateResidualSumOfSquares();
    final double ssto = calculateTotalSumOfSquares();
    return 1 - ssr / ssto;
}
/**
 * Gives the R-Squared statistic, i.e. one minus the share of the total
 * variation left unexplained by the fit:
 * <pre>
 * R<sup>2</sup> = 1 - SSR / SSTO
 * </pre>
 * where SSR is the {@link #calculateResidualSumOfSquares() sum of squared residuals}
 * and SSTO is the {@link #calculateTotalSumOfSquares() total sum of squares}.
 *
 * <p>NaN is returned if there is no variance in y, i.e., SSTO = 0.</p>
 *
 * @return R-square statistic
 * @throws NullPointerException if the sample has not been set
 * @throws org.apache.commons.math3.linear.SingularMatrixException if the design matrix is singular
 * @since 2.2
 */
public double calculateRSquared() {
    // Ratio SSR / SSTO; the numerator is evaluated first, then the denominator.
    final double unexplainedFraction =
            calculateResidualSumOfSquares() / calculateTotalSumOfSquares();
    return 1 - unexplainedFraction;
}
// F-test comparing two OLS fits of the same response y: first on x, then on xb.
OLSMultipleLinearRegression regr = new OLSMultipleLinearRegression();

// Fit on x: residual sum of squares and residual degrees of freedom.
regr.newSampleData(y, x);
double SSR1 = regr.calculateResidualSumOfSquares();
// df = n - number of coefficients, including intercept (hence the + 1)
double df1 = y.length - (x[0].length + 1);

// Fit on xb, reusing the same regression object.
regr.newSampleData(y, xb);
double SSR2 = regr.calculateResidualSumOfSquares();
double df2 = y.length - (xb[0].length + 1);

// Error mean square comes from the second fit; per the original note this must be
// the biggest model (presumably xb has more columns than x -- confirm for your data).
double MSE = SSR2 / df2;

// Change in residual sum of squares per degree of freedom gained; absolute values
// make the result independent of which fit is the larger one.
double dfdiff = Math.abs(df2 - df1);
double MSEdiff = Math.abs(SSR2 - SSR1) / dfdiff;

// F statistic and its upper-tail probability under F(dfdiff, df2).
double Fval = MSEdiff / MSE;
FDistribution Fdist = new FDistribution(dfdiff, df2);
double pval = 1 - Fdist.cumulativeProbability(Fval);
double[] y = {-0.48812477, 0.33458213, -0.52754476, -0.79863471, -0.68544309, -0.12970239, 0.02355622, -0.31890850, 0.34725819, 0.08108851}; double[][] x = {{1,0}, {0,0}, {1,0}, {2,1}, {0,1}, {0,0}, {1,0}, {0,0}, {1,0}, {0,0}}; double[][] xb = {{1,0,0}, {0,0,0}, {1,0,0}, {2,1,2}, {0,1,0}, {0,0,0}, {1,0,0}, {0,0,0}, {1,0,0}, {0,0,0}}; OLSMultipleLinearRegression regr = new OLSMultipleLinearRegression(); int degreesOfFreedomA = y.length - (x[0].length); // no + 1 int degreesOfFreedomB = y.length - (xb[0].length); // no + 1 regr.setNoIntercept(true); regr.newSampleData(y, x); double sumOfSquaresNoInterceptA = regr.calculateResidualSumOfSquares(); regr.newSampleData(y, xb); double sumOfSquaresNoInterceptB = regr.calculateResidualSumOfSquares(); double MSE = sumOfSquaresNoInterceptB / degreesOfFreedomB; System.out.printf("RSS no intercept: %f\n",sumOfSquaresNoInterceptB); int degreesOfFreedomDifference = Math.abs(degreesOfFreedomB - degreesOfFreedomA); double MSEdiff = Math.abs((sumOfSquaresNoInterceptB - sumOfSquaresNoInterceptA) / (degreesOfFreedomDifference)); double Fval = MSEdiff / MSE; FDistribution Fdist = new FDistribution(degreesOfFreedomDifference, degreesOfFreedomB); double pval = 1 - Fdist.cumulative(Fval); System.out.printf("pval without intercept: %f",pval);
// Stores three statistics of the fitted regression in `map` under fixed string keys.
// The statistics are computed and inserted in this order: R-squared, adjusted
// R-squared, residual sum of squares.
// NOTE(review): `map` and `multipleLinearRegression` are declared outside this
// snippet; presumably the map's value type accepts a boxed double -- confirm.
map.put("RSquared", multipleLinearRegression.calculateRSquared());
map.put("adjustedRSquared", multipleLinearRegression.calculateAdjustedRSquared());
map.put("residualSumSquares", multipleLinearRegression.calculateResidualSumOfSquares());
/**
 * <p>Evaluates the adjusted R-squared statistic, given by <pre>
 * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)]
 * </pre>
 * with SSR the {@link #calculateResidualSumOfSquares() sum of squared residuals},
 * SSTO the {@link #calculateTotalSumOfSquares() total sum of squares}, n the number
 * of observations and p the number of parameters estimated (including the intercept).</p>
 *
 * <p>If the regression is estimated without an intercept term, the result is instead <pre>
 * <code> 1 - (1 - {@link #calculateRSquared()}) * (n / (n - p)) </code>
 * </pre></p>
 *
 * <p>If there is no variance in y, i.e., SSTO = 0, NaN is returned.</p>
 *
 * @return adjusted R-Squared statistic
 * @throws NullPointerException if the sample has not been set
 * @throws org.apache.commons.math3.linear.SingularMatrixException if the design matrix is singular
 * @see #isNoIntercept()
 * @since 2.2
 */
public double calculateAdjustedRSquared() {
    // Row count as a double so that all following arithmetic is floating point.
    final double rowCount = getX().getRowDimension();

    if (isNoIntercept()) {
        final double oneMinusRSquared = 1 - calculateRSquared();
        final double dofRatio = rowCount / (rowCount - getX().getColumnDimension());
        return 1 - oneMinusRSquared * dofRatio;
    }

    final double numerator = calculateResidualSumOfSquares() * (rowCount - 1);
    final double denominator =
            calculateTotalSumOfSquares() * (rowCount - getX().getColumnDimension());
    return 1 - numerator / denominator;
}
/**
 * <p>Gives the adjusted R-squared statistic, defined as <pre>
 * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)]
 * </pre>
 * where SSR is the {@link #calculateResidualSumOfSquares() sum of squared residuals},
 * SSTO is the {@link #calculateTotalSumOfSquares() total sum of squares}, n is the number
 * of observations and p is the number of parameters estimated (including the intercept).</p>
 *
 * <p>Without an intercept term in the regression, the returned value is <pre>
 * <code> 1 - (1 - {@link #calculateRSquared()}) * (n / (n - p)) </code>
 * </pre></p>
 *
 * <p>NaN is returned if there is no variance in y, i.e., SSTO = 0.</p>
 *
 * @return adjusted R-Squared statistic
 * @throws NullPointerException if the sample has not been set
 * @throws org.apache.commons.math3.linear.SingularMatrixException if the design matrix is singular
 * @see #isNoIntercept()
 * @since 2.2
 */
public double calculateAdjustedRSquared() {
    final double sampleSize = getX().getRowDimension();
    // Only the selected branch is evaluated, so the same sibling methods are called
    // in the same order as with an explicit if/else.
    return isNoIntercept()
            ? 1 - (1 - calculateRSquared())
                    * (sampleSize / (sampleSize - getX().getColumnDimension()))
            : 1 - (calculateResidualSumOfSquares() * (sampleSize - 1))
                    / (calculateTotalSumOfSquares()
                            * (sampleSize - getX().getColumnDimension()));
}