/**
 * <p>Computes the adjusted R-squared statistic:<pre>
 * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)]
 * </pre>
 * Here SSR is the {@link #calculateResidualSumOfSquares() sum of squared residuals},
 * SSTO is the {@link #calculateTotalSumOfSquares() total sum of squares}, n is the
 * number of observations (rows of the design matrix) and p is the number of estimated
 * parameters, intercept included (columns of the design matrix).</p>
 *
 * <p>When the model is estimated without an intercept term the value returned is<pre>
 * <code> 1 - (1 - {@link #calculateRSquared()}) * (n / (n - p)) </code>
 * </pre></p>
 *
 * <p>If there is no variance in y, i.e., SSTO = 0, NaN is returned.</p>
 *
 * @return adjusted R-Squared statistic
 * @throws NullPointerException if the sample has not been set
 * @throws org.apache.commons.math3.linear.SingularMatrixException if the design matrix is singular
 * @see #isNoIntercept()
 * @since 2.2
 */
public double calculateAdjustedRSquared() {
    final double observations = getX().getRowDimension();
    if (!isNoIntercept()) {
        // Intercept model: scale SSR and SSTO by their respective degrees of freedom.
        final double scaledResidual = calculateResidualSumOfSquares() * (observations - 1);
        final double scaledTotal =
                calculateTotalSumOfSquares() * (observations - getX().getColumnDimension());
        return 1 - scaledResidual / scaledTotal;
    }
    // No-intercept model: adjust the plain R-squared by n / (n - p).
    final double unexplained = 1 - calculateRSquared();
    return 1 - unexplained * (observations / (observations - getX().getColumnDimension()));
}
/**
 * Fits an ordinary-least-squares regression of the known similarity of each
 * example on its interpolated per-metric scores, and stores the estimated
 * regression parameters in {@code simlarityCoefficients}.
 *
 * <p>The interpolator is trained on the examples first. The square root of the
 * regression's R-squared is logged under the label "pearson".</p>
 *
 * @param simList training examples; must not be empty
 * @throws IllegalArgumentException if {@code simList} is empty
 */
@Override
public void trainSimilarity(List<EnsembleSim> simList) {
    if (simList.isEmpty()) {
        throw new IllegalArgumentException("no examples to train on!");
    }
    similarityInterpolator.trainSimilarity(simList);

    final int exampleCount = simList.size();
    double[][] features = new double[exampleCount][numMetrics];
    double[] targets = new double[exampleCount];
    for (int row = 0; row < exampleCount; row++) {
        EnsembleSim example = simList.get(row);
        targets[row] = example.knownSim.similarity;
        EnsembleSim filled = similarityInterpolator.interpolate(example);
        double[] featureRow = features[row];
        for (int metric = 0; metric < numMetrics; metric++) {
            featureRow[metric] = filled.getScores().get(metric);
        }
    }

    OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(targets, features);
    simlarityCoefficients = new TDoubleArrayList(ols.estimateRegressionParameters());
    double pearson = Math.sqrt(ols.calculateRSquared());
    LOG.info("coefficients are " + simlarityCoefficients.toString());
    LOG.info("pearson for multiple regression is " + pearson);
}
/**
 * Trains the linear combination of metrics used for similarity estimates.
 *
 * <p>After training the interpolator, every example contributes one regression
 * row: the response is its known similarity and the regressors are the
 * interpolated scores of the {@code numMetrics} metrics. The OLS parameter
 * estimates are stored in {@code simlarityCoefficients}, and the square root of
 * R-squared is logged as "pearson".</p>
 *
 * @param simList training examples; must not be empty
 * @throws IllegalArgumentException if {@code simList} is empty
 */
@Override
public void trainSimilarity(List<EnsembleSim> simList) {
    if (simList.isEmpty()) {
        throw new IllegalArgumentException("no examples to train on!");
    }
    similarityInterpolator.trainSimilarity(simList);

    // Build the design matrix and the response vector.
    final int rows = simList.size();
    double[] response = new double[rows];
    double[][] design = new double[rows][numMetrics];
    for (int r = 0; r < rows; r++) {
        EnsembleSim observed = simList.get(r);
        response[r] = observed.knownSim.similarity;
        EnsembleSim interpolated = similarityInterpolator.interpolate(observed);
        for (int c = 0; c < numMetrics; c++) {
            design[r][c] = interpolated.getScores().get(c);
        }
    }

    // Fit and record the model.
    OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
    model.newSampleData(response, design);
    simlarityCoefficients = new TDoubleArrayList(model.estimateRegressionParameters());
    double pearson = Math.sqrt(model.calculateRSquared());
    LOG.info("coefficients are " + simlarityCoefficients.toString());
    LOG.info("pearson for multiple regression is " + pearson);
}
@Override public void trainMostSimilar(List<EnsembleSim> simList) { if (simList.isEmpty()){ throw new IllegalStateException("no examples to train on!"); } mostSimilarInterpolator.trainMostSimilar(simList); // Remove things that have no observed metrics List<EnsembleSim> pruned = new ArrayList<EnsembleSim>(); for (EnsembleSim es : simList) { if (es != null && es.getNumMetricsWithScore() > 0) { pruned.add(es); } } double[][] X = new double[pruned.size()][numMetrics*2]; double[] Y = new double[pruned.size()]; for (int i=0; i<pruned.size(); i++){ Y[i]=pruned.get(i).knownSim.similarity; EnsembleSim es = mostSimilarInterpolator.interpolate(pruned.get(i)); for (int j=0; j<numMetrics; j++){ X[i][2*j]= es.getScores().get(j); X[i][2*j+1]= Math.log(es.getRanks().get(j)+1); } } OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression(); regression.newSampleData(Y,X); mostSimilarCoefficients = new TDoubleArrayList(regression.estimateRegressionParameters()); double pearson = Math.sqrt(regression.calculateRSquared()); LOG.info("coefficients are "+mostSimilarCoefficients.toString()); LOG.info("pearson for multiple regression is "+pearson); }
/** * Gets the correlation coefficients. * * @param data the data * @return the correlation coefficients */ protected List<Double> getCorrelationCoefficients(final double[][] data) { int n = data.length; int m = data[0].length; List<Double> correlationCoefficients = new LinkedList<Double>(); for (int i = 0; i < n; i++) { double[][] x = new double[n - 1][m]; int k = 0; for (int j = 0; j < n; j++) { if (j != i) { x[k++] = data[j]; } } // Transpose the matrix so that it fits the linear model double[][] xT = new Array2DRowRealMatrix(x).transpose().getData(); // RSquare is the "coefficient of determination" correlationCoefficients.add(MathUtil.createLinearRegression(xT, data[i]).calculateRSquared()); } return correlationCoefficients; }
/** * Gets the correlation coefficients. * * @param data the data * @return the correlation coefficients */ protected List<Double> getCorrelationCoefficients(final double[][] data) { int n = data.length; int m = data[0].length; List<Double> correlationCoefficients = new LinkedList<Double>(); for (int i = 0; i < n; i++) { double[][] x = new double[n - 1][m]; int k = 0; for (int j = 0; j < n; j++) { if (j != i) { x[k++] = data[j]; } } // Transpose the matrix so that it fits the linear model double[][] xT = new Array2DRowRealMatrix(x).transpose().getData(); // RSquare is the "coefficient of determination" correlationCoefficients.add(MathUtil.createLinearRegression(xT, data[i]).calculateRSquared()); } return correlationCoefficients; }
/** * Gets the correlation coefficients. * * @param data the data * @return the correlation coefficients */ protected List<Double> getCorrelationCoefficients(final double[][] data) { int n = data.length; int m = data[0].length; List<Double> correlationCoefficients = new LinkedList<Double>(); for (int i = 0; i < n; i++) { double[][] x = new double[n - 1][m]; int k = 0; for (int j = 0; j < n; j++) { if (j != i) { x[k++] = data[j]; } } // Transpose the matrix so that it fits the linear model double[][] xT = new Array2DRowRealMatrix(x).transpose().getData(); // RSquare is the "coefficient of determination" correlationCoefficients.add(MathUtil.createLinearRegression(xT, data[i]).calculateRSquared()); } return correlationCoefficients; }
/** * Gets the correlation coefficients. * * @param data the data * @return the correlation coefficients */ protected List<Double> getCorrelationCoefficients(final double[][] data) { final int rows = data.length; final int cols = data[0].length; final List<Double> correlationCoefficients = new LinkedList<>(); for (int i = 0; i < rows; i++) { final double[][] x = new double[rows - 1][cols]; int k = 0; for (int j = 0; j < rows; j++) { if (j != i) { x[k++] = data[j]; } } // Transpose the matrix so that it fits the linear model final double[][] xT = new Array2DRowRealMatrix(x).transpose().getData(); // RSquare is the "coefficient of determination" correlationCoefficients.add( MathUtil.createLinearRegression(xT, data[i]).calculateRSquared()); } return correlationCoefficients; }
// Store the regression's R-squared, adjusted R-squared and residual sum of squares in the map under fixed string keys.
map.put("RSquared", multipleLinearRegression.calculateRSquared()); map.put("adjustedRSquared", multipleLinearRegression.calculateAdjustedRSquared()); map.put("residualSumSquares", multipleLinearRegression.calculateResidualSumOfSquares());
/**
 * Builds the result record for this model.
 *
 * <p>The base result is always created from the model's name, framework,
 * constant flag, variable count, state and train/test sizes. When
 * {@code params} is set, the parameters (as a list) and the regression's
 * R-squared are attached as training info under the keys "parameters" and
 * "rSquared".</p>
 *
 * @return the model result, with training info attached when {@code params} is non-null
 */
@Override
LR.ModelResult asResult() {
    final LR.ModelResult base =
            new LR.ModelResult(name, framework, hasConstant(), numVars, state, getNTrain(), getNTest());
    if (params == null) {
        // Nothing has been estimated, so there is no training info to attach.
        return base;
    }
    return base.withTrainInfo(
            "parameters", LR.doubleArrayToList(params),
            "rSquared", R.calculateRSquared());
}
/**
 * Fits the model once all observations have been collected.
 *
 * <p>An ordinary-least-squares regression is run with the recorded {@code ys}
 * as response and two regressors per observation: {@code log(1 + rank)} and
 * {@code logIfNecessary(score)}. The three estimated parameters are stored, in
 * order, in {@code intercept}, {@code rankCoeff} and {@code scoreCoeff}. The
 * superclass hook is then invoked and a summary with R-squared is logged.</p>
 */
@Override
public void observationsFinished() {
    final double[] response = ys.toArray();
    final double[][] design = new double[response.length][2];
    for (int row = 0; row < response.length; row++) {
        final double[] regressors = design[row];
        regressors[0] = Math.log(1 + ranks.get(row));
        regressors[1] = logIfNecessary(scores.get(row));
    }

    final OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(response, design);
    final double[] estimates = ols.estimateRegressionParameters();
    intercept = estimates[0];
    rankCoeff = estimates[1];
    scoreCoeff = estimates[2];

    super.observationsFinished();

    // dump() is evaluated before R-squared, matching the order of the message parts.
    final String modelDump = dump();
    final double rSquared = ols.calculateRSquared();
    LOG.info("trained model on " + design.length + " observations: " + modelDump
            + " with R-squared " + rSquared);
}
/**
 * <p>Returns the adjusted R-squared statistic. For a model with an intercept it is<pre>
 * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)]
 * </pre>
 * with SSR the {@link #calculateResidualSumOfSquares() sum of squared residuals},
 * SSTO the {@link #calculateTotalSumOfSquares() total sum of squares}, n the number
 * of observations and p the number of parameters estimated (including the intercept).</p>
 *
 * <p>For a regression estimated without an intercept term, the result is instead<pre>
 * <code> 1 - (1 - {@link #calculateRSquared()}) * (n / (n - p)) </code>
 * </pre></p>
 *
 * <p>If there is no variance in y, i.e., SSTO = 0, NaN is returned.</p>
 *
 * @return adjusted R-Squared statistic
 * @throws NullPointerException if the sample has not been set
 * @throws org.apache.commons.math3.linear.SingularMatrixException if the design matrix is singular
 * @see #isNoIntercept()
 * @since 2.2
 */
public double calculateAdjustedRSquared() {
    final double sampleSize = getX().getRowDimension();
    if (isNoIntercept()) {
        final double unexplainedShare = 1 - calculateRSquared();
        final double correction = sampleSize / (sampleSize - getX().getColumnDimension());
        return 1 - unexplainedShare * correction;
    }
    final double numerator = calculateResidualSumOfSquares() * (sampleSize - 1);
    final double denominator =
            calculateTotalSumOfSquares() * (sampleSize - getX().getColumnDimension());
    return 1 - numerator / denominator;
}
/**
 * <p>Calculates the adjusted R-squared statistic, given by<pre>
 * R<sup>2</sup><sub>adj</sub> = 1 - [SSR (n - 1)] / [SSTO (n - p)]
 * </pre>
 * where SSR denotes the {@link #calculateResidualSumOfSquares() sum of squared residuals},
 * SSTO the {@link #calculateTotalSumOfSquares() total sum of squares}, n the number of
 * observations, and p the number of estimated parameters (the intercept counts as one).</p>
 *
 * <p>When no intercept term is estimated, the returned value is<pre>
 * <code> 1 - (1 - {@link #calculateRSquared()}) * (n / (n - p)) </code>
 * </pre></p>
 *
 * <p>If there is no variance in y, i.e., SSTO = 0, NaN is returned.</p>
 *
 * @return adjusted R-Squared statistic
 * @throws NullPointerException if the sample has not been set
 * @throws org.apache.commons.math3.linear.SingularMatrixException if the design matrix is singular
 * @see #isNoIntercept()
 * @since 2.2
 */
public double calculateAdjustedRSquared() {
    final double rowCount = getX().getRowDimension();
    final double adjusted;
    if (isNoIntercept()) {
        final double residualFraction = 1 - calculateRSquared();
        adjusted = 1 - residualFraction * (rowCount / (rowCount - getX().getColumnDimension()));
    } else {
        final double weightedSsr = calculateResidualSumOfSquares() * (rowCount - 1);
        final double weightedSsto =
                calculateTotalSumOfSquares() * (rowCount - getX().getColumnDimension());
        adjusted = 1 - weightedSsr / weightedSsto;
    }
    return adjusted;
}