
#include <vector>
#include <cmath>
- #include <iostream>
+ #include <stdexcept>
+ #include <algorithm>
+ #include <numeric>

/**
 * @file PolynomialRegression.hpp
- * @brief A simple implementation of Polynomial Regression.
+ * @brief Improved implementation of Polynomial Regression.
 */
+
/**
 * @class PolynomialRegression
 * @brief Polynomial Regression model for fitting polynomial curves.
@@ -18,15 +21,33 @@ class PolynomialRegression {
    /**
     * @brief Constructor initializing the polynomial degree.
     * @param degree Degree of the polynomial.
+      * @param regularizationParameter Regularization strength (default is 0.0, no regularization).
     */
-     PolynomialRegression(int degree) : degree_(degree) {}
+     PolynomialRegression(int degree, double regularizationParameter = 0.0)
+         : degree_(degree), lambda_(regularizationParameter) {
+         if (degree_ < 0) {
+             throw std::invalid_argument("Degree of the polynomial must be non-negative.");
+         }
+         if (lambda_ < 0.0) {
+             throw std::invalid_argument("Regularization parameter must be non-negative.");
+         }
+     }

    /**
     * @brief Train the model using features and target values.
     * @param x Feature vector.
     * @param y Target vector.
     */
    void train(const std::vector<double>& x, const std::vector<double>& y) {
+         if (x.size() != y.size()) {
+             throw std::invalid_argument("Feature and target vectors must have the same length.");
+         }
+         if (x.empty()) {
+             throw std::invalid_argument("Input vectors must not be empty.");
+         }
+         if (x.size() <= static_cast<size_t>(degree_)) {
+             throw std::invalid_argument("Number of data points must be greater than the polynomial degree.");
+         }
        computeCoefficients(x, y);
    }

@@ -36,15 +57,25 @@ class PolynomialRegression {
     * @return Predicted value.
     */
    double predict(double x) const {
-         double result = 0.0;
-         for (int i = 0; i <= degree_; ++i) {
-             result += coefficients_[i] * std::pow(x, i);
+         // Use Horner's method for efficient polynomial evaluation
+         double result = coefficients_.back();
+         for (int i = static_cast<int>(coefficients_.size()) - 2; i >= 0; --i) {
+             result = result * x + coefficients_[i];
        }
        return result;
    }

+     /**
+      * @brief Get the coefficients of the fitted polynomial.
+      * @return Vector of coefficients.
+      */
+     std::vector<double> getCoefficients() const {
+         return coefficients_;
+     }
+
private:
-     int degree_; ///< Degree of the polynomial
+     int degree_;    ///< Degree of the polynomial
+     double lambda_; ///< Regularization parameter
    std::vector<double> coefficients_; ///< Coefficients of the polynomial

    /**
@@ -53,66 +84,89 @@ class PolynomialRegression {
     * @param y Target vector.
     */
    void computeCoefficients(const std::vector<double>& x, const std::vector<double>& y) {
-         int n = x.size();
+         size_t n = x.size();
        int m = degree_ + 1;

-         // Create the matrix X and vector Y for the normal equation
-         std::vector<std::vector<double>> X(m, std::vector<double>(m, 0.0));
-         std::vector<double> Y(m, 0.0);
+         // Precompute powers of x
+         std::vector<std::vector<double>> X(n, std::vector<double>(m));
+         for (size_t i = 0; i < n; ++i) {
+             X[i][0] = 1.0;
+             for (int j = 1; j < m; ++j) {
+                 X[i][j] = X[i][j - 1] * x[i];
+             }
+         }
+
+         // Construct matrices for normal equations: (X^T X + lambda * I) * w = X^T y
+         std::vector<std::vector<double>> XtX(m, std::vector<double>(m, 0.0));
+         std::vector<double> Xty(m, 0.0);

-         // Populate X and Y using the x and y data points
-         for (int i = 0; i < n; ++i) {
-             for (int j = 0; j < m; ++j) {
-                 Y[j] += std::pow(x[i], j) * y[i];
-                 for (int k = 0; k < m; ++k) {
-                     X[j][k] += std::pow(x[i], j + k);
+         for (int i = 0; i < m; ++i) {
+             for (size_t k = 0; k < n; ++k) {
+                 Xty[i] += X[k][i] * y[k];
+             }
+             for (int j = i; j < m; ++j) {
+                 for (size_t k = 0; k < n; ++k) {
+                     XtX[i][j] += X[k][i] * X[k][j];
                }
+                 XtX[j][i] = XtX[i][j]; // Symmetric matrix
+             }
+             // Add regularization term
+             if (i > 0) { // Do not regularize bias term
+                 XtX[i][i] += lambda_;
            }
        }

-         // Solve the system using Gaussian elimination
-         coefficients_ = gaussianElimination(X, Y);
+         // Solve the system using a more stable method (e.g., Cholesky decomposition)
+         coefficients_ = solveLinearSystem(XtX, Xty);
    }

    /**
-      * @brief Solves the linear system using Gaussian elimination.
-      * @param A Matrix representing the system's coefficients.
-      * @param b Vector representing the constant terms.
-      * @return Solution vector containing the polynomial coefficients.
+      * @brief Solves a symmetric positive-definite linear system using Cholesky decomposition.
+      * @param A Symmetric positive-definite matrix.
+      * @param b Right-hand side vector.
+      * @return Solution vector.
     */
-     std::vector<double> gaussianElimination(std::vector<std::vector<double>>& A, std::vector<double>& b) {
+     std::vector<double> solveLinearSystem(const std::vector<std::vector<double>>& A, const std::vector<double>& b) {
        int n = A.size();
+         std::vector<std::vector<double>> L(n, std::vector<double>(n, 0.0));

-         // Perform Gaussian elimination
+         // Cholesky decomposition: A = L * L^T
        for (int i = 0; i < n; ++i) {
-             // Partial pivoting
-             int maxRow = i;
-             for (int k = i + 1; k < n; ++k) {
-                 if (std::fabs(A[k][i]) > std::fabs(A[maxRow][i])) {
-                     maxRow = k;
+             for (int k = 0; k <= i; ++k) {
+                 double sum = 0.0;
+                 for (int j = 0; j < k; ++j) {
+                     sum += L[i][j] * L[k][j];
+                 }
+                 if (i == k) {
+                     double val = A[i][i] - sum;
+                     if (val <= 0.0) {
+                         throw std::runtime_error("Matrix is not positive-definite.");
+                     }
+                     L[i][k] = std::sqrt(val);
+                 } else {
+                     L[i][k] = (A[i][k] - sum) / L[k][k];
                }
            }
-             std::swap(A[i], A[maxRow]);
-             std::swap(b[i], b[maxRow]);
+         }

-             // Make all rows below this one 0 in the current column
-             for (int k = i + 1; k < n; ++k) {
-                 double factor = A[k][i] / A[i][i];
-                 for (int j = i; j < n; ++j) {
-                     A[k][j] -= factor * A[i][j];
-                 }
-                 b[k] -= factor * b[i];
+         // Solve L * y = b
+         std::vector<double> y(n);
+         for (int i = 0; i < n; ++i) {
+             double sum = 0.0;
+             for (int k = 0; k < i; ++k) {
+                 sum += L[i][k] * y[k];
            }
+             y[i] = (b[i] - sum) / L[i][i];
        }

-         // Back substitution
-         std::vector<double> x(n, 0.0);
+         // Solve L^T * x = y
+         std::vector<double> x(n);
        for (int i = n - 1; i >= 0; --i) {
-             x[i] = b[i];
-             for (int j = i + 1; j < n; ++j) {
-                 x[i] -= A[i][j] * x[j];
+             double sum = 0.0;
+             for (int k = i + 1; k < n; ++k) {
+                 sum += L[k][i] * x[k];
            }
-             x[i] /= A[i][i];
+             x[i] = (y[i] - sum) / L[i][i];
        }

        return x;
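
For review, a minimal usage sketch of the interface this change settles on (degree plus optional regularization parameter, train, predict, getCoefficients). The sample data, the 1e-6 ridge value, and the main() harness are illustrative assumptions, not part of the diff:

#include "PolynomialRegression.hpp"
#include <iostream>
#include <vector>

int main() {
    // Degree-2 fit of y = x^2 with a tiny ridge penalty (illustrative values).
    PolynomialRegression model(2, 1e-6);

    std::vector<double> x = {0.0, 1.0, 2.0, 3.0, 4.0};
    std::vector<double> y = {0.0, 1.0, 4.0, 9.0, 16.0};

    model.train(x, y);  // throws std::invalid_argument on mismatched, empty, or too-small input
    std::cout << "p(5) = " << model.predict(5.0) << "\n";

    // Coefficients are stored in ascending-power order (constant term first).
    for (double c : model.getCoefficients()) {
        std::cout << c << " ";
    }
    std::cout << "\n";
    return 0;
}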