#include <iostream>
#include <vector>
#include <string>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <cstdlib>
#include <cmath>
using namespace std;

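// A small end-to-end example: reads one line of (x, y) pairs from standard
// input and fits the line y = m*x + b with batch gradient descent.
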
class input_handler {
  public:
    vector<double> x_data;
    vector<double> y_data;
    string userInput;

    // read one line of coordinate data from stdin and parse it immediately
    input_handler() {
        getline(cin, userInput);
        input_pusher(userInput);
    }

    ~input_handler() {
        // not strictly required (vectors free their own memory); kept as explicit cleanup
        x_data.clear();
        y_data.clear();
    }

    // extract alternating x/y values from free-form input
    void input_pusher(const string& userInput) {
        regex puncRemover("[^0-9.-]"); // blank out everything except digits, '.', and '-'
        string cleaned = regex_replace(userInput, puncRemover, " ");
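        // For example, "(1, 2), (3, 4)" is cleaned to " 1  2   3  4 ", which
        // the stream below splits into the tokens 1, 2, 3, 4.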
        istringstream stream(cleaned);
        string tokenized_input;
        int interval = 0;
        while (stream >> tokenized_input) {
            try {
                double value = stod(tokenized_input);
                interval++;
                if (interval % 2 != 0)
                    x_data.push_back(value); // odd-numbered tokens are x-values
                else
                    y_data.push_back(value); // even-numbered tokens are y-values
            } catch (const invalid_argument&) {
                // skip stray tokens (e.g. a lone '-') left over from the cleanup
            }
        }
        if (x_data.empty() || y_data.size() != x_data.size()) {
            cerr << "Error: input must contain a balanced, non-empty set of x/y values" << endl;
            exit(1);
        }
    }
};

class regressionModel : public input_handler {
  public:
    double m = 0;
    double b = 0;
    double learning_rate = 0.01; // step size for each gradient-descent update
    int iterations = 1000;       // maximum number of training passes

    regressionModel() {
        train();
    }

    // report the fitted line when the model goes out of scope
    ~regressionModel() {
        cout << "Predicted line: y = " << m << "x + " << b << endl;
    }

    void train() {
        int n = x_data.size(); // number of data points, used for averaging
        if (n == 0) return;    // nothing to fit
        double prev_difference = 0.0; // mean squared error from the previous iteration
        double curr_difference = 0.0; // mean squared error from the current iteration
        for (int i = 0; i < iterations; i++) {
            double compilation_m = 0; // gradient accumulator for m, reset every iteration
            double compilation_b = 0; // gradient accumulator for b
            vector<double> y_prediction = predict(m, b); // y = mx + b with the current parameters
            curr_difference = 0;
            for (int j = 0; j < n; j++) {
                // gradient descent: accumulate the partial derivatives of the squared error
                compilation_m += -2 * x_data[j] * (y_data[j] - y_prediction[j]); // depends on both x and y
                compilation_b += -2 * (y_data[j] - y_prediction[j]);             // depends on y only
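                // Derivation sketch: with MSE = (1/n) * sum_j (y_j - (m*x_j + b))^2,
                // the partial derivatives are
                //   dMSE/dm = (-2/n) * sum_j x_j * (y_j - y_pred_j)
                //   dMSE/db = (-2/n) * sum_j (y_j - y_pred_j)
                // The 1/n factor is applied later, in the parameter-update step.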

                // squared error: the gap between the actual and predicted y-value,
                // squared so it is non-negative and penalizes large misses more heavily
                curr_difference += (y_data[j] - (m * x_data[j] + b)) * (y_data[j] - (m * x_data[j] + b));
            }
            curr_difference /= n; // mean squared error over all points

            // stop early once the error has effectively stopped changing
            if (fabs(curr_difference - prev_difference) < 1e-6) break;

            prev_difference = curr_difference; // remember this error for the next convergence check

            // step each parameter along its averaged gradient, scaled by the learning rate
            m -= (learning_rate * compilation_m) / n;
            b -= (learning_rate * compilation_b) / n;
        }
    }

    // evaluate y = m*x + b for every stored x-value
    vector<double> predict(double m, double b) {
        vector<double> y_prediction;
        for (double X : x_data)
            y_prediction.push_back(m * X + b);
        return y_prediction;
    }
};

int main() {
    // constructing the model reads stdin, trains, and (via the destructor) prints the result
    regressionModel model;
    return 0;
}
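
// Example session (illustrative; assumes the program was compiled to ./regression):
//   $ echo "(1, 2), (2, 4), (3, 6)" | ./regression
// This should print a line close to "Predicted line: y = 2x + 0"; the exact
// digits depend on the learning rate and the early-stopping tolerance.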