import numpy as np
from pylab import *
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings("ignore")
| 7 | + |
| 8 | + |
class Regression:
    """Fits polynomial regressions of increasing degree to two datasets and
    exposes the best model found: its degree, coefficient of determination,
    coefficients, a prediction callable, a printable equation and a plot.
    """

    # Cached result tuple: (r2, degree, ordinal suffix, coefficients, predictor).
    __list_return: tuple

    def __init__(self, x: list[float], y: list[float]):
        """Validate the datasets and compute the best polynomial fit.

        Args:
            x: independent-variable values (more than 2 points).
            y: observed values, same length as ``x``.

        Raises:
            ValueError: if ``x`` and ``y`` differ in length or hold fewer
                than 3 points each.
        """
        # NOTE: the original annotated x/y as list[number]; `number` is not a
        # defined name and raised NameError when the class body was executed.
        np.random.seed(2)  # keep any numpy-based randomness deterministic
        if len(x) != len(y):
            raise ValueError("Invalid input for x or y")
        # BUG FIX: the original `not len(x) > 2 and len(y) > 2` only negated
        # the first comparison because of operator precedence.
        if not (len(x) > 2 and len(y) > 2):
            raise ValueError("Invalid input for x or y")
        self.__x = np.array(x)
        self.__y = np.array(y)
        self.__polynomial()
        if not self.__list_return[1]:
            # No degree survived the penalised search; retry WITHOUT the
            # complexity penalty.  (The original retried with control=True,
            # which merely repeated the identical search.)
            self.__polynomial(control=False)

    @property
    def x(self) -> list[float]:
        """The x-axis data as given (a list copy of the internal array)."""
        return self.__x.tolist()

    @property
    def y(self) -> list[float]:
        """The y-axis data as given (a list copy of the internal array)."""
        return self.__y.tolist()

    def get_r2(self) -> float:
        """Return the coefficient of determination (R²) of the chosen fit."""
        return self.__list_return[0]

    def get_degree(self) -> int:
        """Return the polynomial degree of the regression."""
        return self.__list_return[1]

    def get_ordinal(self) -> str:
        """Return the ordinal suffix ("st", "nd", "rd" or "th") of the degree."""
        return self.__list_return[2]

    def get_full_degree(self) -> str:
        """Return the degree together with its ordinal suffix, e.g. "2nd"."""
        return str(self.get_degree()) + str(self.get_ordinal())

    def get_coefficients(self) -> list[float]:
        """Return the coefficients of the regression equation, from the
        highest-degree term down to the constant term.

        NOTE(review): the stored value is whatever ``np.polyfit`` produced
        (an ndarray); it is returned as-is for backward compatibility.
        """
        return self.__list_return[3]

    def get_prediction(self, x: float) -> float:
        """Evaluate the fitted polynomial at ``x``."""
        return self.__list_return[4](x)

    def equation_string(self) -> str:
        """Return the regression equation formatted as a readable string,
        e.g. ``"y = 2.0x + 1.0 "`` (terms with a coefficient that rounds to
        0 are omitted)."""
        coefficients = self.__list_return[3]
        n = len(coefficients)
        body = ""
        for power in range(n - 1, -1, -1):
            coef = round(coefficients[n - 1 - power], 4)
            if coef == 0:
                continue  # vanishing term: skip it
            variable = f"x^{power}" if power > 1 else "x" if power == 1 else ""
            body += f"{'+' if coef > 0 else '-'} {abs(coef)}{variable} "
        # Drop a leading "+ " but KEEP the sign of a negative leading term.
        # (The original sliced two characters off unconditionally, silently
        # stripping a leading minus sign.)
        if body.startswith("+ "):
            body = body[2:]
        elif body.startswith("- "):
            body = "-" + body[2:]
        return "y = " + body

    @staticmethod
    def __r2(y_true, y_pred) -> float:
        """Coefficient of determination, the same formula sklearn's
        ``r2_score`` uses: 1 - SS_res / SS_tot.  Degenerate constant-y data
        scores 1.0 on a perfect fit and 0.0 otherwise."""
        y_true = np.asarray(y_true, dtype=float)
        y_pred = np.asarray(y_pred, dtype=float)
        ss_res = float(np.sum((y_true - y_pred) ** 2))
        ss_tot = float(np.sum((y_true - y_true.mean()) ** 2))
        if ss_tot == 0.0:
            return 1.0 if ss_res == 0.0 else 0.0
        return 1.0 - ss_res / ss_tot

    def __set_list_return(self, r2, degree, coefficients, prediction):
        """Cache the fit results in the shared result tuple."""
        # BUG FIX: the original mapped every degree above 3 to "th",
        # which is wrong for 21st/22nd/23rd.
        if 10 <= degree % 100 <= 20:
            suffix = "th"
        else:
            suffix = {1: "st", 2: "nd", 3: "rd"}.get(degree % 10, "th")
        self.__list_return = (r2, degree, suffix, coefficients, prediction)

    def visualization(self):
        """Plot a scatter of the data overlaid with the fitted regression
        curve (sampled at linspace's default 50 points)."""
        xp = np.linspace(min(self.__x), max(self.__x))
        plt.scatter(self.__x, self.__y)
        plt.plot(xp, self.__list_return[4](xp), c='r')
        plt.show()

    def __polynomial(self, control: bool = True):
        """Search degrees 1..30 for the polynomial that best fits the data.

        Args:
            control: when True, penalise each candidate's R² by
                ``degree / 30`` so a marginally better score cannot win
                with a far more complex model.
        """
        best_r2 = 0.0
        best_degree = 0
        best_prediction = None
        best_coefficients = []
        x, y = self.__x, self.__y
        for deg in range(1, 31):
            # BUG FIX: the original used try/finally with no except clause,
            # so a polyfit failure raised NameError on `prediction` inside
            # the finally block.
            try:
                coefficients = np.polyfit(x, y, deg)
            except np.linalg.LinAlgError:
                continue  # ill-conditioned at this degree; try the next one
            prediction = np.poly1d(coefficients)
            score = self.__r2(y, prediction(x))
            # Early accept: an excellent fit even after the complexity
            # penalty.  (The original compared `score - deg >= 0.9`, which
            # is unsatisfiable for R² <= 1, and then fell through so its
            # result was overwritten anyway.)
            if score - deg / 30 >= 0.9:
                self.__set_list_return(score, deg, coefficients, prediction)
                return
            if best_r2 < score - (deg / 30 if control else 0):
                best_r2 = score
                best_degree = deg
                best_prediction = prediction
                best_coefficients = coefficients
        self.__set_list_return(best_r2, best_degree, best_coefficients,
                               best_prediction)

    def best_degree_polynomial(self) -> str:
        """Return the best polynomial degree formatted as a string."""
        return "\n " + f"The best polynomial to describe the given sets' behaviour is the {self.get_full_degree()} degree polynomial"

    def coefficient_of_determination(self) -> str:
        """Return the coefficient of determination (R²) formatted as a string."""
        return "\n " + f"It has a coefficient of determination of {self.get_r2():.4f}"

    def __r2_interpretation(self) -> str:
        """Return a caveat about low R² values, or "" when R² >= 0.6."""
        if self.get_r2() < 0.45:
            return "\n" + f"This index being low, represents it is not possible to find any reliably predictable behaviour given the previous datasets, therefore the actual accuracy for the predictions will be low and highly dependent on chance"
        if self.get_r2() < 0.6:
            return "\n" + f"This index represents the predictions will not have optimal accuracy when making predictions since the given datasets don't set up an ideal predictable behaviour"
        return ""

    def equation_text(self) -> str:
        """Return the polynomial equation formatted as a string."""
        return "\n " + f"The equation can be written as {self.equation_string()}" + "\n and makes predictions via the predict function\n"

    def full_text_analysis(self) -> str:
        """Return the full text analysis (degree, R², caveats, equation)."""
        return self.best_degree_polynomial() + self.coefficient_of_determination() + self.__r2_interpretation() + self.equation_text()

    def full_analysis(self) -> str:
        """Show the plot, then return the full text analysis."""
        self.visualization()
        return self.full_text_analysis()

    def print_full_analysis(self):
        """Print the full text analysis, then show the plot."""
        print(self.full_text_analysis())
        self.visualization()
def regress(x, y):
    """Convenience factory: build a ``Regression`` for the given datasets."""
    model = Regression(x, y)
    return model