Skip to content

Commit 2aa0ff0

Browse files
documentation and whole refactor of main source code, plus files to be published
1 parent 46bd22b commit 2aa0ff0

File tree

6 files changed

+339
-75
lines changed

6 files changed

+339
-75
lines changed

.gitignore

+154
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
test.py
2+
3+
# Byte-compiled / optimized / DLL files
4+
__pycache__/
5+
*.py[cod]
6+
*$py.class
7+
8+
# C extensions
9+
*.so
10+
11+
# Distribution / packaging
12+
.Python
13+
build/
14+
develop-eggs/
15+
dist/
16+
downloads/
17+
eggs/
18+
.eggs/
19+
lib/
20+
lib64/
21+
parts/
22+
sdist/
23+
var/
24+
wheels/
25+
share/python-wheels/
26+
*.egg-info/
27+
.installed.cfg
28+
*.egg
29+
MANIFEST
30+
31+
# PyInstaller
32+
# Usually these files are written by a python script from a template
33+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
34+
*.manifest
35+
*.spec
36+
37+
# Installer logs
38+
pip-log.txt
39+
pip-delete-this-directory.txt
40+
41+
# Unit test / coverage reports
42+
htmlcov/
43+
.tox/
44+
.nox/
45+
.coverage
46+
.coverage.*
47+
.cache
48+
nosetests.xml
49+
coverage.xml
50+
*.cover
51+
*.py,cover
52+
.hypothesis/
53+
.pytest_cache/
54+
cover/
55+
56+
# Translations
57+
*.mo
58+
*.pot
59+
60+
# Django stuff:
61+
*.log
62+
local_settings.py
63+
db.sqlite3
64+
db.sqlite3-journal
65+
66+
# Flask stuff:
67+
instance/
68+
.webassets-cache
69+
70+
# Scrapy stuff:
71+
.scrapy
72+
73+
# Sphinx documentation
74+
docs/_build/
75+
76+
# PyBuilder
77+
.pybuilder/
78+
target/
79+
80+
# Jupyter Notebook
81+
.ipynb_checkpoints
82+
83+
# IPython
84+
profile_default/
85+
ipython_config.py
86+
87+
# pyenv
88+
# For a library or package, you might want to ignore these files since the code is
89+
# intended to run in multiple environments; otherwise, check them in:
90+
# .python-version
91+
92+
# pipenv
93+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
95+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
96+
# install all needed dependencies.
97+
#Pipfile.lock
98+
99+
# poetry
100+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
101+
# This is especially recommended for binary packages to ensure reproducibility, and is more
102+
# commonly ignored for libraries.
103+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
104+
#poetry.lock
105+
106+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
107+
__pypackages__/
108+
109+
# Celery stuff
110+
celerybeat-schedule
111+
celerybeat.pid
112+
113+
# SageMath parsed files
114+
*.sage.py
115+
116+
# Environments
117+
.env
118+
.venv
119+
env/
120+
venv/
121+
ENV/
122+
env.bak/
123+
venv.bak/
124+
125+
# Spyder project settings
126+
.spyderproject
127+
.spyproject
128+
129+
# Rope project settings
130+
.ropeproject
131+
132+
# mkdocs documentation
133+
/site
134+
135+
# mypy
136+
.mypy_cache/
137+
.dmypy.json
138+
dmypy.json
139+
140+
# Pyre type checker
141+
.pyre/
142+
143+
# pytype static type analyzer
144+
.pytype/
145+
146+
# Cython debug symbols
147+
cython_debug/
148+
149+
# PyCharm
150+
#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
151+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
152+
# and can be added to the global gitignore or merged into this file. For a more nuclear
153+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
154+
#.idea/

README.md

+1-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
# Polynomial Regression
22

3-
4-
Python script that analyses the given datasets and comes up with the best polynomial regression representation with the smallest polynomial degree possible, to be the most reliable with the least complexity possible
3+
Python package that analyses the given datasets and finds the best polynomial regression representation with the smallest polynomial degree possible, so that the model is as reliable as possible with the least possible complexity.
54

65
To use the script properly, change the x and y values at the top of the page to the two datasets you want to represent mathematically, then run the code. It will print the R² value, the equation and its degree, and display the chart with the regression.

main.py

-73
This file was deleted.

polynomial-regression/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# __init__.py
2+
3+
# Package version
4+
__version__ = "2.0.0"

polynomial-regression/main.py

+154
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
import numpy as np
2+
from pylab import *
3+
from sklearn.metrics import r2_score
4+
import matplotlib.pyplot as plt
5+
import warnings
6+
warnings.filterwarnings("ignore")
7+
8+
9+
class Regression:
    """Find a low-degree polynomial regression that describes two datasets.

    On construction, polynomials of degree 1 to 30 are fitted with
    ``np.polyfit`` and scored with ``r2_score``.  A candidate degree only
    replaces the current best one when its R² exceeds the best R² found so
    far by more than a penalty of ``degree / 30``, which favours simple
    models.  If no degree passes that test, the search is repeated without
    the penalty.  If that finds nothing either, a constant model (the mean
    of ``y``) is stored and reported as degree 0 with an R² of 0.
    """

    # Highest polynomial degree tried; also the divisor of the complexity penalty.
    _MAX_DEGREE = 30

    # (r2, degree, ordinal suffix, coefficients, predictor) of the chosen model.
    __list_return: tuple

    def __init__(self, x: list[float], y: list[float]):
        """Validate the datasets and run the degree search.

        Args:
            x: Values on the x axis.
            y: Values on the y axis; must have the same length as ``x``.

        Raises:
            ValueError: If ``x`` and ``y`` differ in length or hold fewer
                than three points.
        """
        if len(x) != len(y):
            raise ValueError("Invalid input for x or y")
        # The lengths are equal at this point, so checking one is enough.
        if len(x) <= 2:
            raise ValueError("Invalid input for x or y")
        self.__x = np.array(x)
        self.__y = np.array(y)
        self.__polynomial()
        if not self.__list_return[1]:
            # No degree survived the complexity penalty: search again
            # without it so the best raw fit is used instead.
            self.__polynomial(control=False)

    @property
    def x(self) -> list[float]:
        """Returns the list given in the x axis"""
        return self.__x.tolist()

    @property
    def y(self) -> list[float]:
        """Returns the list given in the y axis"""
        return self.__y.tolist()

    def get_r2(self) -> float:
        """Returns the coefficient of determination (R²) of the chosen model"""
        return self.__list_return[0]

    def get_degree(self) -> int:
        """Returns the polynomial degree of the regression"""
        return self.__list_return[1]

    def get_ordinal(self) -> str:
        """Returns the ordinal suffix ("st", "nd", "rd" or "th") of the regression degree"""
        return self.__list_return[2]

    def get_full_degree(self) -> str:
        """Returns the polynomial degree followed by its ordinal suffix, e.g. "2nd" """
        return str(self.get_degree()) + str(self.get_ordinal())

    def get_coefficients(self) -> np.ndarray:
        """Returns the coefficients of the regression equation,
        going from the highest degree term towards the constant term"""
        return self.__list_return[3]

    def get_prediction(self, x: float) -> float:
        """Returns the prediction for a specific x value using the polynomial regression calculated"""
        return self.__list_return[4](x)

    def equation_string(self) -> str:
        """Returns the polynomial equation as a string, e.g. ``y = -2.0x^2 + 3.5``.

        Coefficients are rounded to four decimals and terms whose rounded
        coefficient is zero are omitted.  If every term is omitted the
        result is ``y = 0``.
        """
        coefficients = self.__list_return[3]
        highest_power = len(coefficients) - 1
        terms = []
        for position, coefficient in enumerate(coefficients):
            rounded = round(float(coefficient), 4)
            if rounded == 0:
                continue
            power = highest_power - position
            if power > 1:
                variable = f"x^{power}"
            elif power == 1:
                variable = "x"
            else:
                variable = ""
            magnitude = f"{abs(rounded)}{variable}"
            if terms:
                terms.append(f"{'-' if rounded < 0 else '+'} {magnitude}")
            else:
                # The leading term carries its own sign so a negative
                # leading coefficient is not silently shown as positive.
                terms.append(f"-{magnitude}" if rounded < 0 else magnitude)
        if not terms:
            return "y = 0"
        return "y = " + " ".join(terms)

    @staticmethod
    def __ordinal_suffix(degree: int) -> str:
        """Returns the English ordinal suffix for ``degree`` (1st, 2nd, 3rd, 4th, 11th, 21st, ...)"""
        if 10 <= degree % 100 <= 20:
            return "th"
        return {1: "st", 2: "nd", 3: "rd"}.get(degree % 10, "th")

    def __set_list_return(self, r2, degree, coefficients, prediction):
        """Stores the chosen model as (r2, degree, ordinal suffix, coefficients, predictor)"""
        self.__list_return = (
            r2,
            degree,
            self.__ordinal_suffix(degree),
            coefficients,
            prediction,
        )

    def visualization(self):
        """
        Plots both a scatter plot of the data and a line of the regression calculated
        """
        xp = np.linspace(self.__x.min(), self.__x.max())
        plt.scatter(self.__x, self.__y)
        plt.plot(xp, self.__list_return[4](xp), c='r')
        plt.show()

    def __polynomial(self, control=True):
        """Searches degrees 1 to 30 for the best polynomial regression and stores it.

        Args:
            control: When true, a candidate's R² is reduced by
                ``degree / 30`` before being compared with the best R²
                found so far, so higher degrees must earn their extra
                complexity.  When false, the raw R² is compared.
        """
        x = self.__x
        y = self.__y
        best_r2 = 0.0
        best_degree = 0
        best_coefficients = None
        best_predictor = None
        for degree in range(1, self._MAX_DEGREE + 1):
            coefficients = np.polyfit(x, y, degree)
            predictor = np.poly1d(coefficients)
            score = r2_score(y, predictor(x))
            penalty = degree / self._MAX_DEGREE if control else 0
            if best_r2 < score - penalty:
                best_r2 = score
                best_degree = degree
                best_coefficients = coefficients
                best_predictor = predictor

        if best_predictor is None:
            # Nothing beat the baseline: keep degree 0 but store a usable
            # constant model (the mean of y) so predictions and plots work.
            best_coefficients = np.array([y.mean()])
            best_predictor = np.poly1d(best_coefficients)

        self.__set_list_return(best_r2, best_degree, best_coefficients, best_predictor)

    def best_degree_polynomial(self) -> str:
        """Returns the best degree of polynomial formatted as a string"""
        return "\n " + f"The best polynomial to describe the given sets' behaviour is the {self.get_full_degree()} degree polynomial"

    def coefficient_of_determination(self) -> str:
        """Returns the coefficient of determination (R²) formatted as a string"""
        return "\n " + f"It has a coefficient of determination of {self.get_r2():.4f}"

    def __r2_interpretation(self) -> str:
        """Returns a warning text when R² is below 0.45 or below 0.6, otherwise an empty string"""
        r2 = self.get_r2()
        if r2 < 0.45:
            return "\n" + "This index being low, represents it is not possible to find any reliably predictable behaviour given the previous datasets, therefore the actual accuracy for the predictions will be low and highly dependent on chance"
        if r2 < 0.6:
            return "\n" + "This index represents the predictions will not have optimal accuracy when making predictions since the given datasets don't set up an ideal predictable behaviour"
        return ""

    def equation_text(self) -> str:
        """Returns the polynomial equation formatted as a sentence"""
        return "\n " + f"The equation can be written as {self.equation_string()}" + "\n and makes predictions via the predict function\n"

    def full_text_analysis(self) -> str:
        """Returns the full text analysis"""
        return self.best_degree_polynomial() + self.coefficient_of_determination() + self.__r2_interpretation() + self.equation_text()

    def full_analysis(self) -> str:
        """Shows the visualization, then returns the full text analysis"""
        self.visualization()
        return self.full_text_analysis()

    def print_full_analysis(self):
        """Prints the full text analysis, then shows the visualization"""
        print(self.full_text_analysis())
        self.visualization()
150+
151+
152+
def regress(x, y):
    """Build a Regression object from the datasets ``x`` and ``y`` and return it"""
    model = Regression(x, y)
    return model

0 commit comments

Comments
 (0)