import numpy as np
from pylab import *
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import warnings

warnings.filterwarnings("ignore")
| 7 | + |
| 8 | + |
class Regression:
    """Fits polynomial regressions of increasing degree to two datasets and
    exposes the best model found: its degree, coefficient of determination,
    coefficients, a prediction callable, a printable equation and a plot.
    """

    # Cached result tuple: (r2, degree, ordinal suffix, coefficients, predictor).
    __list_return: tuple

    def __init__(self, x: list[float], y: list[float]):
        """Validate the datasets and compute the best polynomial fit.

        Args:
            x: independent-variable values (more than 2 points).
            y: observed values, same length as ``x``.

        Raises:
            ValueError: if ``x`` and ``y`` differ in length or hold fewer
                than 3 points each.
        """
        # NOTE: the original annotated x/y as list[number]; `number` is not a
        # defined name and raised NameError when the class body was executed.
        np.random.seed(2)  # keep any numpy-based randomness deterministic
        if len(x) != len(y):
            raise ValueError("Invalid input for x or y")
        # BUG FIX: the original `not len(x) > 2 and len(y) > 2` only negated
        # the first comparison because of operator precedence.
        if not (len(x) > 2 and len(y) > 2):
            raise ValueError("Invalid input for x or y")
        self.__x = np.array(x)
        self.__y = np.array(y)
        self.__polynomial()
        if not self.__list_return[1]:
            # No degree survived the penalised search; retry WITHOUT the
            # complexity penalty.  (The original retried with control=True,
            # which merely repeated the identical search.)
            self.__polynomial(control=False)

    @property
    def x(self) -> list[float]:
        """The x-axis data as given (a list copy of the internal array)."""
        return self.__x.tolist()

    @property
    def y(self) -> list[float]:
        """The y-axis data as given (a list copy of the internal array)."""
        return self.__y.tolist()

    def get_r2(self) -> float:
        """Return the coefficient of determination (R²) of the chosen fit."""
        return self.__list_return[0]

    def get_degree(self) -> int:
        """Return the polynomial degree of the regression."""
        return self.__list_return[1]

    def get_ordinal(self) -> str:
        """Return the ordinal suffix ("st", "nd", "rd" or "th") of the degree."""
        return self.__list_return[2]

    def get_full_degree(self) -> str:
        """Return the degree together with its ordinal suffix, e.g. "2nd"."""
        return str(self.get_degree()) + str(self.get_ordinal())

    def get_coefficients(self) -> list[float]:
        """Return the coefficients of the regression equation, from the
        highest-degree term down to the constant term.

        NOTE(review): the stored value is whatever ``np.polyfit`` produced
        (an ndarray); it is returned as-is for backward compatibility.
        """
        return self.__list_return[3]

    def get_prediction(self, x: float) -> float:
        """Evaluate the fitted polynomial at ``x``."""
        return self.__list_return[4](x)

    def equation_string(self) -> str:
        """Return the regression equation formatted as a readable string,
        e.g. ``"y = 2.0x + 1.0 "`` (terms with a coefficient that rounds to
        0 are omitted)."""
        coefficients = self.__list_return[3]
        n = len(coefficients)
        body = ""
        for power in range(n - 1, -1, -1):
            coef = round(coefficients[n - 1 - power], 4)
            if coef == 0:
                continue  # vanishing term: skip it
            variable = f"x^{power}" if power > 1 else "x" if power == 1 else ""
            body += f"{'+' if coef > 0 else '-'} {abs(coef)}{variable} "
        # Drop a leading "+ " but KEEP the sign of a negative leading term.
        # (The original sliced two characters off unconditionally, silently
        # stripping a leading minus sign.)
        if body.startswith("+ "):
            body = body[2:]
        elif body.startswith("- "):
            body = "-" + body[2:]
        return "y = " + body

    @staticmethod
    def __r2(y_true, y_pred) -> float:
        """Coefficient of determination, the same formula sklearn's
        ``r2_score`` uses: 1 - SS_res / SS_tot.  Degenerate constant-y data
        scores 1.0 on a perfect fit and 0.0 otherwise."""
        y_true = np.asarray(y_true, dtype=float)
        y_pred = np.asarray(y_pred, dtype=float)
        ss_res = float(np.sum((y_true - y_pred) ** 2))
        ss_tot = float(np.sum((y_true - y_true.mean()) ** 2))
        if ss_tot == 0.0:
            return 1.0 if ss_res == 0.0 else 0.0
        return 1.0 - ss_res / ss_tot

    def __set_list_return(self, r2, degree, coefficients, prediction):
        """Cache the fit results in the shared result tuple."""
        # BUG FIX: the original mapped every degree above 3 to "th",
        # which is wrong for 21st/22nd/23rd.
        if 10 <= degree % 100 <= 20:
            suffix = "th"
        else:
            suffix = {1: "st", 2: "nd", 3: "rd"}.get(degree % 10, "th")
        self.__list_return = (r2, degree, suffix, coefficients, prediction)

    def visualization(self):
        """Plot a scatter of the data overlaid with the fitted regression
        curve (sampled at linspace's default 50 points)."""
        xp = np.linspace(min(self.__x), max(self.__x))
        plt.scatter(self.__x, self.__y)
        plt.plot(xp, self.__list_return[4](xp), c='r')
        plt.show()

    def __polynomial(self, control: bool = True):
        """Search degrees 1..30 for the polynomial that best fits the data.

        Args:
            control: when True, penalise each candidate's R² by
                ``degree / 30`` so a marginally better score cannot win
                with a far more complex model.
        """
        best_r2 = 0.0
        best_degree = 0
        best_prediction = None
        best_coefficients = []
        x, y = self.__x, self.__y
        for deg in range(1, 31):
            # BUG FIX: the original used try/finally with no except clause,
            # so a polyfit failure raised NameError on `prediction` inside
            # the finally block.
            try:
                coefficients = np.polyfit(x, y, deg)
            except np.linalg.LinAlgError:
                continue  # ill-conditioned at this degree; try the next one
            prediction = np.poly1d(coefficients)
            score = self.__r2(y, prediction(x))
            # Early accept: an excellent fit even after the complexity
            # penalty.  (The original compared `score - deg >= 0.9`, which
            # is unsatisfiable for R² <= 1, and then fell through so its
            # result was overwritten anyway.)
            if score - deg / 30 >= 0.9:
                self.__set_list_return(score, deg, coefficients, prediction)
                return
            if best_r2 < score - (deg / 30 if control else 0):
                best_r2 = score
                best_degree = deg
                best_prediction = prediction
                best_coefficients = coefficients
        self.__set_list_return(best_r2, best_degree, best_coefficients,
                               best_prediction)

    def best_degree_polynomial(self) -> str:
        """Return the best polynomial degree formatted as a string."""
        return "\n " + f"The best polynomial to describe the given sets' behaviour is the {self.get_full_degree()} degree polynomial"

    def coefficient_of_determination(self) -> str:
        """Return the coefficient of determination (R²) formatted as a string."""
        return "\n " + f"It has a coefficient of determination of {self.get_r2():.4f}"

    def __r2_interpretation(self) -> str:
        """Return a caveat about low R² values, or "" when R² >= 0.6."""
        if self.get_r2() < 0.45:
            return "\n" + f"This index being low, represents it is not possible to find any reliably predictable behaviour given the previous datasets, therefore the actual accuracy for the predictions will be low and highly dependent on chance"
        if self.get_r2() < 0.6:
            return "\n" + f"This index represents the predictions will not have optimal accuracy when making predictions since the given datasets don't set up an ideal predictable behaviour"
        return ""

    def equation_text(self) -> str:
        """Return the polynomial equation formatted as a string."""
        return "\n " + f"The equation can be written as {self.equation_string()}" + "\n and makes predictions via the predict function\n"

    def full_text_analysis(self) -> str:
        """Return the full text analysis (degree, R², caveats, equation)."""
        return self.best_degree_polynomial() + self.coefficient_of_determination() + self.__r2_interpretation() + self.equation_text()

    def full_analysis(self) -> str:
        """Show the plot, then return the full text analysis."""
        self.visualization()
        return self.full_text_analysis()

    def print_full_analysis(self):
        """Print the full text analysis, then show the plot."""
        print(self.full_text_analysis())
        self.visualization()
def regress(x, y):
    """Convenience factory: build a ``Regression`` for the given datasets."""
    model = Regression(x, y)
    return model