Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions data.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
x,y
32.502345269453031,31.70700584656992
53.426804033275019,68.77759598163891
61.530358025636438,62.562382297945803
Expand Down
68 changes: 68 additions & 0 deletions linear_regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from typing import List

class _TrainingDataError(ValueError, AssertionError):
    """Raised by ``LinearRegressionModel.train`` when the stored data is unusable.

    Derives from ``ValueError`` (the conventional type for bad input) and from
    ``AssertionError`` so that callers written against the earlier
    ``assert``-based checks keep catching it.
    """


class LinearRegressionModel:
    """Fit the line ``y = m * x + b`` to stored points by gradient descent.

    The loss is the sum of squared residuals; gradients are summed (not
    averaged) over all stored points.
    """

    def __init__(self):
        """Create an untrained model with no points and ``m = b = 0.0``."""
        self.xs = []
        self.ys = []
        self.m = 0.0
        self.b = 0.0

    def points(self, xs: List[float], ys: List[float]):
        """Store the training data.

        The sequences are kept by reference (no copy is made). Length checks
        are deferred to ``train``.
        """
        self.xs = xs
        self.ys = ys

    def _squared_error(self) -> float:
        """Return the sum of squared residuals for the current ``m`` and ``b``."""
        error = 0.0
        # Explicit accumulation keeps the float summation order fixed.
        for x, y in zip(self.xs, self.ys):
            error += (y - (self.m * x + self.b)) ** 2
        return error

    def _gradient_step(self) -> List[float]:
        """Return ``[d_loss/d_m, d_loss/d_b]`` of the summed squared error."""
        m_gradient = 0.0
        b_gradient = 0.0

        for x, y in zip(self.xs, self.ys):
            m_gradient += -2 * (y - self.m * x - self.b) * x
            b_gradient += -2 * (y - self.m * x - self.b)

        return [m_gradient, b_gradient]

    def _add_gradient(self, learning_rate: float):
        """Move ``m`` and ``b`` one step against the gradient."""
        m_gradient, b_gradient = self._gradient_step()
        self.m -= learning_rate * m_gradient
        self.b -= learning_rate * b_gradient

    def train(self, epochs: int = 500, learning_rate: float = 0.001):
        """Run ``epochs`` gradient-descent updates on the stored points.

        Training continues from the current ``m`` and ``b``; they are not
        reset between calls.

        Raises:
            ValueError: (also an ``AssertionError``) if ``xs`` and ``ys``
                differ in length or hold fewer than 3 points.
        """
        # Explicit raises instead of ``assert``: assertions are removed when
        # Python runs with -O, which would let bad data through unchecked.
        if len(self.xs) != len(self.ys):
            raise _TrainingDataError(
                "Input and output lists must have the same length."
            )
        if len(self.xs) <= 2:
            raise _TrainingDataError("Data must contain at least 3 points.")

        for _ in range(epochs):
            self._add_gradient(learning_rate)

    def predict(self, x: float) -> float:
        """Return the fitted line's value ``m * x + b`` at ``x``."""
        return self.m * x + self.b

if __name__ == "__main__":
    # Demo run: fit noisy samples of y = 2x + 3 and print the learned line.
    import random

    # Fixed seed so the generated noise is the same on every run.
    random.seed(42)

    inputs = list(range(100))
    targets = [2 * x + 3 + random.uniform(-10, 10) for x in inputs]

    model = LinearRegressionModel()
    model.points(inputs, targets)
    model.train(epochs=100, learning_rate=1e-6)

    print(f"Trained model: y = {model.m}x + {model.b}")

    # Spot-check the fitted line at a single input.
    test_x = 50
    print(f"Prediction for x={test_x}: y={model.predict(test_x)}")
26 changes: 26 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import pandas as pd
import matplotlib.pyplot as plt
from linear_regression import LinearRegressionModel

# Load the samples; the code below reads the 'x' and 'y' columns of the CSV.
frame = pd.read_csv("./data.csv")
xs, ys = frame['x'].values, frame['y'].values

# Fit the model on the loaded samples.
model = LinearRegressionModel()
model.points(xs, ys)
model.train(epochs=100, learning_rate=1e-7)

print(f"m = {model.m}, b = {model.b}")

# Evaluate the fitted line at every sample so it can be drawn over the data.
predictions = list(map(model.predict, xs))

plt.scatter(xs, ys, color='blue', label='Data Points')
plt.plot(xs, predictions, color='red', label='Regression Line')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()