From 36ba8e95d756670bee76262bd01a5954c4a4364c Mon Sep 17 00:00:00 2001
From: Swayam Bhoir
Date: Sun, 21 Dec 2025 16:46:06 +0530
Subject: [PATCH] Add class oriented linear regression model.

---
 data.csv             |  1 +
 linear_regression.py | 81 ++++++++++++++++++++++++++++++++++++++++++++
 test.py              | 32 +++++++++++++++++
 3 files changed, 114 insertions(+)
 create mode 100644 linear_regression.py
 create mode 100644 test.py

diff --git a/data.csv b/data.csv
index 9e8e26c..2e61e5c 100644
--- a/data.csv
+++ b/data.csv
@@ -1,3 +1,4 @@
+x,y
 32.502345269453031,31.70700584656992
 53.426804033275019,68.77759598163891
 61.530358025636438,62.562382297945803
diff --git a/linear_regression.py b/linear_regression.py
new file mode 100644
index 0000000..5658ba0
--- /dev/null
+++ b/linear_regression.py
@@ -0,0 +1,81 @@
+"""One-variable linear regression fitted with batch gradient descent."""
+
+from typing import List
+
+
+class LinearRegressionModel:
+    """Fit ``y = m * x + b`` by minimising the summed squared error."""
+
+    def __init__(self):
+        self.xs = []
+        self.ys = []
+        self.m = 0.0
+        self.b = 0.0
+
+    def points(self, xs: List[float], ys: List[float]) -> None:
+        """Store the training inputs and targets (kept by reference)."""
+        self.xs = xs
+        self.ys = ys
+
+    def _squared_error(self) -> float:
+        """Return the summed squared residual of the current line."""
+        error = 0.0
+        for x, y in zip(self.xs, self.ys):
+            error += (y - (self.m * x + self.b)) ** 2
+        return error
+
+    def _gradient_step(self):
+        """Return ``[dE/dm, dE/db]`` for the summed squared error ``E``."""
+        m_gradient = 0.0
+        b_gradient = 0.0
+        for x, y in zip(self.xs, self.ys):
+            residual = y - self.m * x - self.b
+            m_gradient += -2 * residual * x
+            b_gradient += -2 * residual
+        return [m_gradient, b_gradient]
+
+    def _add_gradient(self, learning_rate: float):
+        """Move ``m`` and ``b`` one step against the gradient."""
+        m_gradient, b_gradient = self._gradient_step()
+        self.m -= learning_rate * m_gradient
+        self.b -= learning_rate * b_gradient
+
+    def train(self, epochs: int = 500, learning_rate: float = 0.001):
+        """Run ``epochs`` gradient-descent updates on the stored points.
+
+        Raises:
+            ValueError: If ``xs`` and ``ys`` differ in length, or if fewer
+                than 3 points were supplied.
+        """
+        # Raise explicitly: ``assert`` statements vanish under ``python -O``.
+        if len(self.xs) != len(self.ys):
+            raise ValueError("Input and output lists must have the same length.")
+        if len(self.xs) < 3:
+            raise ValueError("Data must contain at least 3 points.")
+
+        for _ in range(epochs):
+            self._add_gradient(learning_rate)
+
+    def predict(self, x: float) -> float:
+        """Return the model's estimate ``m * x + b`` for input ``x``."""
+        return self.m * x + self.b
+
+
+if __name__ == "__main__":
+    import random
+
+    random.seed(42)
+
+    xs = list(range(100))
+    ys = [2 * x + 3 + random.uniform(-10, 10) for x in xs]
+
+    model = LinearRegressionModel()
+    model.points(xs, ys)
+
+    model.train(epochs=100, learning_rate=0.000001)
+
+    print(f"Trained model: y = {model.m}x + {model.b}")
+
+    # Test prediction
+    test_x = 50
+    print(f"Prediction for x={test_x}: y={model.predict(test_x)}")
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..f144bdc
--- /dev/null
+++ b/test.py
@@ -0,0 +1,32 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+
+from linear_regression import LinearRegressionModel
+
+
+def main():
+    """Fit the model to ./data.csv and plot the data with the fitted line."""
+    points = pd.read_csv("./data.csv")
+
+    xs = points['x'].values
+    ys = points['y'].values
+
+    model = LinearRegressionModel()
+    model.points(xs, ys)
+    model.train(epochs=100, learning_rate=0.0000001)
+
+    print(f"m = {model.m}, b = {model.b}")
+
+    plt.scatter(xs, ys, color='blue', label='Data Points')
+
+    predictions = [model.predict(x) for x in xs]
+
+    plt.plot(xs, predictions, color='red', label='Regression Line')
+    plt.xlabel('x')
+    plt.ylabel('y')
+    plt.legend()
+    plt.show()
+
+
+if __name__ == "__main__":
+    main()