47 changes: 47 additions & 0 deletions machine_learning/Data_classification/Knn_neibour.py
@@ -0,0 +1,47 @@

def separate_features_labels(data):
    """Split (x, y, label) tuples into feature vectors and class labels."""
    features = [point[:-1] for point in data]
    labels = [point[-1] for point in data]
    return features, labels

def euclidean_distance(point1, point2):
    """Straight-line (Euclidean) distance between two equal-length points."""
    return sum((a - b) ** 2 for a, b in zip(point1, point2)) ** 0.5

def knn_classify(test_point, data_points, k):
    """Predict the label of test_point by majority vote among its k nearest neighbors."""
    features, labels = separate_features_labels(data_points)
    distances = []

    # Distance from the test point to every training point, paired with its label.
    for index, data_point in enumerate(features):
        distance = euclidean_distance(test_point, data_point)
        distances.append((distance, labels[index]))

    # Sort by distance and keep the labels of the k closest points.
    distances.sort(key=lambda x: x[0])
    k_nearest_labels = [label for _, label in distances[:k]]

    # Majority vote; ties are broken arbitrarily by max().
    prediction = max(set(k_nearest_labels), key=k_nearest_labels.count)
    return prediction

if __name__ == "__main__":
    data_points = [
        (5, 3, 0), (10, 15, 1), (8, 6, 0), (3, 7, 0), (12, 8, 1),
        (7, 14, 1), (4, 5, 0), (6, 9, 0), (14, 12, 1), (9, 11, 1),
        (11, 5, 0), (13, 7, 1), (6, 10, 0), (8, 13, 1), (5, 14, 1),
        (7, 4, 0), (10, 6, 0), (12, 9, 1), (9, 15, 1), (11, 13, 1),
        (3, 5, 0), (4, 10, 0), (14, 14, 1), (13, 9, 1), (8, 8, 0),
        (7, 7, 0), (15, 11, 1), (12, 6, 0), (9, 10, 0), (5, 13, 1),
        (6, 12, 1), (10, 10, 0), (13, 11, 1), (4, 8, 0), (3, 9, 0),
        (15, 13, 1), (14, 9, 1), (11, 7, 0), (8, 12, 1), (7, 13, 1),
        (5, 10, 0), (6, 11, 0), (9, 7, 0), (10, 9, 0), (12, 15, 1),
        (14, 13, 1), (15, 10, 1), (11, 12, 1), (4, 6, 0), (3, 8, 0)
    ]

    test_points = [
        (2, 1), (5, 5), (11, 8), (7, 12), (14, 13),
        (10, 9), (6, 10), (12, 12), (4, 7), (13, 14)
    ]

    k = 5
    for test_point in test_points:
        prediction = knn_classify(test_point, data_points, k)
        print(f"The predicted class for the test point {test_point} is: {prediction}")
54 changes: 54 additions & 0 deletions machine_learning/Data_classification/README.md
@@ -0,0 +1,54 @@
# Simple KNN & Decision Tree Classifiers

This project implements the k-Nearest Neighbors (KNN) and Decision Tree algorithms from scratch in Python, using a sample dataset of 2D points with binary class labels.

## Features

- **KNN Classifier:** Predicts the class of a test point by majority vote among its k nearest neighbors.
- **Decision Tree Classifier:** Builds a decision tree by choosing, at each node, the split with the highest information gain, then classifies new data points (see the worked example below).
- **Sample Dataset:** 50 manually defined (x, y, label) tuples.
- **Multiple Test Points:** Easily classify a list of new data points.
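
For intuition, here is a tiny worked example of the information-gain computation that drives the tree's splits. It imports `entropy` from `desiciontree.py`, so it assumes that file is importable from the working directory:

```
# Worked example: information gain from a perfect split of a 50/50 label mix.
from desiciontree import entropy

labels = [0, 0, 1, 1]
left, right = [0, 0], [1, 1]   # a split that separates the classes exactly

base = entropy(labels)         # 1.0 bit for a 50/50 mix
gain = base - (0.5 * entropy(left) + 0.5 * entropy(right))
print(gain)                    # 1.0: pure children give the maximum possible gain
```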

## Requirements

- Python 3.x
- (Optional) Libraries like numpy, pandas, scikit-learn, matplotlib for future enhancements.

See [`requirements.txt`](requirements.txt) for details.

## Usage

1. Clone this repository:

```
git clone https://github.com/yourusername/simple-knn-decisiontree.git
cd simple-knn-decisiontree
```

2. Run the classifiers:

```
python Knn_neibour.py
python desiciontree.py
```

You can modify the `test_points` list to classify different points.
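
For example, to classify two different points, edit the list at the bottom of either script:

```
test_points = [(2, 1), (9, 9)]  # any (x, y) pairs to classify
```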

## Files

- `Knn_neibour.py`: From-scratch KNN classifier, plus the sample dataset.
- `desiciontree.py`: From-scratch decision tree classifier, using the same dataset.
- `requirements.txt`: (Optional) Lists external libraries for extended functionality.
- `README.md`: Project introduction and usage.

## Extending the Project

- Integrate scikit-learn for quick benchmarking (see the sketch after this list).
- Add plotting with matplotlib to visualize the dataset and decision boundaries.
- Use pandas for reading larger datasets from CSV files.
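
For the first item, a minimal benchmarking sketch, not a definitive harness: it assumes scikit-learn is installed and inlines only the first ten rows of the sample dataset (paste in all 50 tuples for a real comparison):

```
# Sketch: compare the from-scratch classifiers against scikit-learn equivalents.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

# First ten rows of the sample dataset; extend with the remaining tuples from the scripts.
data_points = [
    (5, 3, 0), (10, 15, 1), (8, 6, 0), (3, 7, 0), (12, 8, 1),
    (7, 14, 1), (4, 5, 0), (6, 9, 0), (14, 12, 1), (9, 11, 1),
]
X = [point[:-1] for point in data_points]
y = [point[-1] for point in data_points]

# Same hyperparameters as the from-scratch versions: k=5, max_depth=5.
knn = KNeighborsClassifier(n_neighbors=5).fit(X, y)
tree = DecisionTreeClassifier(max_depth=5).fit(X, y)

test_points = [(2, 1), (5, 5), (11, 8), (7, 12), (14, 13)]
print("sklearn KNN: ", knn.predict(test_points))
print("sklearn tree:", tree.predict(test_points))
```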

## License

MIT License

---

**Enjoy experimenting with classic machine learning algorithms!**
77 changes: 77 additions & 0 deletions machine_learning/Data_classification/desiciontree.py
@@ -0,0 +1,77 @@
import math
from collections import Counter

def entropy(labels):
    """Shannon entropy (in bits) of a list of class labels."""
    total = len(labels)
    counts = Counter(labels)
    return -sum((count / total) * math.log2(count / total) for count in counts.values())

def best_split(data):
    """Return the (feature, threshold) split with the highest information gain, or (None, None)."""
    best_gain = 0
    best_feature = None
    best_threshold = None
    base_entropy = entropy([point[-1] for point in data])
    n_features = len(data[0]) - 1

    for feature in range(n_features):
        # Candidate thresholds: every distinct value this feature takes.
        thresholds = {point[feature] for point in data}
        for threshold in thresholds:
            left = [point for point in data if point[feature] <= threshold]
            right = [point for point in data if point[feature] > threshold]
            if not left or not right:
                continue
            # Gain is the reduction from the parent's entropy to the
            # size-weighted average of the children's entropies.
            p_left = len(left) / len(data)
            p_right = len(right) / len(data)
            gain = base_entropy - (p_left * entropy([point[-1] for point in left]) +
                                   p_right * entropy([point[-1] for point in right]))
            if gain > best_gain:
                best_gain = gain
                best_feature = feature
                best_threshold = threshold
    return best_feature, best_threshold

def build_tree(data, depth=0, max_depth=5):
    """Recursively grow a decision tree; a leaf holds the majority label of its subset."""
    labels = [point[-1] for point in data]
    # Stop when the node is pure or the depth limit is reached.
    if len(set(labels)) == 1 or depth >= max_depth:
        return Counter(labels).most_common(1)[0][0]  # leaf node
    feature, threshold = best_split(data)
    if feature is None:
        # No split yields positive information gain; fall back to the majority label.
        return Counter(labels).most_common(1)[0][0]
    left = [point for point in data if point[feature] <= threshold]
    right = [point for point in data if point[feature] > threshold]
    return {
        "feature": feature,
        "threshold": threshold,
        "left": build_tree(left, depth + 1, max_depth),
        "right": build_tree(right, depth + 1, max_depth)
    }

def predict(tree, point):
    """Walk from the root to a leaf; internal nodes are dicts, leaves are plain labels."""
    while isinstance(tree, dict):
        if point[tree["feature"]] <= tree["threshold"]:
            tree = tree["left"]
        else:
            tree = tree["right"]
    return tree

if __name__ == "__main__":
    data_points = [
        (5, 3, 0), (10, 15, 1), (8, 6, 0), (3, 7, 0), (12, 8, 1),
        (7, 14, 1), (4, 5, 0), (6, 9, 0), (14, 12, 1), (9, 11, 1),
        (11, 5, 0), (13, 7, 1), (6, 10, 0), (8, 13, 1), (5, 14, 1),
        (7, 4, 0), (10, 6, 0), (12, 9, 1), (9, 15, 1), (11, 13, 1),
        (3, 5, 0), (4, 10, 0), (14, 14, 1), (13, 9, 1), (8, 8, 0),
        (7, 7, 0), (15, 11, 1), (12, 6, 0), (9, 10, 0), (5, 13, 1),
        (6, 12, 1), (10, 10, 0), (13, 11, 1), (4, 8, 0), (3, 9, 0),
        (15, 13, 1), (14, 9, 1), (11, 7, 0), (8, 12, 1), (7, 13, 1),
        (5, 10, 0), (6, 11, 0), (9, 7, 0), (10, 9, 0), (12, 15, 1),
        (14, 13, 1), (15, 10, 1), (11, 12, 1), (4, 6, 0), (3, 8, 0)
    ]
    tree = build_tree(data_points, max_depth=5)
    test_points = [
        (2, 1), (5, 5), (11, 8), (7, 12), (14, 13),
        (10, 9), (6, 10), (12, 12), (4, 7), (13, 14)
    ]
    for test_point in test_points:
        result = predict(tree, test_point)
        print(f"Predicted class for {test_point}: {result}")
4 changes: 4 additions & 0 deletions machine_learning/Data_classification/requirements.txt
@@ -0,0 +1,4 @@
numpy
scikit-learn
pandas
matplotlib