subbarayudu-j · shiva-yadav-ds · Nov 15, 2025
diff --git a/statistics/__init__.py b/statistics/__init__.py
diff --git a/statistics/central_tendency.py b/statistics/central_tendency.py
@@ -0,0 +1,48 @@
+# central_tendency.py
+from typing import Iterable, List, Union
+
+Number = Union[int, float]
+
+
+def mean(values: Iterable[Number], ndigits: Union[int, None] = 2) -> float:
+    """Return the arithmetic mean of ``values``.
+
+    The result is rounded to ``ndigits`` decimal places. The default of 2
+    keeps the function's established output; pass ``ndigits=None`` to get
+    the full-precision mean.
+
+    Args:
+        values: Any iterable of numbers; it is consumed once.
+        ndigits: Decimal places to round to, or ``None`` for no rounding.
+
+    Returns:
+        The (optionally rounded) mean.
+
+    Raises:
+        ValueError: If ``values`` is empty.
+    """
+    vals = list(values)
+    if not vals:
+        raise ValueError("mean() arg is an empty sequence")
+    result = sum(vals) / len(vals)
+    # round(x, None) would return an int, so skip the call entirely instead.
+    return result if ndigits is None else round(result, ndigits)
+
+
+def median(values: Iterable[Number]) -> float:
+    """Return the middle value of ``values`` once sorted.
+
+    With an odd count the central element is returned as a float; with an
+    even count the two central elements are averaged. Raises ValueError
+    when ``values`` is empty.
+    """
+    ordered = list(values)
+    ordered.sort()
+    count = len(ordered)
+    if count == 0:
+        raise ValueError("median() arg is an empty sequence")
+    half, is_odd = divmod(count, 2)
+    if is_odd:
+        return float(ordered[half])
+    lower_mid, upper_mid = ordered[half - 1], ordered[half]
+    return (lower_mid + upper_mid) / 2.0
+
+
+def mode(values: Iterable[Number]) -> Union[Number, List[Number]]:
+    """
+    Find the most frequent value(s) in ``values``.
+
+    A single winner is returned bare; when several values tie for the
+    highest count, all of them are returned as a list in first-seen order.
+    Example: [1,2,2,3] -> 2 ; [1,1,2,2] -> [1,2]
+    Raises ValueError if ``values`` is empty.
+    """
+    items = list(values)
+    if len(items) == 0:
+        raise ValueError("mode() arg is an empty sequence")
+    counts = {}
+    for item in items:
+        if item in counts:
+            counts[item] += 1
+        else:
+            counts[item] = 1
+    highest = max(counts.values())
+    winners = [value for value in counts if counts[value] == highest]
+    if len(winners) == 1:
+        return winners[0]
+    return winners
+
+
+# ---------- Quick usage ----------
+if __name__ == "__main__":
+    # Demo: print each central-tendency statistic for one fixed sample.
+    sample = [2, 5, 1, 2, 3, 5, 2]
+    for label, stat in (("mean:", mean), ("median:", median), ("mode:", mode)):
+        print(label, stat(sample))
diff --git a/statistics/dispersion.py b/statistics/dispersion.py
@@ -0,0 +1,77 @@
+# dispersion.py
+from typing import Iterable, List, Union
+import math
+
+Number = Union[int, float]
+
+
+def _to_list(values: Iterable[Number]) -> List[float]:
+    """Materialise ``values`` as a list of floats; raise ValueError if there are none."""
+    materialised = list(values)
+    if len(materialised) == 0:
+        raise ValueError("sequence is empty")
+    return list(map(float, materialised))
+
+
+def data_range(values: Iterable[Number]) -> float:
+    """Return the spread of the data: largest value minus smallest value."""
+    floats = _to_list(values)
+    lowest, highest = min(floats), max(floats)
+    return highest - lowest
+
+
+def variance(
+    values: Iterable[Number],
+    sample: bool = True,
+    ndigits: Union[int, None] = 2,
+) -> float:
+    """
+    Return variance. By default sample=True uses sample variance (n-1).
+    Use sample=False for population variance (n).
+
+    The result is rounded to ``ndigits`` decimal places. The default of 2
+    keeps the function's established output; pass ``ndigits=None`` for the
+    full-precision value (e.g. before feeding it into further arithmetic).
+
+    Raises:
+        ValueError: If ``values`` is empty, or if ``sample`` is true and
+            fewer than two data points are given.
+    """
+    vals = _to_list(values)
+    n = len(vals)
+    if sample and n < 2:
+        raise ValueError("sample variance requires at least two data points")
+    mean_val = sum(vals) / n
+    # Sum of squared deviations from the mean.
+    ssd = sum((x - mean_val) ** 2 for x in vals)
+    denom = n - 1 if sample else n
+    result = ssd / denom
+    # round(x, None) would return an int, so skip the call entirely instead.
+    return result if ndigits is None else round(result, ndigits)
+
+
+def stdev(values: Iterable[Number], sample: bool = True) -> float:
+    """Return standard deviation (sqrt of variance), rounded to 2 decimals.
+
+    The square root is taken of the *unrounded* variance. Taking it from
+    the 2-decimal value that ``variance()`` returns by default distorts
+    small spreads: a true variance of 0.00125 rounds to 0.0 and would give
+    a standard deviation of 0.0 instead of 0.04.
+
+    Args:
+        values: Iterable of numbers.
+        sample: True for the sample statistic (n-1 denominator), False for
+            the population statistic (n denominator).
+
+    Raises:
+        ValueError: If ``values`` is empty, or if ``sample`` is true and
+            fewer than two data points are given.
+    """
+    vals = _to_list(values)
+    n = len(vals)
+    if sample and n < 2:
+        raise ValueError("sample variance requires at least two data points")
+    mean_val = sum(vals) / n
+    ssd = sum((x - mean_val) ** 2 for x in vals)
+    raw_variance = ssd / (n - 1 if sample else n)
+    # Round exactly once, at the very end.
+    return round(math.sqrt(raw_variance), 2)
+
+
+def iqr(values: Iterable[Number]) -> float:
+    """
+    Compute the interquartile range, Q3 - Q1.
+
+    Quartiles come from the median-of-halves method: the sorted data is
+    split into a lower and an upper half (dropping the middle element when
+    the count is odd) and the median of each half is taken. An empty half,
+    which only occurs for a single data point, counts as 0.0.
+    """
+    def _middle(chunk: List[float]) -> float:
+        size = len(chunk)
+        if not size:
+            return 0.0
+        centre, odd = divmod(size, 2)
+        if odd:
+            return chunk[centre]
+        return (chunk[centre - 1] + chunk[centre]) / 2.0
+
+    ordered = _to_list(values)
+    ordered.sort()
+    total = len(ordered)
+    half = total // 2
+    # total - half equals half for even counts and half + 1 for odd counts,
+    # so the upper slice skips the middle element exactly when there is one.
+    bottom = ordered[:half]
+    top = ordered[total - half:]
+    first_quartile = _middle(bottom)
+    third_quartile = _middle(top)
+    return third_quartile - first_quartile
+
+
+# ---------- Quick usage ----------
+if __name__ == "__main__":
+    # Demo: print each dispersion statistic for one fixed sample.
+    sample = [1, 2, 2, 3, 4, 7, 9]
+    demos = (
+        ("range:", data_range, {}),
+        ("sample variance:", variance, {"sample": True}),
+        ("population variance:", variance, {"sample": False}),
+        ("sample stdev:", stdev, {"sample": True}),
+        ("IQR:", iqr, {}),
+    )
+    for label, func, kwargs in demos:
+        print(label, func(sample, **kwargs))
diff --git a/statistics/frequency_stats.py b/statistics/frequency_stats.py
@@ -0,0 +1,68 @@
+# frequency_stats.py
+from typing import Iterable, Dict, List, Tuple, Union
+from collections import Counter
+
+Number = Union[int, float]
+
+
+def frequency_table(values: Iterable[Number]) -> Dict[Number, int]:
+    """Map each distinct value to the number of times it occurs ({} for empty input)."""
+    items = list(values)
+    return dict(Counter(items)) if items else {}
+
+
+def relative_frequency(values: Iterable[Number]) -> Dict[Number, float]:
+    """Map each distinct value to its share of all observations ({} for empty input)."""
+    items = list(values)
+    if not items:
+        return {}
+    total = len(items)
+    tally = Counter(items)
+    return {value: hits / total for value, hits in tally.items()}
+
+
+def cumulative_frequency(values: Iterable[Number]) -> List[Tuple[Number, int]]:
+    """
+    Return a sorted list of (value, cumulative_count).
+
+    Each distinct value appears once, in ascending order, paired with the
+    number of observations less than or equal to it. Empty input gives [].
+    Example: [1,1,2,3] -> [(1,2),(2,3),(3,4)]
+    """
+    # Count first and sort only the distinct values: sorting the raw data
+    # up front is wasted work because the counts are ordered by value anyway.
+    # iter() makes sure the input is always counted element by element.
+    counts = Counter(iter(values))
+    cum = []
+    running = 0
+    for val in sorted(counts):
+        running += counts[val]
+        cum.append((val, running))
+    return cum
+
+
+def top_k_modes(values: Iterable[Number], k: int = 1) -> List[Number]:
+    """
+    Return the values holding the top ``k`` frequency ranks, most frequent first.
+
+    Any value whose count ties with the k-th ranked value is included too, so
+    the result can be longer than ``k``. With k=1 this is the list of mode(s).
+    Raises ValueError when k < 1; returns [] for empty input.
+    """
+    if k < 1:
+        raise ValueError("k must be >= 1")
+    items = list(values)
+    if len(items) == 0:
+        return []
+    ranked = Counter(items).most_common()
+    if len(ranked) <= k:
+        # Fewer distinct values than requested ranks: everything qualifies.
+        return [value for value, _ in ranked]
+    threshold = ranked[k - 1][1]
+    selected = []
+    for value, count in ranked:
+        # ranked is ordered from most to least common, so the first value
+        # below the threshold ends the qualifying run.
+        if count < threshold:
+            break
+        selected.append(value)
+    return selected
+
+
+# ---------- Quick usage ----------
+if __name__ == "__main__":
+    # Demo: print each frequency statistic for one fixed data set.
+    data = [1, 2, 2, 3, 3, 3, 4]
+    demos = (
+        ("freq table:", frequency_table, {}),
+        ("relative freq:", relative_frequency, {}),
+        ("cumulative freq:", cumulative_frequency, {}),
+        ("top 2 modes:", top_k_modes, {"k": 2}),
+    )
+    for label, func, kwargs in demos:
+        print(label, func(data, **kwargs))
diff --git a/statistics/probability_basic.py b/statistics/probability_basic.py
@@ -0,0 +1,57 @@
+# probability_basic.py
+from typing import Iterable
+import math
+from collections import Counter
+
+def factorial(n: int) -> int:
+    """Compute n! = 1 * 2 * ... * n, with 0! == 1; a negative n raises ValueError."""
+    if n < 0:
+        raise ValueError("factorial() not defined for negative values")
+    if n > 0:
+        return math.prod(range(1, n + 1))
+    return 1
+
+
+def permutations(n: int, r: int) -> int:
+    """Return P(n, r) = n! / (n-r)!, the number of ordered selections of r items out of n.
+
+    Raises:
+        ValueError: If not 0 <= r <= n.
+    """
+    if not (0 <= r <= n):
+        raise ValueError("require 0 <= r <= n")
+    # The standard library computes this directly; no need to build two
+    # full factorials and divide them.
+    return math.perm(n, r)
+
+
+def combinations(n: int, r: int) -> int:
+    """Return C(n, r) = n! / (r! (n-r)!), the number of unordered selections of r items out of n.
+
+    Raises:
+        ValueError: If not 0 <= r <= n.
+    """
+    if not (0 <= r <= n):
+        raise ValueError("require 0 <= r <= n")
+    # The standard library computes this directly; no need to build three
+    # full factorials and divide them.
+    return math.comb(n, r)
+
+
+def binomial_pmf(k: int, n: int, p: float) -> float:
+    """
+    Return binomial probability P(X = k) for X ~ Binomial(n, p)
+
+    Args:
+        k: Number of successes, 0 <= k <= n.
+        n: Number of trials.
+        p: Success probability of a single trial, 0 <= p <= 1.
+
+    Raises:
+        ValueError: If k is outside [0, n] or p is outside [0, 1].
+    """
+    if not (0 <= k <= n):
+        raise ValueError("k must be between 0 and n")
+    if not (0.0 <= p <= 1.0):
+        raise ValueError("p must be between 0 and 1")
+    try:
+        # k is already validated, so math.comb(n, k) is C(n, k).
+        return math.comb(n, k) * (p ** k) * ((1 - p) ** (n - k))
+    except OverflowError:
+        # For large n the binomial coefficient is too big to convert to a
+        # float even though the probability itself is representable.
+        # Recompute in log space instead of failing.
+        if p == 0.0:
+            return 1.0 if k == 0 else 0.0
+        if p == 1.0:
+            return 1.0 if k == n else 0.0
+        log_coefficient = (
+            math.lgamma(n + 1) - math.lgamma(k + 1) - math.lgamma(n - k + 1)
+        )
+        log_pmf = log_coefficient + k * math.log(p) + (n - k) * math.log1p(-p)
+        return math.exp(log_pmf)
+
+
+def empirical_probability(event_values: Iterable, sample_space_values: Iterable) -> float:
+    """
+    Compute empirical probability of event_values within sample_space_values.
+
+    The result is the fraction of outcomes in sample_space_values that also
+    occur in event_values. Both arguments may be any iterable, including
+    one-shot iterators; outcomes must be hashable.
+    Example: empirical_probability([1,1],[1,1,2,3]) -> 0.5
+
+    Raises:
+        ValueError: If sample_space_values is empty.
+    """
+    s = list(sample_space_values)
+    if not s:
+        raise ValueError("sample space cannot be empty")
+    # Build the lookup set exactly once. Rebuilding it per outcome is
+    # quadratic, and with a generator the second rebuild would already see
+    # an exhausted iterator and silently undercount.
+    events = set(event_values)
+    event_count = sum(1 for x in s if x in events)
+    return event_count / len(s)
+
+
+# ---------- Quick usage ----------
+if __name__ == "__main__":
+    # Demo: one labelled call per helper in this module.
+    demos = (
+        ("5! =", factorial, (5,)),
+        ("P(5,2) =", permutations, (5, 2)),
+        ("C(5,2) =", combinations, (5, 2)),
+        ("Binomial P(X=2; n=5, p=0.3) =", binomial_pmf, (2, 5, 0.3)),
+        ("Empirical prob of [1] in [1,1,2,3] =", empirical_probability, ([1], [1, 1, 2, 3])),
+    )
+    for label, func, args in demos:
+        print(label, func(*args))