From 022e7f7eb4dfba6825ab96f0e11996c6cca1326c Mon Sep 17 00:00:00 2001
From: Brian Staber
Date: Tue, 26 Aug 2025 22:37:37 +0200
Subject: [PATCH 1/3] :rocket: Add simple-optimizers example

---
 crates/CMakeLists.txt                         |   3 +-
 crates/kf_linear/include/kf_linear.hpp        | 100 +++++++--------
 crates/kf_linear/tests/test_kf_linear.cpp     | 118 ++++++++++--------
 crates/simple_optimizers/CMakeLists.txt       |  19 +++
 .../simple_optimizers/include/optimizers.hpp  |  70 +++++++++++
 crates/simple_optimizers/src/main.cpp         |   6 +
 crates/simple_optimizers/tests/CMakeLists.txt |   7 ++
 .../tests/test_simple_optimizers.cpp          |  58 +++++++++
 8 files changed, 272 insertions(+), 109 deletions(-)
 create mode 100644 crates/simple_optimizers/CMakeLists.txt
 create mode 100644 crates/simple_optimizers/include/optimizers.hpp
 create mode 100644 crates/simple_optimizers/src/main.cpp
 create mode 100644 crates/simple_optimizers/tests/CMakeLists.txt
 create mode 100644 crates/simple_optimizers/tests/test_simple_optimizers.cpp

diff --git a/crates/CMakeLists.txt b/crates/CMakeLists.txt
index aa3e246..678104b 100644
--- a/crates/CMakeLists.txt
+++ b/crates/CMakeLists.txt
@@ -1 +1,2 @@
-add_subdirectory("kf_linear")
\ No newline at end of file
+add_subdirectory("kf_linear")
+add_subdirectory("simple_optimizers")
\ No newline at end of file
diff --git a/crates/kf_linear/include/kf_linear.hpp b/crates/kf_linear/include/kf_linear.hpp
index ee19397..fcb3fd0 100644
--- a/crates/kf_linear/include/kf_linear.hpp
+++ b/crates/kf_linear/include/kf_linear.hpp
@@ -1,10 +1,10 @@
 #pragma once
 
 #include <Eigen/Dense>
+#include <iostream>
 #include <optional>
-#include <vector>
 #include <stdexcept>
-#include <iostream>
+#include <vector>
 
 /**
  * @brief Generic linear Kalman filter (templated, no control term).
@@ -17,43 +17,34 @@
  * Nx = state dimension (int or Eigen::Dynamic)
  * Ny = measurement dimension (int or Eigen::Dynamic)
  */
-template <int Nx, int Ny>
-class KFLinear {
-public:
+template <int Nx, int Ny> class KFLinear {
+  public:
     using StateVec = Eigen::Matrix<double, Nx, 1>;
     using StateMat = Eigen::Matrix<double, Nx, Nx>;
-    using MeasVec  = Eigen::Matrix<double, Ny, 1>;
-    using MeasMat  = Eigen::Matrix<double, Ny, Ny>;
-    using ObsMat   = Eigen::Matrix<double, Ny, Nx>;
+    using MeasVec = Eigen::Matrix<double, Ny, 1>;
+    using MeasMat = Eigen::Matrix<double, Ny, Ny>;
+    using ObsMat = Eigen::Matrix<double, Ny, Nx>;
 
     /// Construct filter with initial condition and model matrices.
-    KFLinear(const StateVec& initial_state,
-             const StateMat& initial_covariance,
-             const StateMat& transition_matrix,
-             const ObsMat& observation_matrix,
-             const StateMat& process_covariance,
-             const MeasMat& measurement_covariance)
-        : x_(initial_state),
-          P_(initial_covariance),
-          A_(transition_matrix),
-          H_(observation_matrix),
-          Q_(process_covariance),
-          R_(measurement_covariance)
-    {
-        std::cout << "DEBUG" << std::endl;
-        const auto n = x_.rows();
-        if (A_.rows() != n || A_.cols() != n)
-            throw std::invalid_argument("A must be n×n and match x dimension");
-        if (P_.rows() != n || P_.cols() != n)
-            throw std::invalid_argument("P must be n×n");
-        if (Q_.rows() != n || Q_.cols() != n)
-            throw std::invalid_argument("Q must be n×n");
-        if (H_.cols() != n)
-            throw std::invalid_argument("H must have n columns");
-        const auto m = H_.rows();
-        if (R_.rows() != m || R_.cols() != m)
-            throw std::invalid_argument("R must be m×m with m = H.rows()");
-    }
+    KFLinear(const StateVec &initial_state, const StateMat &initial_covariance,
+             const StateMat &transition_matrix, const ObsMat &observation_matrix,
+             const StateMat &process_covariance, const MeasMat &measurement_covariance)
+        : x_(initial_state), P_(initial_covariance), A_(transition_matrix), H_(observation_matrix),
+          Q_(process_covariance), R_(measurement_covariance) {
+        std::cout << "DEBUG" << std::endl;
+        const auto n = x_.rows();
+        if (A_.rows() != n || A_.cols() != n)
+            throw std::invalid_argument("A must be n×n and match x dimension");
+        if (P_.rows() != n || P_.cols() != n)
+            throw std::invalid_argument("P must be n×n");
+        if (Q_.rows() != n || Q_.cols() != n)
+            throw std::invalid_argument("Q must be n×n");
+        if (H_.cols() != n)
+            throw std::invalid_argument("H must have n columns");
+        const auto m = H_.rows();
+        if (R_.rows() != m || R_.cols() != m)
+            throw std::invalid_argument("R must be m×m with m = H.rows()");
+    }
 
     /// Predict step (no control).
     void predict() {
@@ -62,7 +53,7 @@ class KFLinear {
     }
 
     /// Update step with a measurement z.
-    void update(const MeasVec& z) {
+    void update(const MeasVec &z) {
         // Innovation
         MeasVec nu = z - H_ * x_;
 
@@ -76,9 +67,8 @@
         }
 
         // K = P H^T S^{-1} via solve: S * (K^T) = (P H^T)^T
-        const auto PHt = P_ * H_.transpose(); // (Nx × Ny)
-        Eigen::Matrix<double, Nx, Ny> K =
-            ldlt.solve(PHt.transpose()).transpose(); // (Nx × Ny)
+        const auto PHt = P_ * H_.transpose();                                      // (Nx × Ny)
+        Eigen::Matrix<double, Nx, Ny> K = ldlt.solve(PHt.transpose()).transpose(); // (Nx × Ny)
 
         // State update
         x_ += K * nu;
@@ -92,7 +82,7 @@ class KFLinear {
     }
 
     /// One full step: predict then (optionally) update.
-    void step(const std::optional<MeasVec>& measurement) {
+    void step(const std::optional<MeasVec> &measurement) {
         predict();
         if (measurement) {
             update(*measurement);
         }
     }
 
     /// Run over a sequence of (optional) measurements.
-    std::vector<StateVec> filter(const std::vector<std::optional<MeasVec>>& measurements) {
+    std::vector<StateVec> filter(const std::vector<std::optional<MeasVec>> &measurements) {
         std::vector<StateVec> out;
         out.reserve(measurements.size());
-        for (const auto& z : measurements) {
+        for (const auto &z : measurements) {
             step(z);
             out.push_back(x_);
         }
@@ -111,23 +101,23 @@
     }
 
     // Accessors
-    [[nodiscard]] const StateVec& state() const { return x_; }
-    [[nodiscard]] const StateMat& covariance() const { return P_; }
+    [[nodiscard]] const StateVec &state() const { return x_; }
+    [[nodiscard]] const StateMat &covariance() const { return P_; }
 
     // (Optional) setters if you want to tweak model online
-    void set_transition(const StateMat& A) { A_ = A; }
-    void set_observation(const ObsMat& H) { H_ = H; }
-    void set_process_noise(const StateMat& Q) { Q_ = Q; }
-    void set_measurement_noise(const MeasMat& R) { R_ = R; }
+    void set_transition(const StateMat &A) { A_ = A; }
+    void set_observation(const ObsMat &H) { H_ = H; }
+    void set_process_noise(const StateMat &Q) { Q_ = Q; }
+    void set_measurement_noise(const MeasMat &R) { R_ = R; }
 
-private:
+  private:
     // Model
-    StateMat A_;  ///< State transition
-    ObsMat H_;    ///< Observation
-    StateMat Q_;  ///< Process noise covariance
-    MeasMat R_;   ///< Measurement noise covariance
+    StateMat A_; ///< State transition
+    ObsMat H_;   ///< Observation
+    StateMat Q_; ///< Process noise covariance
+    MeasMat R_;  ///< Measurement noise covariance
 
     // Estimates
-    StateVec x_;  ///< State mean
-    StateMat P_;  ///< State covariance
+    StateVec x_; ///< State mean
+    StateMat P_; ///< State covariance
 };
diff --git a/crates/kf_linear/tests/test_kf_linear.cpp b/crates/kf_linear/tests/test_kf_linear.cpp
index f546671..daf6506 100644
--- a/crates/kf_linear/tests/test_kf_linear.cpp
+++ b/crates/kf_linear/tests/test_kf_linear.cpp
@@ -1,7 +1,7 @@
 // tests/test_kalman_filter.cpp
-#include <catch2/catch_test_macros.hpp>
-#include <kf_linear.hpp>
 #include <catch2/catch_approx.hpp>
+#include <catch2/catch_test_macros.hpp>
+#include <kf_linear.hpp>
 #include <optional>
 #include <vector>
 
@@ -9,24 +9,25 @@
 using Catch::Approx;
 
-TEST_CASE("Predict-only step leaves state at A*x and P -> A P A^T + Q")
-{
-    using KF = KFLinear<2,1>;
+TEST_CASE("Predict-only step leaves state at A*x and P -> A P A^T + Q") {
+    using KF = KFLinear<2, 1>;
     using StateVec = typename KF::StateVec;
     using StateMat = typename KF::StateMat;
-    using ObsMat   = typename KF::ObsMat;
-    using MeasMat  = typename KF::MeasMat;
-    using MeasVec  = typename KF::MeasVec;
-
-    StateVec x0; x0 << 2.0, -1.0;
-    StateMat P0; P0 << 1.0, 0.2,
-                       0.2, 2.0;
-
-    StateMat A; A << 1.0, 1.0,
-                     0.0, 1.0; // constant-velocity
-    ObsMat H; H << 1.0, 0.0;
+    using ObsMat = typename KF::ObsMat;
+    using MeasMat = typename KF::MeasMat;
+    using MeasVec = typename KF::MeasVec;
+
+    StateVec x0;
+    x0 << 2.0, -1.0;
+    StateMat P0;
+    P0 << 1.0, 0.2, 0.2, 2.0;
+
+    StateMat A;
+    A << 1.0, 1.0, 0.0, 1.0; // constant-velocity
+    ObsMat H;
+    H << 1.0, 0.0;
     StateMat Q = 0.1 * StateMat::Identity();
-    MeasMat R = MeasMat::Identity();
+    MeasMat R = MeasMat::Identity();
 
     KF kf(x0, P0, A, H, Q, R);
 
@@ -39,72 +40,83 @@
     REQUIRE(kf.covariance().isApprox(P_expected, 1e-12));
 }
 
-TEST_CASE("Scalar KF update matches closed-form")
-{
-    using KF = KFLinear<1,1>;
+TEST_CASE("Scalar KF update matches closed-form") {
+    using KF = KFLinear<1, 1>;
     using StateVec = typename KF::StateVec;
     using StateMat = typename KF::StateMat;
-    using ObsMat   = typename KF::ObsMat;
-    using MeasMat  = typename KF::MeasMat;
-    using MeasVec  = typename KF::MeasVec;
+    using ObsMat = typename KF::ObsMat;
+    using MeasMat = typename KF::MeasMat;
+    using MeasVec = typename KF::MeasVec;
 
     // 1D model: x_k = x_{k-1} + w, z_k = x_k + v
-    StateVec x0; x0 << 0.0;
-    StateMat P0; P0 << 1.0;
-
-    StateMat A; A << 1.0;
-    ObsMat H; H << 1.0;
-    StateMat Q; Q << 0.25; // process noise var
-    MeasMat R; R << 1.0;   // measurement noise var
+    StateVec x0;
+    x0 << 0.0;
+    StateMat P0;
+    P0 << 1.0;
+
+    StateMat A;
+    A << 1.0;
+    ObsMat H;
+    H << 1.0;
+    StateMat Q;
+    Q << 0.25; // process noise var
+    MeasMat R;
+    R << 1.0; // measurement noise var
 
     KF kf(x0, P0, A, H, Q, R);
 
     // Step 1: no measurement
     kf.step(std::nullopt);
-    double Pp = 1.0 + 0.25;  // P' = P0 + Q
-    double xp = 0.0;         // x' = x0
+    double Pp = 1.0 + 0.25; // P' = P0 + Q
+    double xp = 0.0;        // x' = x0
     REQUIRE(kf.state()(0) == Approx(xp));
-    REQUIRE(kf.covariance()(0,0) == Approx(Pp));
+    REQUIRE(kf.covariance()(0, 0) == Approx(Pp));
 
     // Step 2: with measurement z = 1.2
-    MeasVec z; z << 1.2;
+    MeasVec z;
+    z << 1.2;
     kf.step(z);
 
     // After second predict: Ppred = Pp + Q
-    double Ppred = Pp + 0.25;  // = 1.5
+    double Ppred = Pp + 0.25; // = 1.5
     double K = Ppred / (Ppred + 1.0);
     double x_post = xp + K * (1.2 - xp);
-    double P_post = (1.0 - K) * Ppred;  // Joseph reduces to this in scalar
+    double P_post = (1.0 - K) * Ppred; // Joseph reduces to this in scalar
 
     REQUIRE(kf.state()(0) == Approx(x_post).epsilon(1e-12));
-    REQUIRE(kf.covariance()(0,0) == Approx(P_post).epsilon(1e-12));
+    REQUIRE(kf.covariance()(0, 0) == Approx(P_post).epsilon(1e-12));
 }
 
-TEST_CASE("Batch filter handles missing and present measurements")
-{
-    using KF = KFLinear<2,1>;
+TEST_CASE("Batch filter handles missing and present measurements") {
+    using KF = KFLinear<2, 1>;
     using StateVec = typename KF::StateVec;
     using StateMat = typename KF::StateMat;
-    using ObsMat   = typename KF::ObsMat;
-    using MeasMat  = typename KF::MeasMat;
-    using MeasVec  = typename KF::MeasVec;
+    using ObsMat = typename KF::ObsMat;
+    using MeasMat = typename KF::MeasMat;
+    using MeasVec = typename KF::MeasVec;
 
-    StateVec x0; x0 << 0.0, 0.0;
+    StateVec x0;
+    x0 << 0.0, 0.0;
     StateMat P0 = StateMat::Identity();
-    StateMat A; A << 1.0, 1.0,
-                     0.0, 1.0;
-    ObsMat H; H << 1.0, 0.0;
-    StateMat Q = 0.01 * StateMat::Identity();
-    MeasMat R = 0.25 * MeasMat::Identity();
+    StateMat A;
+    A << 1.0, 1.0, 0.0, 1.0;
+    ObsMat H;
+    H << 1.0, 0.0;
+    StateMat Q = 0.01 * StateMat::Identity();
+    MeasMat R = 0.25 * MeasMat::Identity();
 
     KF kf(x0, P0, A, H, Q, R);
 
     std::vector<std::optional<MeasVec>> zs;
-    zs.push_back(std::nullopt);              // t=1: no measurement
-    MeasVec z1; z1 << 0.8; zs.push_back(z1); // t=2
-    MeasVec z2; z2 << 1.6; zs.push_back(z2); // t=3
-    zs.push_back(std::nullopt);              // t=4
+    zs.push_back(std::nullopt); // t=1: no measurement
+    MeasVec z1;
+    z1 << 0.8;
+    zs.push_back(z1); // t=2
+    MeasVec z2;
+    z2 << 1.6;
+    zs.push_back(z2); // t=3
+    zs.push_back(std::nullopt); // t=4
 
     auto states = kf.filter(zs);
     REQUIRE(states.size() == zs.size());
diff --git a/crates/simple_optimizers/CMakeLists.txt b/crates/simple_optimizers/CMakeLists.txt
new file mode 100644
index 0000000..e2302ad
--- /dev/null
+++ b/crates/simple_optimizers/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Header-only target
+add_library(simple_optimizers INTERFACE)
+
+# Public includes (exports the include/ dir to consumers)
+target_include_directories(simple_optimizers INTERFACE
+    ${CMAKE_CURRENT_SOURCE_DIR}/include
+)
+
+# Link Eigen and set language level
+target_link_libraries(simple_optimizers INTERFACE Eigen3::Eigen)
+target_compile_features(simple_optimizers INTERFACE cxx_std_17)
+
+# Demo executable
+add_executable(opt_demo src/main.cpp)
+target_link_libraries(opt_demo PRIVATE simple_optimizers)
+
+# Tests
+enable_testing()
+add_subdirectory(tests)
diff --git a/crates/simple_optimizers/include/optimizers.hpp b/crates/simple_optimizers/include/optimizers.hpp
new file mode 100644
index 0000000..3daeb89
--- /dev/null
+++ b/crates/simple_optimizers/include/optimizers.hpp
@@ -0,0 +1,70 @@
+#pragma once
+#include <cstddef>
+#include <memory>
+#include <stdexcept>
+#include <vector>
+
+namespace cppx::opt {
+
+class Optimizer {
+  public:
+    virtual ~Optimizer() = default;
+
+    virtual void step(std::vector<double> &weights, const std::vector<double> &grads) = 0;
+};
+
+// --------------------------- GradientDescent ---------------------------
+class GradientDescent final : public Optimizer {
+  public:
+    explicit GradientDescent(double learning_rate) : lr_(learning_rate) {}
+
+    [[nodiscard]] double learning_rate() const noexcept { return lr_; }
+
+    void step(std::vector<double> &weights, const std::vector<double> &grads) override {
+        if (weights.size() != grads.size()) {
+            throw std::invalid_argument("weights and grads size mismatch");
+        }
+        for (std::size_t i = 0; i < weights.size(); ++i) {
+            weights[i] -= lr_ * grads[i];
+        }
+    }
+
+  private:
+    double lr_{};
+};
+
+// ------------------------------- Momentum ------------------------------
+class Momentum final : public Optimizer {
+  public:
+    // struct Params {
+    //     double learning_rate;
+    //     double momentum;
+    //     std::size_t dim;
+    // };
+
+    explicit Momentum(double learning_rate, double momentum, std::size_t dim)
+        : lr_(learning_rate), mu_(momentum), v_(dim, 0.0) {}
+
+    [[nodiscard]] double learning_rate() const noexcept { return lr_; }
+    [[nodiscard]] double momentum() const noexcept { return mu_; }
+    [[nodiscard]] const std::vector<double> &velocity() const noexcept { return v_; }
+
+    void step(std::vector<double> &weights, const std::vector<double> &grads) override {
+        if (weights.size() != grads.size())
+            throw std::invalid_argument("weights and grads size mismatch");
+        if (v_.size() != weights.size())
+            throw std::invalid_argument("velocity size mismatch");
+
+        for (std::size_t i = 0; i < weights.size(); ++i) {
+            v_[i] = mu_ * v_[i] + lr_ * grads[i]; // v ← μ v + η g
+            weights[i] -= v_[i];                  // w ← w − v
+        }
+    }
+
+  private:
+    double lr_{};
+    double mu_{};
+    std::vector<double> v_;
+};
+
+} // namespace cppx::opt
diff --git a/crates/simple_optimizers/src/main.cpp b/crates/simple_optimizers/src/main.cpp
new file mode 100644
index 0000000..2ac5411
--- /dev/null
+++ b/crates/simple_optimizers/src/main.cpp
@@ -0,0 +1,6 @@
+#include <iostream>
+
+int main() {
+    std::cout << "Hello, world!" << "\n";
+    return 0;
+}
\ No newline at end of file
diff --git a/crates/simple_optimizers/tests/CMakeLists.txt b/crates/simple_optimizers/tests/CMakeLists.txt
new file mode 100644
index 0000000..d75c19c
--- /dev/null
+++ b/crates/simple_optimizers/tests/CMakeLists.txt
@@ -0,0 +1,7 @@
+# crates/simple_optimizers/tests/CMakeLists.txt
+
+add_executable(simple_optimizers_test test_simple_optimizers.cpp)
+
+target_link_libraries(simple_optimizers_test PRIVATE simple_optimizers)
+
+add_test(NAME simple_optimizers_test COMMAND simple_optimizers_test)
\ No newline at end of file
diff --git a/crates/simple_optimizers/tests/test_simple_optimizers.cpp b/crates/simple_optimizers/tests/test_simple_optimizers.cpp
new file mode 100644
index 0000000..28e3b1c
--- /dev/null
+++ b/crates/simple_optimizers/tests/test_simple_optimizers.cpp
@@ -0,0 +1,58 @@
+#include "optimizers.hpp"
+#include <cassert>
+#include <cmath>
+#include <vector>
+
+using namespace cppx::opt;
+
+static bool approx_equal(double a, double b, double eps = 1e-6) {
+    return std::fabs(a - b) < eps;
+}
+
+int main() {
+    // GradientDescent constructor
+    {
+        GradientDescent opt(1e-3);
+        assert(approx_equal(opt.learning_rate(), 1e-3));
+    }
+
+    // GradientDescent step
+    {
+        GradientDescent opt(0.1);
+        std::vector<double> w{1.0, 2.0, 3.0};
+        std::vector<double> g{0.5, 0.5, 0.5};
+        opt.step(w, g);
+        assert(approx_equal(w[0], 0.95));
+        assert(approx_equal(w[1], 1.95));
+        assert(approx_equal(w[2], 2.95));
+    }
+
+    // Momentum constructor
+    {
+        Momentum opt(0.01, 0.9, 10);
+        assert(approx_equal(opt.learning_rate(), 0.01));
+        assert(approx_equal(opt.momentum(), 0.9));
+        assert(opt.velocity().size() == 10);
+    }
+
+    // Momentum step
+    {
+        Momentum opt(0.1, 0.9, 3);
+        std::vector<double> w{1.0, 2.0, 3.0};
+        std::vector<double> g{0.5, 0.5, 0.5};
+
+        opt.step(w, g);
+        // after 1st step: same as GD(0.1)
+        assert(approx_equal(w[0], 0.95));
+        assert(approx_equal(w[1], 1.95));
+        assert(approx_equal(w[2], 2.95));
+
+        opt.step(w, g);
+        // expected: [0.855, 1.855, 2.855]
+        assert(approx_equal(w[0], 0.855));
+        assert(approx_equal(w[1], 1.855));
+        assert(approx_equal(w[2], 2.855));
+    }
+
+    return 0;
+}

From ae00464a6d9090989936a3666ac7c08631c13c77 Mon Sep 17 00:00:00 2001
From: Brian Staber
Date: Wed, 27 Aug 2025 11:17:54 +0200
Subject: [PATCH 2/3] Add some braces

---
 crates/simple_optimizers/include/optimizers.hpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/crates/simple_optimizers/include/optimizers.hpp b/crates/simple_optimizers/include/optimizers.hpp
index 3daeb89..0d1f1c0 100644
--- a/crates/simple_optimizers/include/optimizers.hpp
+++ b/crates/simple_optimizers/include/optimizers.hpp
@@ -50,10 +50,12 @@ class Momentum final : public Optimizer {
     [[nodiscard]] const std::vector<double> &velocity() const noexcept { return v_; }
 
     void step(std::vector<double> &weights, const std::vector<double> &grads) override {
-        if (weights.size() != grads.size())
+        if (weights.size() != grads.size()) {
             throw std::invalid_argument("weights and grads size mismatch");
-        if (v_.size() != weights.size())
+        }
+        if (v_.size() != weights.size()) {
             throw std::invalid_argument("velocity size mismatch");
+        }
 
         for (std::size_t i = 0; i < weights.size(); ++i) {
             v_[i] = mu_ * v_[i] + lr_ * grads[i]; // v ← μ v + η g

From d838ad06ec75c40484e3e09aa3036070684dbb1e Mon Sep 17 00:00:00 2001
From: Brian Staber
Date: Wed, 27 Aug 2025 11:40:38 +0200
Subject: [PATCH 3/3] Add a book chapter

---
 Justfile                      |   5 ++
 book/src/SUMMARY.md           |   4 +-
 book/src/simple_optimizers.md | 116 ++++++++++++++++++++++++++++++++++
 3 files changed, 122 insertions(+), 3 deletions(-)
 create mode 100644 book/src/simple_optimizers.md

diff --git a/Justfile b/Justfile
index bb79fe9..03bf6b7 100644
--- a/Justfile
+++ b/Justfile
@@ -53,3 +53,8 @@ fmt:
 
 fmt-check:
     find crates -type f \( -name '*.cpp' -o -name '*.hpp' \) -exec clang-format --dry-run --Werror {} +
+
+# -------------------------
+# Book
+# -------------------------
+serve-book:
+    mdbook serve book
\ No newline at end of file
diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md
index 07384d3..17a346d 100644
--- a/book/src/SUMMARY.md
+++ b/book/src/SUMMARY.md
@@ -1,5 +1,3 @@
 - [Introduction](introduction.md)
 - [Kalman filter](kf_linear.md)
-  - [Class definition]()
-  - [Class implementation]()
-  - [Python bindings]()
\ No newline at end of file
+- [Simple optimizers](simple_optimizers.md)
\ No newline at end of file
diff --git a/book/src/simple_optimizers.md b/book/src/simple_optimizers.md
new file mode 100644
index 0000000..3e8fff7
--- /dev/null
+++ b/book/src/simple_optimizers.md
@@ -0,0 +1,116 @@
# Optimizers

This chapter documents the small optimization module used in the project: a minimal runtime‑polymorphic interface `Optimizer` with two concrete implementations, Gradient Descent and Momentum. It is designed for clarity and for easy swapping of algorithms inside training loops.

## Problem setting

Given parameters $\mathbf{w}\in\mathbb{R}^d$ and a loss $\mathcal{L}(\mathbf{w})$, an optimizer updates the weights using the gradient
$$
\mathbf{g}_t=\nabla_{\mathbf{w}}\mathcal{L}(\mathbf{w}_t).
$$
Each algorithm defines an update rule $\mathbf{w}_{t+1} = \Phi(\mathbf{w}_t,\mathbf{g}_t,\theta)$ with hyper‑parameters $\theta$ (e.g., learning rate, momentum).

## API overview
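
At its core, the module exposes a single abstract base class; every algorithm mutates the weights in place through one virtual `step` method. Condensed from the header (full listing below):

```cpp
#include <vector>

namespace cppx::opt {

// Abstract update rule: mutates weights in place given the gradients.
class Optimizer {
  public:
    virtual ~Optimizer() = default;
    virtual void step(std::vector<double> &weights,
                      const std::vector<double> &grads) = 0;
};

} // namespace cppx::opt
```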
<details>
<summary>Click here to view the full implementation: <code>include/cppx/opt/optimizers.hpp</code>. We break it down in the remainder of this section.</summary>

```cpp
{{#include ../../crates/simple_optimizers/include/optimizers.hpp}}
```

</details>
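
Before walking through each algorithm, a minimal smoke test (mirroring the unit tests shipped with the module) shows the interface in action: two `Momentum` steps with $\eta = 0.1$, $\mu = 0.9$ and a constant gradient of $0.5$ move a weight from $1.0$ to $0.95$ and then to $0.855$.

```cpp
#include "optimizers.hpp"
#include <cassert>
#include <cmath>
#include <vector>

int main() {
    using namespace cppx::opt;

    Momentum opt(/*learning_rate=*/0.1, /*momentum=*/0.9, /*dim=*/3);
    std::vector<double> w{1.0, 2.0, 3.0};
    const std::vector<double> g{0.5, 0.5, 0.5};

    opt.step(w, g); // v = 0.9*0 + 0.1*0.5 = 0.05,  so w[0] = 1.0 - 0.05 = 0.95
    opt.step(w, g); // v = 0.9*0.05 + 0.05 = 0.095, so w[0] = 0.95 - 0.095 = 0.855
    assert(std::fabs(w[0] - 0.855) < 1e-12);
    return 0;
}
```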
Design choices
- A small virtual interface to enable swapping algorithms at runtime.
- `std::unique_ptr` for owning polymorphism; borrowing functions accept `Optimizer&`.
- Exceptions (`std::invalid_argument`) signal size mismatches.


## Gradient descent

Update rule
$$
\mathbf{w}_{t+1}=\mathbf{w}_{t}-\eta\,\mathbf{g}_t ,
$$
with learning rate $\eta>0$.

Implementation
```cpp
void GradientDescent::step(std::vector<double>& w,
                           const std::vector<double>& g) {
    if (w.size() != g.size()) throw std::invalid_argument("size mismatch");
    for (std::size_t i = 0; i < w.size(); ++i) {
        w[i] -= lr_ * g[i];
    }
}
```

## Momentum-based gradient descent

Update rule
$$
\begin{aligned}
\mathbf{v}_{t+1} &= \mu\,\mathbf{v}_{t} + \eta\,\mathbf{g}_t, \\\\
\mathbf{w}_{t+1} &= \mathbf{w}_{t} - \mathbf{v}_{t+1},
\end{aligned}
$$
with momentum $\mu\in[0,1)$ and learning rate $\eta>0$.

Implementation
```cpp
Momentum::Momentum(double learning_rate, double momentum, std::size_t dim)
    : lr_(learning_rate), mu_(momentum), v_(dim, 0.0) {}

void Momentum::step(std::vector<double>& w, const std::vector<double>& g) {
    if (w.size() != g.size()) throw std::invalid_argument("size mismatch");
    if (v_.size() != w.size()) throw std::invalid_argument("velocity size mismatch");

    for (std::size_t i = 0; i < w.size(); ++i) {
        v_[i] = mu_ * v_[i] + lr_ * g[i];
        w[i] -= v_[i];
    }
}
```

## Using the optimizers

### Owning an optimizer (runtime polymorphism)

```cpp
#include <memory>
#include <vector>
#include "cppx/opt/optimizers.hpp"

using namespace cppx::opt;

const std::size_t d = 10; // parameter dimension (illustrative)
std::vector<double> w(d, 0.0), g(d, 0.0);

// Choose an algorithm at runtime:
std::unique_ptr<Optimizer> opt =
    std::make_unique<Momentum>(/*lr=*/0.1, /*mu=*/0.9, /*dim=*/w.size());

for (int epoch = 0; epoch < 100; ++epoch) {
    // ... compute gradients into g ...
    opt->step(w, g); // updates w in place
}
```

### Borrowing an optimizer (no ownership transfer)

```cpp
void train_one_epoch(Optimizer& opt,
                     std::vector<double>& w,
                     std::vector<double>& g) {
    // ... fill g ...
    opt.step(w, g);
}
```

### API variations (optional)

If C++20 is available, `std::span` can make the interface container‑agnostic:

```cpp
// virtual void step(std::span<double> w, std::span<const double> g) = 0;
```
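
A minimal sketch of that variation, assuming C++20 (hypothetical types, not part of the current header):

```cpp
#include <cstddef>
#include <span>
#include <stdexcept>

namespace cppx::opt {

// Hypothetical span-based interface: accepts std::vector, std::array,
// or raw buffers without copying or committing to one container type.
class SpanOptimizer {
  public:
    virtual ~SpanOptimizer() = default;
    virtual void step(std::span<double> weights, std::span<const double> grads) = 0;
};

class SpanGradientDescent final : public SpanOptimizer {
  public:
    explicit SpanGradientDescent(double learning_rate) : lr_(learning_rate) {}

    void step(std::span<double> w, std::span<const double> g) override {
        if (w.size() != g.size()) {
            throw std::invalid_argument("weights and grads size mismatch");
        }
        for (std::size_t i = 0; i < w.size(); ++i) {
            w[i] -= lr_ * g[i];
        }
    }

  private:
    double lr_{};
};

} // namespace cppx::opt
```

Existing `std::vector`-based call sites would keep working unchanged, since a `std::vector<double>` converts implicitly to `std::span<double>`.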