Merged
5 changes: 5 additions & 0 deletions Justfile
@@ -53,3 +53,8 @@ fmt:
fmt-check:
    find crates -type f \( -name '*.cpp' -o -name '*.hpp' \) -exec clang-format --dry-run --Werror {} +

# -------------------------
# Book
# -------------------------
serve-book:
    mdbook serve book
4 changes: 1 addition & 3 deletions book/src/SUMMARY.md
@@ -1,5 +1,3 @@
- [Introduction](introduction.md)
- [Kalman filter](kf_linear.md)
- [Class definition]()
- [Class implementation]()
- [Python bindings]()
- [Simple optimizers](simple_optimizers.md)
116 changes: 116 additions & 0 deletions book/src/simple_optimizers.md
@@ -0,0 +1,116 @@
# Optimizers

This chapter documents the small optimization module used in the project: a minimal runtime‑polymorphic interface `Optimizer` with two concrete implementations, Gradient Descent and Momentum. It is designed for clarity and easy swapping of algorithms in training loops.


## Problem setting

Given parameters $\mathbf{w}\in\mathbb{R}^d$ and a loss $\mathcal{L}(\mathbf{w})$, an optimizer updates weights using the gradient
$$
\mathbf{g}_t=\nabla_{\mathbf{w}}\mathcal{L}(\mathbf{w}_t).
$$
Each algorithm defines an update rule $\mathbf{w}_{t+1} = \Phi(\mathbf{w}_t,\mathbf{g}_t,\theta)$ with hyper‑parameters $\theta$ (e.g., learning rate, momentum).


## API overview

<details>
<summary>Click here to view the full implementation: <b>include/cppx/opt/optimizers.hpp</b>. We break it down in the remainder of this section.</summary>

```cpp
{{#include ../../crates/simple_optimizers/include/optimizers.hpp}}
```
</details>

Design choices:
- A small virtual interface enables swapping algorithms at runtime (a minimal sketch follows below).
- `std::unique_ptr<Optimizer>` provides owning polymorphism; borrowing functions accept `Optimizer&`.
- Exceptions (`std::invalid_argument`) signal size mismatches between weights and gradients.
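
For orientation, here is a minimal sketch of what the interface can look like, consistent with the implementations shown in the rest of this chapter; the included header above is authoritative, and names or details may differ.

```cpp
#include <cstddef>
#include <vector>

namespace cppx::opt {

// Sketch only: the smallest virtual interface that lets training code
// swap algorithms at runtime.
class Optimizer {
public:
  virtual ~Optimizer() = default;
  // Updates w in place from the gradient g; implementations throw
  // std::invalid_argument on size mismatch.
  virtual void step(std::vector<double>& w,
                    const std::vector<double>& g) = 0;
};

class GradientDescent final : public Optimizer {
public:
  explicit GradientDescent(double learning_rate) : lr_(learning_rate) {}
  void step(std::vector<double>& w, const std::vector<double>& g) override;

private:
  double lr_; // learning rate (eta)
};

class Momentum final : public Optimizer {
public:
  Momentum(double learning_rate, double momentum, std::size_t dim);
  void step(std::vector<double>& w, const std::vector<double>& g) override;

private:
  double lr_;             // learning rate (eta)
  double mu_;             // momentum coefficient (mu)
  std::vector<double> v_; // velocity, one entry per parameter
};

} // namespace cppx::opt
```

The `step` bodies are the ones walked through below.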


## Gradient descent

Update rule
$$
\mathbf{w}_{t+1}=\mathbf{w}_{t}-\eta\,\mathbf{g}_t ,
$$
with learning rate $\eta>0$.

Implementation
```cpp
void GradientDescent::step(std::vector<double>& w,
                           const std::vector<double>& g) {
  if (w.size() != g.size()) throw std::invalid_argument("size mismatch");
  for (std::size_t i = 0; i < w.size(); ++i) {
    w[i] -= lr_ * g[i];
  }
}
```
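
As a quick sanity check, here is a hypothetical driver that minimizes a one‑dimensional quadratic; it assumes `GradientDescent` is constructed from the learning rate alone.

```cpp
#include <vector>

#include "cppx/opt/optimizers.hpp"

int main() {
  // Minimize f(w) = (w - 3)^2, whose gradient is 2 (w - 3).
  cppx::opt::GradientDescent opt(/*learning_rate=*/0.1);
  std::vector<double> w{0.0}, g{0.0};
  for (int t = 0; t < 100; ++t) {
    g[0] = 2.0 * (w[0] - 3.0); // analytic gradient at the current w
    opt.step(w, g);            // w[0] approaches the minimizer 3
  }
}
```

With $\eta = 0.1$ the iteration is $w_{t+1} = 0.8\,w_t + 0.6$, so the error contracts geometrically toward $w^* = 3$.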

## Momentum-based gradient descent

Update rule
$$
\begin{aligned}
\mathbf{v}_{t+1} &= \mu\,\mathbf{v}_{t} + \eta\,\mathbf{g}_t, \\\\
\mathbf{w}_{t+1} &= \mathbf{w}_{t} - \mathbf{v}_{t+1},
\end{aligned}
$$
with momentum $\mu\in[0,1)$ and learning rate $\eta>0$. Here the learning rate is folded into the velocity update; an equivalent convention accumulates raw gradients, $\mathbf{v}_{t+1}=\mu\,\mathbf{v}_t+\mathbf{g}_t$, and applies $\eta$ only in the weight update, $\mathbf{w}_{t+1}=\mathbf{w}_t-\eta\,\mathbf{v}_{t+1}$.

Implementation
```cpp
Momentum::Momentum(double learning_rate, double momentum, std::size_t dim)
    : lr_(learning_rate), mu_(momentum), v_(dim, 0.0) {}

void Momentum::step(std::vector<double>& w, const std::vector<double>& g) {
  if (w.size() != g.size()) throw std::invalid_argument("size mismatch");
  if (v_.size() != w.size()) throw std::invalid_argument("velocity size mismatch");

  for (std::size_t i = 0; i < w.size(); ++i) {
    v_[i] = mu_ * v_[i] + lr_ * g[i];
    w[i] -= v_[i];
  }
}
```

## Using the optimizers

### Owning an optimizer (runtime polymorphism)

```cpp
#include <cstddef>
#include <memory>
#include <vector>

#include "cppx/opt/optimizers.hpp"

using namespace cppx::opt;

const std::size_t d = 10; // parameter dimension (example value)
std::vector<double> w(d, 0.0), g(d, 0.0);

// Choose an algorithm at runtime:
std::unique_ptr<Optimizer> opt =
    std::make_unique<Momentum>(/*lr=*/0.1, /*mu=*/0.9, /*dim=*/w.size());

for (int epoch = 0; epoch < 100; ++epoch) {
  // ... compute gradients into g ...
  opt->step(w, g); // updates w in place
}
```

### Borrowing an optimizer (no ownership transfer)

```cpp
void train_one_epoch(Optimizer& opt,
                     std::vector<double>& w,
                     std::vector<double>& g) {
  // ... fill g ...
  opt.step(w, g);
}
```
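
At the call site, any concrete optimizer is passed by reference; the owning pointer from the previous snippet is simply dereferenced (continuing that hypothetical setup):

```cpp
Momentum momentum(/*lr=*/0.1, /*mu=*/0.9, /*dim=*/w.size());
train_one_epoch(momentum, w, g); // borrow a stack-allocated optimizer

train_one_epoch(*opt, w, g);     // borrow through the owning unique_ptr
```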

### API variations (optional)

If C++20 is available, `std::span` can make the interface container‑agnostic:

```cpp
// virtual void step(std::span<double> w, std::span<const double> g) = 0;
```
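
A fuller sketch of that variation, hypothetical rather than part of the project's header, could look like this; spans decouple the optimizer from whichever container owns the weights and gradients.

```cpp
#include <cstddef>
#include <span>
#include <stdexcept>

class SpanOptimizer {
public:
  virtual ~SpanOptimizer() = default;
  virtual void step(std::span<double> w, std::span<const double> g) = 0;
};

class SpanGradientDescent final : public SpanOptimizer {
public:
  explicit SpanGradientDescent(double learning_rate) : lr_(learning_rate) {}

  void step(std::span<double> w, std::span<const double> g) override {
    if (w.size() != g.size()) throw std::invalid_argument("size mismatch");
    for (std::size_t i = 0; i < w.size(); ++i) {
      w[i] -= lr_ * g[i];
    }
  }

private:
  double lr_;
};
```

The same `step` then accepts a `std::vector`, a `std::array`, or a raw buffer without copies.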
3 changes: 2 additions & 1 deletion crates/CMakeLists.txt
@@ -1 +1,2 @@
add_subdirectory("kf_linear")
add_subdirectory("kf_linear")
add_subdirectory("simple_optimizers")
100 changes: 45 additions & 55 deletions crates/kf_linear/include/kf_linear.hpp
@@ -1,10 +1,10 @@
#pragma once

#include <Eigen/Dense>
#include <iostream>
#include <optional>
#include <vector>
#include <stdexcept>

/**
* @brief Generic linear Kalman filter (templated, no control term).
@@ -17,43 +17,34 @@
* Nx = state dimension (int or Eigen::Dynamic)
* Ny = measurement dimension (int or Eigen::Dynamic)
*/
template<int Nx, int Ny>
class KFLinear {
public:
template <int Nx, int Ny> class KFLinear {
public:
using StateVec = Eigen::Matrix<double, Nx, 1>;
using StateMat = Eigen::Matrix<double, Nx, Nx>;
using MeasVec = Eigen::Matrix<double, Ny, 1>;
using MeasMat = Eigen::Matrix<double, Ny, Ny>;
using ObsMat = Eigen::Matrix<double, Ny, Nx>;

/// Construct filter with initial condition and model matrices.
KFLinear(const StateVec& initial_state,
const StateMat& initial_covariance,
const StateMat& transition_matrix,
const ObsMat& observation_matrix,
const StateMat& process_covariance,
const MeasMat& measurement_covariance)
: x_(initial_state),
P_(initial_covariance),
A_(transition_matrix),
H_(observation_matrix),
Q_(process_covariance),
R_(measurement_covariance)
{
std::cout << "DEBUG" << std::endl;
const auto n = x_.rows();
if (A_.rows() != n || A_.cols() != n)
throw std::invalid_argument("A must be n×n and match x dimension");
if (P_.rows() != n || P_.cols() != n)
throw std::invalid_argument("P must be n×n");
if (Q_.rows() != n || Q_.cols() != n)
throw std::invalid_argument("Q must be n×n");
if (H_.cols() != n)
throw std::invalid_argument("H must have n columns");
const auto m = H_.rows();
if (R_.rows() != m || R_.cols() != m)
throw std::invalid_argument("R must be m×m with m = H.rows()");
}
KFLinear(const StateVec &initial_state, const StateMat &initial_covariance,
const StateMat &transition_matrix, const ObsMat &observation_matrix,
const StateMat &process_covariance, const MeasMat &measurement_covariance)
: x_(initial_state), P_(initial_covariance), A_(transition_matrix), H_(observation_matrix),
Q_(process_covariance), R_(measurement_covariance) {
std::cout << "DEBUG" << std::endl;
const auto n = x_.rows();
if (A_.rows() != n || A_.cols() != n)
throw std::invalid_argument("A must be n×n and match x dimension");
if (P_.rows() != n || P_.cols() != n)
throw std::invalid_argument("P must be n×n");
if (Q_.rows() != n || Q_.cols() != n)
throw std::invalid_argument("Q must be n×n");
if (H_.cols() != n)
throw std::invalid_argument("H must have n columns");
const auto m = H_.rows();
if (R_.rows() != m || R_.cols() != m)
throw std::invalid_argument("R must be m×m with m = H.rows()");
}

/// Predict step (no control).
void predict() {
@@ -62,7 +53,7 @@ class KFLinear {
}

/// Update step with a measurement z.
void update(const MeasVec& z) {
void update(const MeasVec &z) {
// Innovation
MeasVec nu = z - H_ * x_;

@@ -76,9 +67,8 @@
}

// K = P H^T S^{-1} via solve: S * (K^T) = (P H^T)^T
const auto PHt = P_ * H_.transpose(); // (Nx × Ny)
Eigen::Matrix<double, Nx, Ny> K =
ldlt.solve(PHt.transpose()).transpose(); // (Nx × Ny)
const auto PHt = P_ * H_.transpose(); // (Nx × Ny)
Eigen::Matrix<double, Nx, Ny> K = ldlt.solve(PHt.transpose()).transpose(); // (Nx × Ny)

// State update
x_ += K * nu;
@@ -92,42 +82,42 @@
}

/// One full step: predict then (optionally) update.
void step(const std::optional<MeasVec>& measurement) {
void step(const std::optional<MeasVec> &measurement) {
predict();
if (measurement) {
update(*measurement);
}
}

/// Run over a sequence of (optional) measurements.
std::vector<StateVec> filter(const std::vector<std::optional<MeasVec>>& measurements) {
std::vector<StateVec> filter(const std::vector<std::optional<MeasVec>> &measurements) {
std::vector<StateVec> out;
out.reserve(measurements.size());
for (const auto& z : measurements) {
for (const auto &z : measurements) {
step(z);
out.push_back(x_);
}
return out;
}

// Accessors
[[nodiscard]] const StateVec& state() const { return x_; }
[[nodiscard]] const StateMat& covariance() const { return P_; }
[[nodiscard]] const StateVec &state() const { return x_; }
[[nodiscard]] const StateMat &covariance() const { return P_; }

// (Optional) setters if you want to tweak model online
void set_transition(const StateMat& A) { A_ = A; }
void set_observation(const ObsMat& H) { H_ = H; }
void set_process_noise(const StateMat& Q) { Q_ = Q; }
void set_measurement_noise(const MeasMat& R) { R_ = R; }
void set_transition(const StateMat &A) { A_ = A; }
void set_observation(const ObsMat &H) { H_ = H; }
void set_process_noise(const StateMat &Q) { Q_ = Q; }
void set_measurement_noise(const MeasMat &R) { R_ = R; }

private:
// Model
StateMat A_; ///< State transition
ObsMat H_; ///< Observation
StateMat Q_; ///< Process noise covariance
MeasMat R_; ///< Measurement noise covariance

// Estimates
StateVec x_; ///< State mean
StateMat P_; ///< State covariance
};