From 022e7f7eb4dfba6825ab96f0e11996c6cca1326c Mon Sep 17 00:00:00 2001
From: Brian Staber
Date: Tue, 26 Aug 2025 22:37:37 +0200
Subject: [PATCH 1/3] :rocket: Add simple-optimizers example

---
 crates/CMakeLists.txt                         |   3 +-
 crates/kf_linear/include/kf_linear.hpp        | 100 +++++++--------
 crates/kf_linear/tests/test_kf_linear.cpp     | 118 ++++++++++--------
 crates/simple_optimizers/CMakeLists.txt       |  19 +++
 .../simple_optimizers/include/optimizers.hpp  |  70 +++++++++++
 crates/simple_optimizers/src/main.cpp         |   6 +
 crates/simple_optimizers/tests/CMakeLists.txt |   7 ++
 .../tests/test_simple_optimizers.cpp          |  58 +++++++++
 8 files changed, 272 insertions(+), 109 deletions(-)
 create mode 100644 crates/simple_optimizers/CMakeLists.txt
 create mode 100644 crates/simple_optimizers/include/optimizers.hpp
 create mode 100644 crates/simple_optimizers/src/main.cpp
 create mode 100644 crates/simple_optimizers/tests/CMakeLists.txt
 create mode 100644 crates/simple_optimizers/tests/test_simple_optimizers.cpp

diff --git a/crates/CMakeLists.txt b/crates/CMakeLists.txt
index aa3e246..678104b 100644
--- a/crates/CMakeLists.txt
+++ b/crates/CMakeLists.txt
@@ -1 +1,2 @@
-add_subdirectory("kf_linear")
\ No newline at end of file
+add_subdirectory("kf_linear")
+add_subdirectory("simple_optimizers")
\ No newline at end of file
diff --git a/crates/kf_linear/include/kf_linear.hpp b/crates/kf_linear/include/kf_linear.hpp
index ee19397..fcb3fd0 100644
--- a/crates/kf_linear/include/kf_linear.hpp
+++ b/crates/kf_linear/include/kf_linear.hpp
@@ -1,10 +1,10 @@
 #pragma once
 
 #include <Eigen/Dense>
+#include <iostream>
 #include <optional>
-#include <vector>
 #include <stdexcept>
-#include <iostream>
+#include <vector>
 
 /**
  * @brief Generic linear Kalman filter (templated, no control term).
@@ -17,43 +17,34 @@
  * Nx = state dimension (int or Eigen::Dynamic)
  * Ny = measurement dimension (int or Eigen::Dynamic)
  */
-template <int Nx, int Ny>
-class KFLinear {
-public:
+template <int Nx, int Ny> class KFLinear {
+  public:
     using StateVec = Eigen::Matrix<double, Nx, 1>;
     using StateMat = Eigen::Matrix<double, Nx, Nx>;
-    using MeasVec  = Eigen::Matrix<double, Ny, 1>;
-    using MeasMat  = Eigen::Matrix<double, Ny, Ny>;
-    using ObsMat   = Eigen::Matrix<double, Ny, Nx>;
+    using MeasVec = Eigen::Matrix<double, Ny, 1>;
+    using MeasMat = Eigen::Matrix<double, Ny, Ny>;
+    using ObsMat = Eigen::Matrix<double, Ny, Nx>;
 
     /// Construct filter with initial condition and model matrices.
-    KFLinear(const StateVec& initial_state,
-             const StateMat& initial_covariance,
-             const StateMat& transition_matrix,
-             const ObsMat& observation_matrix,
-             const StateMat& process_covariance,
-             const MeasMat& measurement_covariance)
-        : x_(initial_state),
-          P_(initial_covariance),
-          A_(transition_matrix),
-          H_(observation_matrix),
-          Q_(process_covariance),
-          R_(measurement_covariance)
-    {
-        std::cout << "DEBUG" << std::endl;
-        const auto n = x_.rows();
-        if (A_.rows() != n || A_.cols() != n)
-            throw std::invalid_argument("A must be n×n and match x dimension");
-        if (P_.rows() != n || P_.cols() != n)
-            throw std::invalid_argument("P must be n×n");
-        if (Q_.rows() != n || Q_.cols() != n)
-            throw std::invalid_argument("Q must be n×n");
-        if (H_.cols() != n)
-            throw std::invalid_argument("H must have n columns");
-        const auto m = H_.rows();
-        if (R_.rows() != m || R_.cols() != m)
-            throw std::invalid_argument("R must be m×m with m = H.rows()");
-    }
+    KFLinear(const StateVec &initial_state, const StateMat &initial_covariance,
+             const StateMat &transition_matrix, const ObsMat &observation_matrix,
+             const StateMat &process_covariance, const MeasMat &measurement_covariance)
+        : x_(initial_state), P_(initial_covariance), A_(transition_matrix), H_(observation_matrix),
+          Q_(process_covariance), R_(measurement_covariance) {
+        std::cout << "DEBUG" << std::endl;
+        const auto n = x_.rows();
+        if (A_.rows() != n || A_.cols() != n)
+            throw std::invalid_argument("A must be n×n and match x dimension");
+        if (P_.rows() != n || P_.cols() != n)
+            throw std::invalid_argument("P must be n×n");
+        if (Q_.rows() != n || Q_.cols() != n)
+            throw std::invalid_argument("Q must be n×n");
+        if (H_.cols() != n)
+            throw std::invalid_argument("H must have n columns");
+        const auto m = H_.rows();
+        if (R_.rows() != m || R_.cols() != m)
+            throw std::invalid_argument("R must be m×m with m = H.rows()");
+    }
 
     /// Predict step (no control).
     void predict() {
@@ -62,7 +53,7 @@ class KFLinear {
     }
 
     /// Update step with a measurement z.
-    void update(const MeasVec& z) {
+    void update(const MeasVec &z) {
         // Innovation
         MeasVec nu = z - H_ * x_;
 
@@ -76,9 +67,8 @@
         }
 
         // K = P H^T S^{-1} via solve: S * (K^T) = (P H^T)^T
-        const auto PHt = P_ * H_.transpose(); // (Nx × Ny)
-        Eigen::Matrix<double, Nx, Ny> K =
-            ldlt.solve(PHt.transpose()).transpose(); // (Nx × Ny)
+        const auto PHt = P_ * H_.transpose();                                      // (Nx × Ny)
+        Eigen::Matrix<double, Nx, Ny> K = ldlt.solve(PHt.transpose()).transpose(); // (Nx × Ny)
 
         // State update
         x_ += K * nu;
@@ -92,7 +82,7 @@ class KFLinear {
     }
 
     /// One full step: predict then (optionally) update.
-    void step(const std::optional<MeasVec>& measurement) {
+    void step(const std::optional<MeasVec> &measurement) {
         predict();
         if (measurement) {
             update(*measurement);
         }
     }
 
     /// Run over a sequence of (optional) measurements.
-    std::vector<StateVec> filter(const std::vector<std::optional<MeasVec>>& measurements) {
+    std::vector<StateVec> filter(const std::vector<std::optional<MeasVec>> &measurements) {
         std::vector<StateVec> out;
         out.reserve(measurements.size());
-        for (const auto& z : measurements) {
+        for (const auto &z : measurements) {
             step(z);
             out.push_back(x_);
         }
@@ -111,23 +101,23 @@
     }
 
     // Accessors
-    [[nodiscard]] const StateVec& state() const { return x_; }
-    [[nodiscard]] const StateMat& covariance() const { return P_; }
+    [[nodiscard]] const StateVec &state() const { return x_; }
+    [[nodiscard]] const StateMat &covariance() const { return P_; }
 
     // (Optional) setters if you want to tweak model online
-    void set_transition(const StateMat& A) { A_ = A; }
-    void set_observation(const ObsMat& H) { H_ = H; }
-    void set_process_noise(const StateMat& Q) { Q_ = Q; }
-    void set_measurement_noise(const MeasMat& R) { R_ = R; }
+    void set_transition(const StateMat &A) { A_ = A; }
+    void set_observation(const ObsMat &H) { H_ = H; }
+    void set_process_noise(const StateMat &Q) { Q_ = Q; }
+    void set_measurement_noise(const MeasMat &R) { R_ = R; }
 
-private:
+  private:
     // Model
-    StateMat A_;  ///< State transition
-    ObsMat H_;    ///< Observation
-    StateMat Q_;  ///< Process noise covariance
-    MeasMat R_;   ///< Measurement noise covariance
+    StateMat A_; ///< State transition
+    ObsMat H_;   ///< Observation
+    StateMat Q_; ///< Process noise covariance
+    MeasMat R_;  ///< Measurement noise covariance
 
     // Estimates
-    StateVec x_;  ///< State mean
-    StateMat P_;  ///< State covariance
+    StateVec x_; ///< State mean
+    StateMat P_; ///< State covariance
 };
diff --git a/crates/kf_linear/tests/test_kf_linear.cpp b/crates/kf_linear/tests/test_kf_linear.cpp
index f546671..daf6506 100644
--- a/crates/kf_linear/tests/test_kf_linear.cpp
+++ b/crates/kf_linear/tests/test_kf_linear.cpp
@@ -1,7 +1,7 @@
 // tests/test_kalman_filter.cpp
-#include <catch2/catch_test_macros.hpp>
-#include <kf_linear.hpp>
 #include <catch2/catch_approx.hpp>
+#include <catch2/catch_test_macros.hpp>
+#include <kf_linear.hpp>
 #include <optional>
 #include <vector>
 
@@ -9,24 +9,25 @@
 using Catch::Approx;
 
-TEST_CASE("Predict-only step leaves state at A*x and P -> A P A^T + Q")
-{
-    using KF = KFLinear<2,1>;
+TEST_CASE("Predict-only step leaves state at A*x and P -> A P A^T + Q") {
+    using KF = KFLinear<2, 1>;
     using StateVec = typename KF::StateVec;
     using StateMat = typename KF::StateMat;
-    using ObsMat   = typename KF::ObsMat;
-    using MeasMat  = typename KF::MeasMat;
-    using MeasVec  = typename KF::MeasVec;
-
-    StateVec x0; x0 << 2.0, -1.0;
-    StateMat P0; P0 << 1.0, 0.2,
-                       0.2, 2.0;
-
-    StateMat A; A << 1.0, 1.0,
-                     0.0, 1.0; // constant-velocity
-    ObsMat H; H << 1.0, 0.0;
+    using ObsMat = typename KF::ObsMat;
+    using MeasMat = typename KF::MeasMat;
+    using MeasVec = typename KF::MeasVec;
+
+    StateVec x0;
+    x0 << 2.0, -1.0;
+    StateMat P0;
+    P0 << 1.0, 0.2, 0.2, 2.0;
+
+    StateMat A;
+    A << 1.0, 1.0, 0.0, 1.0; // constant-velocity
+    ObsMat H;
+    H << 1.0, 0.0;
     StateMat Q = 0.1 * StateMat::Identity();
-    MeasMat R = MeasMat::Identity();
+    MeasMat R = MeasMat::Identity();
 
     KF kf(x0, P0, A, H, Q, R);
 
@@ -39,72 +40,83 @@
     REQUIRE(kf.covariance().isApprox(P_expected, 1e-12));
 }
 
-TEST_CASE("Scalar KF update matches closed-form")
-{
-    using KF = KFLinear<1,1>;
+TEST_CASE("Scalar KF update matches closed-form") {
+    using KF = KFLinear<1, 1>;
     using StateVec = typename KF::StateVec;
     using StateMat = typename KF::StateMat;
-    using ObsMat   = typename KF::ObsMat;
-    using MeasMat  = typename KF::MeasMat;
-    using MeasVec  = typename KF::MeasVec;
+    using ObsMat = typename KF::ObsMat;
+    using MeasMat = typename KF::MeasMat;
+    using MeasVec = typename KF::MeasVec;
 
     // 1D model: x_k = x_{k-1} + w, z_k = x_k + v
-    StateVec x0; x0 << 0.0;
-    StateMat P0; P0 << 1.0;
-
-    StateMat A; A << 1.0;
-    ObsMat H; H << 1.0;
-    StateMat Q; Q << 0.25; // process noise var
-    MeasMat R; R << 1.0;   // measurement noise var
+    StateVec x0;
+    x0 << 0.0;
+    StateMat P0;
+    P0 << 1.0;
+
+    StateMat A;
+    A << 1.0;
+    ObsMat H;
+    H << 1.0;
+    StateMat Q;
+    Q << 0.25; // process noise var
+    MeasMat R;
+    R << 1.0; // measurement noise var
 
     KF kf(x0, P0, A, H, Q, R);
 
     // Step 1: no measurement
     kf.step(std::nullopt);
-    double Pp = 1.0 + 0.25;  // P' = P0 + Q
-    double xp = 0.0;         // x' = x0
+    double Pp = 1.0 + 0.25; // P' = P0 + Q
+    double xp = 0.0;        // x' = x0
     REQUIRE(kf.state()(0) == Approx(xp));
-    REQUIRE(kf.covariance()(0,0) == Approx(Pp));
+    REQUIRE(kf.covariance()(0, 0) == Approx(Pp));
 
     // Step 2: with measurement z = 1.2
-    MeasVec z; z << 1.2;
+    MeasVec z;
+    z << 1.2;
     kf.step(z);
 
     // After second predict: Ppred = Pp + Q
-    double Ppred = Pp + 0.25;  // = 1.5
+    double Ppred = Pp + 0.25; // = 1.5
     double K = Ppred / (Ppred + 1.0);
     double x_post = xp + K * (1.2 - xp);
-    double P_post = (1.0 - K) * Ppred;  // Joseph reduces to this in scalar
+    double P_post = (1.0 - K) * Ppred; // Joseph reduces to this in scalar
 
     REQUIRE(kf.state()(0) == Approx(x_post).epsilon(1e-12));
-    REQUIRE(kf.covariance()(0,0) == Approx(P_post).epsilon(1e-12));
+    REQUIRE(kf.covariance()(0, 0) == Approx(P_post).epsilon(1e-12));
 }
 
-TEST_CASE("Batch filter handles missing and present measurements")
-{
-    using KF = KFLinear<2,1>;
+TEST_CASE("Batch filter handles missing and present measurements") {
+    using KF = KFLinear<2, 1>;
     using StateVec = typename KF::StateVec;
     using StateMat = typename KF::StateMat;
-    using ObsMat   = typename KF::ObsMat;
-    using MeasMat  = typename KF::MeasMat;
-    using MeasVec  = typename KF::MeasVec;
+    using ObsMat = typename KF::ObsMat;
+    using MeasMat = typename KF::MeasMat;
+    using MeasVec = typename KF::MeasVec;
 
-    StateVec x0; x0 << 0.0, 0.0;
+    StateVec x0;
+    x0 << 0.0, 0.0;
     StateMat P0 = StateMat::Identity();
-    StateMat A; A << 1.0, 1.0,
-                     0.0, 1.0;
-    ObsMat H; H << 1.0, 0.0;
-    StateMat Q = 0.01 * StateMat::Identity();
-    MeasMat R = 0.25 * MeasMat::Identity();
+    StateMat A;
+    A << 1.0, 1.0, 0.0, 1.0;
+    ObsMat H;
+    H << 1.0, 0.0;
+    StateMat Q = 0.01 * StateMat::Identity();
+    MeasMat R = 0.25 * MeasMat::Identity();
 
     KF kf(x0, P0, A, H, Q, R);
 
     std::vector<std::optional<MeasVec>> zs;
-    zs.push_back(std::nullopt);              // t=1: no measurement
-    MeasVec z1; z1 << 0.8; zs.push_back(z1); // t=2
-    MeasVec z2; z2 << 1.6; zs.push_back(z2); // t=3
-    zs.push_back(std::nullopt);              // t=4
+    zs.push_back(std::nullopt); // t=1: no measurement
+    MeasVec z1;
+    z1 << 0.8;
+    zs.push_back(z1); // t=2
+    MeasVec z2;
+    z2 << 1.6;
+    zs.push_back(z2); // t=3
+    zs.push_back(std::nullopt); // t=4
 
     auto states = kf.filter(zs);
     REQUIRE(states.size() == zs.size());
diff --git a/crates/simple_optimizers/CMakeLists.txt b/crates/simple_optimizers/CMakeLists.txt
new file mode 100644
index 0000000..e2302ad
--- /dev/null
+++ b/crates/simple_optimizers/CMakeLists.txt
@@ -0,0 +1,19 @@
+# Header-only target
+add_library(simple_optimizers INTERFACE)
+
+# Public includes (exports the include/ dir to consumers)
+target_include_directories(simple_optimizers INTERFACE
+    ${CMAKE_CURRENT_SOURCE_DIR}/include
+)
+
+# Link Eigen and set language level
+target_link_libraries(simple_optimizers INTERFACE Eigen3::Eigen)
+target_compile_features(simple_optimizers INTERFACE cxx_std_17)
+
+# Demo executable
+add_executable(opt_demo src/main.cpp)
+target_link_libraries(opt_demo PRIVATE simple_optimizers)
+
+# Tests
+enable_testing()
+add_subdirectory(tests)
diff --git a/crates/simple_optimizers/include/optimizers.hpp b/crates/simple_optimizers/include/optimizers.hpp
new file mode 100644
index 0000000..3daeb89
--- /dev/null
+++ b/crates/simple_optimizers/include/optimizers.hpp
@@ -0,0 +1,70 @@
+#pragma once
+#include <cstddef>
+#include <memory>
+#include <stdexcept>
+#include <vector>
+
+namespace cppx::opt {
+
+class Optimizer {
+  public:
+    virtual ~Optimizer() = default;
+
+    virtual void step(std::vector<double> &weights, const std::vector<double> &grads) = 0;
+};
+
+// --------------------------- GradientDescent ---------------------------
+class GradientDescent final : public Optimizer {
+  public:
+    explicit GradientDescent(double learning_rate) : lr_(learning_rate) {}
+
+    [[nodiscard]] double learning_rate() const noexcept { return lr_; }
+
+    void step(std::vector<double> &weights, const std::vector<double> &grads) override {
+        if (weights.size() != grads.size()) {
+            throw std::invalid_argument("weights and grads size mismatch");
+        }
+        for (std::size_t i = 0; i < weights.size(); ++i) {
+            weights[i] -= lr_ * grads[i];
+        }
+    }
+
+  private:
+    double lr_{};
+};
+
+// ------------------------------- Momentum ------------------------------
+class Momentum final : public Optimizer {
+  public:
+    // struct Params {
+    //     double learning_rate;
+    //     double momentum;
+    //     std::size_t dim;
+    // };
+
+    explicit Momentum(double learning_rate, double momentum, std::size_t dim)
+        : lr_(learning_rate), mu_(momentum), v_(dim, 0.0) {}
+
+    [[nodiscard]] double learning_rate() const noexcept { return lr_; }
+    [[nodiscard]] double momentum() const noexcept { return mu_; }
+    [[nodiscard]] const std::vector<double> &velocity() const noexcept { return v_; }
+
+    void step(std::vector<double> &weights, const std::vector<double> &grads) override {
+        if (weights.size() != grads.size())
+            throw std::invalid_argument("weights and grads size mismatch");
+        if (v_.size() != weights.size())
+            throw std::invalid_argument("velocity size mismatch");
+
+        for (std::size_t i = 0; i < weights.size(); ++i) {
+            v_[i] = mu_ * v_[i] + lr_ * grads[i]; // v ← μ v + η g
+            weights[i] -= v_[i];                  // w ← w − v
+        }
+    }
+
+  private:
+    double lr_{};
+    double mu_{};
+    std::vector<double> v_;
+};
+
+} // namespace cppx::opt
diff --git a/crates/simple_optimizers/src/main.cpp b/crates/simple_optimizers/src/main.cpp
new file mode 100644
index 0000000..2ac5411
--- /dev/null
+++ b/crates/simple_optimizers/src/main.cpp
@@ -0,0 +1,6 @@
+#include <iostream>
+
+int main() {
+    std::cout << "Hello, world!" << "\n";
+    return 0;
+}
\ No newline at end of file
diff --git a/crates/simple_optimizers/tests/CMakeLists.txt b/crates/simple_optimizers/tests/CMakeLists.txt
new file mode 100644
index 0000000..d75c19c
--- /dev/null
+++ b/crates/simple_optimizers/tests/CMakeLists.txt
@@ -0,0 +1,7 @@
+# crates/simple_optimizers/tests/CMakeLists.txt
+
+add_executable(simple_optimizers_test test_simple_optimizers.cpp)
+
+target_link_libraries(simple_optimizers_test PRIVATE simple_optimizers)
+
+add_test(NAME simple_optimizers_test COMMAND simple_optimizers_test)
\ No newline at end of file
diff --git a/crates/simple_optimizers/tests/test_simple_optimizers.cpp b/crates/simple_optimizers/tests/test_simple_optimizers.cpp
new file mode 100644
index 0000000..28e3b1c
--- /dev/null
+++ b/crates/simple_optimizers/tests/test_simple_optimizers.cpp
@@ -0,0 +1,58 @@
+#include "optimizers.hpp"
+#include <cassert>
+#include <cmath>
+#include <vector>
+
+using namespace cppx::opt;
+
+static bool approx_equal(double a, double b, double eps = 1e-6) {
+    return std::fabs(a - b) < eps;
+}
+
+int main() {
+    // GradientDescent constructor
+    {
+        GradientDescent opt(1e-3);
+        assert(approx_equal(opt.learning_rate(), 1e-3));
+    }
+
+    // GradientDescent step
+    {
+        GradientDescent opt(0.1);
+        std::vector<double> w{1.0, 2.0, 3.0};
+        std::vector<double> g{0.5, 0.5, 0.5};
+        opt.step(w, g);
+        assert(approx_equal(w[0], 0.95));
+        assert(approx_equal(w[1], 1.95));
+        assert(approx_equal(w[2], 2.95));
+    }
+
+    // Momentum constructor
+    {
+        Momentum opt(0.01, 0.9, 10);
+        assert(approx_equal(opt.learning_rate(), 0.01));
+        assert(approx_equal(opt.momentum(), 0.9));
+        assert(opt.velocity().size() == 10);
+    }
+
+    // Momentum step
+    {
+        Momentum opt(0.1, 0.9, 3);
+        std::vector<double> w{1.0, 2.0, 3.0};
+        std::vector<double> g{0.5, 0.5, 0.5};
+
+        opt.step(w, g);
+        // after 1st step: same as GD(0.1)
+        assert(approx_equal(w[0], 0.95));
+        assert(approx_equal(w[1], 1.95));
+        assert(approx_equal(w[2], 2.95));
+
+        opt.step(w, g);
+        // expected: [0.855, 1.855, 2.855]
+        assert(approx_equal(w[0], 0.855));
+        assert(approx_equal(w[1], 1.855));
+        assert(approx_equal(w[2], 2.855));
+    }
+
+    return 0;
+}

From ae00464a6d9090989936a3666ac7c08631c13c77 Mon Sep 17 00:00:00 2001
From: Brian Staber
Date: Wed, 27 Aug 2025 11:17:54 +0200
Subject: [PATCH 2/3] Add some braces

---
 crates/simple_optimizers/include/optimizers.hpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/crates/simple_optimizers/include/optimizers.hpp b/crates/simple_optimizers/include/optimizers.hpp
index 3daeb89..0d1f1c0 100644
--- a/crates/simple_optimizers/include/optimizers.hpp
+++ b/crates/simple_optimizers/include/optimizers.hpp
@@ -50,10 +50,12 @@ class Momentum final : public Optimizer {
     [[nodiscard]] const std::vector<double> &velocity() const noexcept { return v_; }
 
     void step(std::vector<double> &weights, const std::vector<double> &grads) override {
-        if (weights.size() != grads.size())
+        if (weights.size() != grads.size()) {
             throw std::invalid_argument("weights and grads size mismatch");
-        if (v_.size() != weights.size())
+        }
+        if (v_.size() != weights.size()) {
             throw std::invalid_argument("velocity size mismatch");
+        }
 
         for (std::size_t i = 0; i < weights.size(); ++i) {
             v_[i] = mu_ * v_[i] + lr_ * grads[i]; // v ← μ v + η g

From d838ad06ec75c40484e3e09aa3036070684dbb1e Mon Sep 17 00:00:00 2001
From: Brian Staber
Date: Wed, 27 Aug 2025 11:40:38 +0200
Subject: [PATCH 3/3] Add a book chapter

---
 Justfile                      |   5 ++
 book/src/SUMMARY.md           |   4 +-
 book/src/simple_optimizers.md | 116 ++++++++++++++++++++++++++++++++++
 3 files changed, 122 insertions(+), 3 deletions(-)
 create mode 100644 book/src/simple_optimizers.md

diff --git a/Justfile b/Justfile
index bb79fe9..03bf6b7 100644
--- a/Justfile
+++ b/Justfile
@@ -53,3 +53,8 @@ fmt:
 
 fmt-check:
     find crates -type f \( -name '*.cpp' -o -name '*.hpp' \) -exec clang-format --dry-run --Werror {} +
+
+# -------------------------
+# Book
+# -------------------------
+serve-book:
+    mdbook serve book
\ No newline at end of file
diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md
index 07384d3..17a346d 100644
--- a/book/src/SUMMARY.md
+++ b/book/src/SUMMARY.md
@@ -1,5 +1,3 @@
 - [Introduction](introduction.md)
 - [Kalman filter](kf_linear.md)
-  - [Class definition]()
-  - [Class implementation]()
-  - [Python bindings]()
\ No newline at end of file
+- [Simple optimizers](simple_optimizers.md)
\ No newline at end of file
diff --git a/book/src/simple_optimizers.md b/book/src/simple_optimizers.md
new file mode 100644
index 0000000..3e8fff7
--- /dev/null
+++ b/book/src/simple_optimizers.md
@@ -0,0 +1,116 @@
# Optimizers

This chapter documents the small optimization module used in the project: a minimal runtime‑polymorphic interface `Optimizer` with two concrete implementations, Gradient Descent and Momentum. It is designed for clarity and for easy swapping of algorithms inside training loops.

## Problem setting

Given parameters $\mathbf{w}\in\mathbb{R}^d$ and a loss $\mathcal{L}(\mathbf{w})$, an optimizer updates the weights using the gradient
$$
\mathbf{g}_t=\nabla_{\mathbf{w}}\mathcal{L}(\mathbf{w}_t).
$$
Each algorithm defines an update rule $\mathbf{w}_{t+1} = \Phi(\mathbf{w}_t,\mathbf{g}_t,\theta)$ with hyper‑parameters $\theta$ (e.g., learning rate, momentum).

## API overview
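
At its core, the module exposes a single abstract base class; every algorithm mutates the weights in place through one virtual `step` method. Condensed from the header (full listing below):

```cpp
#include <vector>

namespace cppx::opt {

// Abstract update rule: mutates weights in place given the gradients.
class Optimizer {
  public:
    virtual ~Optimizer() = default;
    virtual void step(std::vector<double> &weights,
                      const std::vector<double> &grads) = 0;
};

} // namespace cppx::opt
```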
<details>
<summary>Click here to view the full implementation: <code>include/cppx/opt/optimizers.hpp</code>. We break it down in the remainder of this section.</summary>

```cpp
{{#include ../../crates/simple_optimizers/include/optimizers.hpp}}
```

</details>
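
Before walking through each algorithm, a minimal smoke test (mirroring the unit tests shipped with the module) shows the interface in action: two `Momentum` steps with $\eta = 0.1$, $\mu = 0.9$ and a constant gradient of $0.5$ move a weight from $1.0$ to $0.95$ and then to $0.855$.

```cpp
#include "optimizers.hpp"
#include <cassert>
#include <cmath>
#include <vector>

int main() {
    using namespace cppx::opt;

    Momentum opt(/*learning_rate=*/0.1, /*momentum=*/0.9, /*dim=*/3);
    std::vector<double> w{1.0, 2.0, 3.0};
    const std::vector<double> g{0.5, 0.5, 0.5};

    opt.step(w, g); // v = 0.9*0 + 0.1*0.5 = 0.05,  so w[0] = 1.0 - 0.05 = 0.95
    opt.step(w, g); // v = 0.9*0.05 + 0.05 = 0.095, so w[0] = 0.95 - 0.095 = 0.855
    assert(std::fabs(w[0] - 0.855) < 1e-12);
    return 0;
}
```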
Design choices
- A small virtual interface to enable swapping algorithms at runtime.
- `std::unique_ptr` for owning polymorphism; borrowing functions accept `Optimizer&`.
- Exceptions (`std::invalid_argument`) signal size mismatches.


## Gradient descent

Update rule
$$
\mathbf{w}_{t+1}=\mathbf{w}_{t}-\eta\,\mathbf{g}_t ,
$$
with learning rate $\eta>0$.

Implementation
```cpp
void GradientDescent::step(std::vector<double>& w,
                           const std::vector<double>& g) {
    if (w.size() != g.size()) throw std::invalid_argument("size mismatch");
    for (std::size_t i = 0; i < w.size(); ++i) {
        w[i] -= lr_ * g[i];
    }
}
```

## Momentum-based gradient descent

Update rule
$$
\begin{aligned}
\mathbf{v}_{t+1} &= \mu\,\mathbf{v}_{t} + \eta\,\mathbf{g}_t, \\\\
\mathbf{w}_{t+1} &= \mathbf{w}_{t} - \mathbf{v}_{t+1},
\end{aligned}
$$
with momentum $\mu\in[0,1)$ and learning rate $\eta>0$.

Implementation
```cpp
Momentum::Momentum(double learning_rate, double momentum, std::size_t dim)
    : lr_(learning_rate), mu_(momentum), v_(dim, 0.0) {}

void Momentum::step(std::vector<double>& w, const std::vector<double>& g) {
    if (w.size() != g.size()) throw std::invalid_argument("size mismatch");
    if (v_.size() != w.size()) throw std::invalid_argument("velocity size mismatch");

    for (std::size_t i = 0; i < w.size(); ++i) {
        v_[i] = mu_ * v_[i] + lr_ * g[i];
        w[i] -= v_[i];
    }
}
```

## Using the optimizers

### Owning an optimizer (runtime polymorphism)

```cpp
#include <memory>
#include <vector>
#include "cppx/opt/optimizers.hpp"

using namespace cppx::opt;

const std::size_t d = 10; // parameter dimension (illustrative)
std::vector<double> w(d, 0.0), g(d, 0.0);

// Choose an algorithm at runtime:
std::unique_ptr<Optimizer> opt =
    std::make_unique<Momentum>(/*lr=*/0.1, /*mu=*/0.9, /*dim=*/w.size());

for (int epoch = 0; epoch < 100; ++epoch) {
    // ... compute gradients into g ...
    opt->step(w, g); // updates w in place
}
```

### Borrowing an optimizer (no ownership transfer)

```cpp
void train_one_epoch(Optimizer& opt,
                     std::vector<double>& w,
                     std::vector<double>& g) {
    // ... fill g ...
    opt.step(w, g);
}
```

### API variations (optional)

If C++20 is available, `std::span` can make the interface container‑agnostic:

```cpp
// virtual void step(std::span<double> w, std::span<const double> g) = 0;
```
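
A minimal sketch of that variation, assuming C++20 (hypothetical types, not part of the current header):

```cpp
#include <cstddef>
#include <span>
#include <stdexcept>

namespace cppx::opt {

// Hypothetical span-based interface: accepts std::vector, std::array,
// or raw buffers without copying or committing to one container type.
class SpanOptimizer {
  public:
    virtual ~SpanOptimizer() = default;
    virtual void step(std::span<double> weights, std::span<const double> grads) = 0;
};

class SpanGradientDescent final : public SpanOptimizer {
  public:
    explicit SpanGradientDescent(double learning_rate) : lr_(learning_rate) {}

    void step(std::span<double> w, std::span<const double> g) override {
        if (w.size() != g.size()) {
            throw std::invalid_argument("weights and grads size mismatch");
        }
        for (std::size_t i = 0; i < w.size(); ++i) {
            w[i] -= lr_ * g[i];
        }
    }

  private:
    double lr_{};
};

} // namespace cppx::opt
```

Existing `std::vector`-based call sites would keep working unchanged, since a `std::vector<double>` converts implicitly to `std::span<double>`.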