diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..ea3a0e2 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,65 @@ +name: ci +on: + push: + branches: ["main"] + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.10", "3.12"] + + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt, clippy + + - name: Install Python deps + shell: bash + run: | + py="${{ matrix.python-version }}" + if [[ "$py" == "3.10" || "$py" == "3.11" || "$py" == "3.12" ]]; then + spec="==1.26.*" + else + spec="==1.24.*" + fi + python -m pip install --upgrade pip + python -m pip install "numpy${spec}" pytest pytest-cov maturin + + - name: Install cargo-tarpaulin + run: cargo install cargo-tarpaulin --locked + + - name: Rust fmt + run: cargo fmt --check + + - name: Rust clippy + run: cargo clippy -- -D warnings + + - name: Rust tests + run: cargo test + + - name: Rust coverage + run: cargo tarpaulin --out Xml + + - name: Build Python extension + run: maturin develop --release + + - name: Python tests + run: pytest --cov=ddstats --cov-report=xml + + - name: Upload coverage artifacts + uses: actions/upload-artifact@v4 + with: + name: coverage-${{ matrix.python-version }} + path: | + cobertura.xml + coverage.xml diff --git a/README.md b/README.md index 2f98ee9..2312a86 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ Fast drawdown & CED metrics in Rust with NumPy bindings. ## Overview -`ddstats` provides high-performance financial metrics, including drawdown and Expected Drawdown (CED), implemented in Rust and exposed to Python via NumPy bindings. This allows for fast computations directly from Python, leveraging Rust's speed and safety. +`ddstats` provides high-performance financial metrics, including drawdown and Conditional Expected Drawdown (CED), implemented in Rust and exposed to Python via NumPy bindings. This allows for fast computations directly from Python, leveraging Rust's speed and safety. ## Features @@ -49,12 +49,28 @@ Computes the maximum drawdown of a time series. - **x**: 1D NumPy array of floats. -### `ddstats.ced(x: np.ndarray, level: float) -> float` +### `ddstats.ced(x: np.ndarray, t: int, alpha: float) -> float` Computes the Conditional Expected Drawdown at a given confidence level. - **x**: 1D NumPy array of floats. -- **level**: Confidence level (e.g., 0.95). +- **t**: Rolling drawdown distribution window size. +- **alpha**: Confidence level (between 0 and 1). + +## Behavior and edge cases + +- Any `NaN` in inputs yields `NaN` in the corresponding output. +- Empty inputs return `NaN`. +- Rolling windows honor `min_window` warm-up and `step` stride. + +## Testing + +```sh +python -m pip install maturin pytest +maturin develop +pytest +cargo test +``` ## Building @@ -67,3 +83,8 @@ maturin build ## License MIT License. See [LICENSE](LICENSE). + +## Acknowledgements + +Inspired by the work of Lisa R. Goldberg and Ola Mahmoud, Drawdown: From Practice to Theory and Back Again. +arXiv:1404.7493 diff --git a/ddstats.pyi b/ddstats.pyi index 6fa454c..92039b0 100644 --- a/ddstats.pyi +++ b/ddstats.pyi @@ -3,6 +3,8 @@ from typing import Literal from numpy.typing import NDArray import numpy as np +__version__: str + def max_drawdown(returns: NDArray[np.float64], /) -> float: """ Compute the maximum drawdown (MDD). diff --git a/pyproject.toml b/pyproject.toml index df971de..80f82ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,12 @@ dependencies = [ "numpy>=2.0; python_version>='3.13'", ] +[project.optional-dependencies] +test = [ + "pytest>=7", + "pytest-cov>=4", +] + [tool.maturin] features = ["pyo3/extension-module"] include = ["ddstats.pyi", "py.typed"] diff --git a/src/lib.rs b/src/lib.rs index 6bb9e8f..539f366 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -82,8 +82,12 @@ use std::collections::BinaryHeap; #[inline(always)] fn max_drawdown_core(rets: &[f64]) -> f64 { let n = rets.len(); - if n == 0 { return f64::NAN; } - if rets.iter().any(|x| x.is_nan()) { return f64::NAN; } + if n == 0 { + return f64::NAN; + } + if rets.iter().any(|x| x.is_nan()) { + return f64::NAN; + } let mut cur_acc = 1.0_f64; let mut cur_max = 1.0_f64; @@ -144,7 +148,9 @@ fn rolling_bounds(n: usize, window: usize, min_window: usize, step: usize) -> Ve (window, i - (window - min_window)) }; let end_i = start_i + i_window; - if end_i > n { break; } + if end_i > n { + break; + } bounds.push((start_i, end_i)); } bounds @@ -164,11 +170,18 @@ fn rolling_bounds(n: usize, window: usize, min_window: usize, step: usize) -> Ve /// /// # Notes /// - Returns `NaN` for any window that contains a `NaN`. -fn rolling_max_drawdown_core(rets: &[f64], window: usize, min_window: usize, step: usize) -> Vec { +fn rolling_max_drawdown_core( + rets: &[f64], + window: usize, + min_window: usize, + step: usize, +) -> Vec { let n = rets.len(); let bounds = rolling_bounds(n, window, min_window, step); let mut out = Vec::with_capacity(bounds.len()); - for (s, e) in bounds { out.push(max_drawdown_core(&rets[s..e])); } + for (s, e) in bounds { + out.push(max_drawdown_core(&rets[s..e])); + } out } @@ -177,7 +190,12 @@ fn rolling_max_drawdown_core(rets: &[f64], window: usize, min_window: usize, ste /// /// # Parallelism /// Each window is independent and mapped over `par_iter()`. -fn rolling_max_drawdown_core_par(rets: &[f64], window: usize, min_window: usize, step: usize) -> Vec { +fn rolling_max_drawdown_core_par( + rets: &[f64], + window: usize, + min_window: usize, + step: usize, +) -> Vec { let n = rets.len(); let bounds = rolling_bounds(n, window, min_window, step); bounds @@ -208,8 +226,12 @@ fn rolling_max_drawdown_core_par(rets: &[f64], window: usize, min_window: usize, #[inline] fn quantile_linear(mut data: Vec, alpha: f64) -> f64 { let n = data.len(); - if n == 0 || !(0.0..=1.0).contains(&alpha) { return f64::NAN; } - if data.iter().any(|x| x.is_nan()) { return f64::NAN; } + if n == 0 || !(0.0..=1.0).contains(&alpha) { + return f64::NAN; + } + if data.iter().any(|x| x.is_nan()) { + return f64::NAN; + } data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal)); let k = alpha * (n as f64 - 1.0); let lo = k.floor() as usize; @@ -255,7 +277,9 @@ fn quantile_linear(mut data: Vec, alpha: f64) -> f64 { fn expanding_ced_heap_core(rets: &[f64], t: usize, alpha: f64, parallel: bool) -> Vec { let n = rets.len(); let mut out = vec![f64::NAN; n]; - if n < t { return out; } + if n < t { + return out; + } let mdds: Vec = if parallel { rolling_max_drawdown_core_par(rets, t, t, 1) @@ -273,9 +297,9 @@ fn expanding_ced_heap_core(rets: &[f64], t: usize, alpha: f64, parallel: bool) - // Rebalance heaps so that upper.len() == k let rebalance_to_k = |k: usize, - lower: &mut BinaryHeap>, - upper: &mut BinaryHeap>>, - sum_upper: &mut f64| { + lower: &mut BinaryHeap>, + upper: &mut BinaryHeap>>, + sum_upper: &mut f64| { while upper.len() > k { let Reverse(v) = upper.pop().unwrap(); *sum_upper -= v.into_inner(); @@ -285,7 +309,9 @@ fn expanding_ced_heap_core(rets: &[f64], t: usize, alpha: f64, parallel: bool) - if let Some(v) = lower.pop() { *sum_upper += v.into_inner(); upper.push(Reverse(v)); - } else { break; } + } else { + break; + } } }; @@ -294,8 +320,13 @@ fn expanding_ced_heap_core(rets: &[f64], t: usize, alpha: f64, parallel: bool) - let len_prefix = idx + 1; let v = mdds[idx]; - if v.is_nan() { saw_nan = true; } - if saw_nan { out[a] = f64::NAN; continue; } + if v.is_nan() { + saw_nan = true; + } + if saw_nan { + out[a] = f64::NAN; + continue; + } let nv = NotNan::new(v).unwrap(); @@ -320,7 +351,9 @@ fn expanding_ced_heap_core(rets: &[f64], t: usize, alpha: f64, parallel: bool) - // NumPy tail count: k = L - ceil(alpha*(L-1)), ensure k >= 1 let hi = (alpha * ((len_prefix - 1) as f64)).ceil() as usize; let mut k = len_prefix.saturating_sub(hi); - if k == 0 { k = 1; } + if k == 0 { + k = 1; + } rebalance_to_k(k, &mut lower, &mut upper, &mut sum_upper); @@ -332,11 +365,17 @@ fn expanding_ced_heap_core(rets: &[f64], t: usize, alpha: f64, parallel: bool) - let v2 = lower.pop().unwrap(); sum_upper += v2.into_inner(); upper.push(Reverse(v2)); - } else { break; } + } else { + break; + } } } - out[a] = if upper.is_empty() { f64::NAN } else { sum_upper / (upper.len() as f64) }; + out[a] = if upper.is_empty() { + f64::NAN + } else { + sum_upper / (upper.len() as f64) + }; } out @@ -366,7 +405,9 @@ fn expanding_ced_heap_core(rets: &[f64], t: usize, alpha: f64, parallel: bool) - fn expanding_ced_sort_core(rets: &[f64], t: usize, alpha: f64, parallel: bool) -> Vec { let n = rets.len(); let mut out = vec![f64::NAN; n]; - if n < t { return out; } + if n < t { + return out; + } let r = if parallel { rolling_max_drawdown_core_par(rets, t, t, 1) } else { @@ -376,17 +417,26 @@ fn expanding_ced_sort_core(rets: &[f64], t: usize, alpha: f64, parallel: bool) - let end = a - (t - 1) + 1; let slice = r[..end].to_vec(); let q = quantile_linear(slice.clone(), alpha); - if q.is_nan() { out[a] = f64::NAN; continue; } + if q.is_nan() { + out[a] = f64::NAN; + continue; + } let (mut sum, mut cnt) = (0.0, 0usize); for &v in &slice { - if v >= q { sum += v; cnt += 1; } + if v >= q { + sum += v; + cnt += 1; + } } - out[a] = if cnt == 0 { f64::NAN } else { sum / (cnt as f64) }; + out[a] = if cnt == 0 { + f64::NAN + } else { + sum / (cnt as f64) + }; } out } - // ---------- Python bindings ---------- #[pyfunction] @@ -421,8 +471,8 @@ fn rolling_max_drawdown( } }); - let arr = v.into_pyarray(py); - Ok(arr.unbind()) + let arr = v.into_pyarray(py); + Ok(arr.unbind()) } #[pyfunction] @@ -441,15 +491,26 @@ fn ced( } else { rolling_max_drawdown_core(rets, t, t, 1) }; - if r.is_empty() { return f64::NAN; } + if r.is_empty() { + return f64::NAN; + } let q = quantile_linear(r.clone(), alpha); - if q.is_nan() { return f64::NAN; } + if q.is_nan() { + return f64::NAN; + } let mut sum = 0.0; let mut cnt = 0usize; for &v in &r { - if v >= q { sum += v; cnt += 1; } + if v >= q { + sum += v; + cnt += 1; + } + } + if cnt == 0 { + f64::NAN + } else { + sum / cnt as f64 } - if cnt == 0 { f64::NAN } else { sum / cnt as f64 } }); Ok(val) } @@ -470,17 +531,80 @@ fn expanding_ced( "sort" => expanding_ced_sort_core(rets, t, alpha, parallel), _ => expanding_ced_heap_core(rets, t, alpha, parallel), }); - let arr = v.into_pyarray(py); - Ok(arr.unbind()) + let arr = v.into_pyarray(py); + Ok(arr.unbind()) } #[pymodule] fn ddstats(_py: Python<'_>, m: &Bound) -> PyResult<()> { - m.add("__doc__", "ddstats: drawdown & CED metrics in Rust with NumPy bindings.")?; - m.add("__version__", "0.4.0")?; + m.add( + "__doc__", + "ddstats: drawdown & CED metrics in Rust with NumPy bindings.", + )?; + m.add("__version__", env!("CARGO_PKG_VERSION"))?; m.add_function(wrap_pyfunction!(max_drawdown, m)?)?; m.add_function(wrap_pyfunction!(rolling_max_drawdown, m)?)?; m.add_function(wrap_pyfunction!(ced, m)?)?; m.add_function(wrap_pyfunction!(expanding_ced, m)?)?; Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + + fn assert_close(a: f64, b: f64, tol: f64) { + if a.is_nan() && b.is_nan() { + return; + } + assert!((a - b).abs() <= tol, "{a} vs {b}"); + } + + #[test] + fn max_drawdown_basic_cases() { + let mdd = max_drawdown_core(&[0.10, -0.10]); + assert_close(mdd, 0.10, 1e-12); + let mdd_up = max_drawdown_core(&[0.01, 0.02, 0.03]); + assert_close(mdd_up, 0.0, 1e-12); + let mdd_empty = max_drawdown_core(&[]); + assert!(mdd_empty.is_nan()); + let mdd_nan = max_drawdown_core(&[0.01, f64::NAN]); + assert!(mdd_nan.is_nan()); + } + + #[test] + fn rolling_bounds_example() { + let bounds = rolling_bounds(5, 3, 2, 1); + assert_eq!(bounds, vec![(0, 2), (0, 3), (1, 4), (2, 5)]); + } + + #[test] + fn quantile_linear_endpoints() { + assert_close(quantile_linear(vec![0.0, 1.0], 0.0), 0.0, 1e-12); + assert_close(quantile_linear(vec![0.0, 1.0], 1.0), 1.0, 1e-12); + assert_close(quantile_linear(vec![0.0, 1.0], 0.5), 0.5, 1e-12); + assert!(quantile_linear(vec![], 0.5).is_nan()); + } + + #[test] + fn rolling_max_drawdown_par_matches_serial() { + let rets = [0.01, -0.02, 0.03, -0.01, 0.02]; + let serial = rolling_max_drawdown_core(&rets, 3, 2, 1); + let parallel = rolling_max_drawdown_core_par(&rets, 3, 2, 1); + assert_eq!(serial.len(), parallel.len()); + for (a, b) in serial.iter().zip(parallel.iter()) { + assert_close(*a, *b, 1e-12); + } + } + + #[test] + fn expanding_ced_heap_matches_sort() { + let rets = [0.01, -0.02, 0.03, -0.01, 0.02]; + let heap = expanding_ced_heap_core(&rets, 3, 0.9, false); + let sort = expanding_ced_sort_core(&rets, 3, 0.9, false); + assert_eq!(heap.len(), sort.len()); + for (a, b) in heap.iter().zip(sort.iter()) { + assert_close(*a, *b, 1e-12); + } + } +} diff --git a/tests/test_ddstats.py b/tests/test_ddstats.py new file mode 100644 index 0000000..1df48fe --- /dev/null +++ b/tests/test_ddstats.py @@ -0,0 +1,84 @@ +import importlib.metadata +import math + +import numpy as np + +import ddstats + + +def max_drawdown_py(returns: np.ndarray) -> float: + if returns.size == 0 or np.isnan(returns).any(): + return math.nan + cur_acc = 1.0 + cur_max = 1.0 + max_dd = 0.0 + for r in returns: + cur_acc *= 1.0 + float(r) + if cur_acc > cur_max: + cur_max = cur_acc + else: + drawdown = (cur_acc - cur_max) / cur_max + if drawdown < max_dd: + max_dd = drawdown + return -max_dd + + +def rolling_bounds_py(n: int, window: int, min_window: int, step: int) -> list[tuple[int, int]]: + if n == 0 or min_window == 0 or step == 0 or n < min_window: + return [] + bounds = [] + max_window_i = n - min_window + 1 + for i in range(0, max_window_i, step): + if i < max(window - min_window, 0): + i_window = min_window + i + start_i = 0 + else: + i_window = window + start_i = i - (window - min_window) + end_i = start_i + i_window + if end_i > n: + break + bounds.append((start_i, end_i)) + return bounds + + +def test_max_drawdown_basic_cases() -> None: + data = np.array([0.10, -0.10], dtype=float) + assert math.isclose(ddstats.max_drawdown(data), 0.10, rel_tol=0.0, abs_tol=1e-12) + data_up = np.array([0.01, 0.02, 0.03], dtype=float) + assert ddstats.max_drawdown(data_up) == 0.0 + data_nan = np.array([0.01, np.nan], dtype=float) + assert math.isnan(ddstats.max_drawdown(data_nan)) + + +def test_rolling_max_drawdown_matches_python() -> None: + rets = np.array([0.01, -0.02, 0.03, -0.01, 0.02], dtype=float) + expected = [] + for start, end in rolling_bounds_py(len(rets), 3, 2, 1): + expected.append(max_drawdown_py(rets[start:end])) + out = ddstats.rolling_max_drawdown(rets, window=3, min_window=2, step=1, parallel=False) + np.testing.assert_allclose(out, np.array(expected), rtol=0.0, atol=1e-12, equal_nan=True) + + +def test_ced_matches_python_reference() -> None: + rets = np.array([0.01, -0.02, 0.03, -0.01, 0.02], dtype=float) + t = 3 + rolling = [] + for start, end in rolling_bounds_py(len(rets), t, t, 1): + rolling.append(max_drawdown_py(rets[start:end])) + rolling = np.array(rolling) + q = np.quantile(rolling, 0.9, method="linear") + expected = rolling[rolling >= q].mean() + out = ddstats.ced(rets, t=t, alpha=0.9, parallel=False) + assert math.isclose(out, expected, rel_tol=0.0, abs_tol=1e-12) + + +def test_expanding_ced_heap_matches_sort() -> None: + rets = np.array([0.01, -0.02, 0.03, -0.01, 0.02], dtype=float) + heap = ddstats.expanding_ced(rets, t=3, alpha=0.9, method="heap", parallel=False) + sort = ddstats.expanding_ced(rets, t=3, alpha=0.9, method="sort", parallel=False) + np.testing.assert_allclose(heap, sort, rtol=0.0, atol=1e-12, equal_nan=True) + + +def test_version_matches_metadata() -> None: + assert ddstats.__version__ == importlib.metadata.version("ddstats")