Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions .github/workflows/gh-pages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,8 @@ jobs:
- name: Build api with pdoc3
run: uv run pdoc --html -c latex_math=True --output-dir docs --force heavytail

#- name: Build static HTML for notebooks
# run: |
# uv run jupyter nbconvert --to html --execute --allow-errors docs/*.ipynb
- name: Build static HTML pages for Jupyter notebooks
run: uv run jupyter nbconvert --to html --execute --allow-errors docs/*.ipynb

- name: Deploy
uses: JamesIves/github-pages-deploy-action@4.1.4 # Source: https://github.com/marketplace/actions/deploy-to-github-pages
Expand Down
156 changes: 156 additions & 0 deletions docs/TylerCovariance.ipynb

Large diffs are not rendered by default.

144 changes: 144 additions & 0 deletions docs/doc_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# Copyright (c) 2024 Mohammadjavad Vakili. All rights reserved.
"""Utility functions for generating 2D data with outliers and plotting covariance contours."""

from typing import Any

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.axes import Axes
from matplotlib.patches import Ellipse
from scipy.stats import multivariate_t


def generate_2d_data_with_outliers(
n_samples: int = 100,
n_outliers: int = 10,
mean: tuple = (0, 0),
cov: list | None = None,
outlier_range: float = 8,
) -> np.ndarray:
"""
Generate synthetic 2D data with outliers.

Parameters
----------
n_samples : int
Number of normal data samples to generate.
n_outliers : int
Number of outlier samples to generate.
mean : tuple
Mean of the normal data distribution.
cov : list or np.ndarray
Covariance matrix for the normal data.
outlier_range : float
Range for generating outlier values.

Returns
-------
np.ndarray
Combined array of normal data and outliers, shape (n_samples + n_outliers, 2).
"""
if cov is None:
cov = [[1, 0.5], [0.5, 1]]
# Generate normal data
rng = np.random.default_rng()
data = rng.multivariate_normal(mean, cov, n_samples)
# Generate outliers
outliers = rng.uniform(low=-outlier_range, high=outlier_range, size=(n_outliers, 2))
# Combine data and outliers
return np.vstack([data, outliers])


def generate_2d_student_t_data(
n_samples: int = 100,
cov: list | None = None,
nu: float = 4.0,
) -> np.ndarray:
"""
Generate synthetic 2D data from a multivariate Student's t-distribution.

Parameters
----------
n_samples : int
Number of samples to generate.
cov : list or np.ndarray
Covariance matrix for the data.
nu : float
Degrees of freedom for the Student's t-distribution.

Returns
-------
np.ndarray
Generated data of shape (n_samples, 2).
"""
if cov is None:
cov = [[1, 0], [0, 1]]
scatter = (nu / (nu - 2)) * np.array(cov)
return multivariate_t.rvs(loc=[0, 0], shape=scatter, df=nu, size=n_samples)


def plot_covariance_contour(
    mean: tuple[float, float],
    cov: np.ndarray,
    ax: Axes | None = None,
    n_std: float = 2.0,
    **kwargs: Any,
) -> Axes:
    """Plot covariance ellipse for 2D data.

    The ellipse axes are aligned with the eigenvectors of ``cov`` and each full
    axis length is ``2 * n_std * sqrt(eigenvalue)``.

    Parameters
    ----------
    mean: tuple[float, float]
        Mean vector (2D); used as the ellipse centre.
    cov: np.ndarray
        Covariance matrix (2x2).
    ax: Optional[Axes]
        Matplotlib axis to plot on. If None, uses current axis.
    n_std: float
        Factor for scaling the standard deviations for ellipse size.
    **kwargs: Any
        Additional keyword arguments for Ellipse. They take precedence over
        the defaults used here (red edge, no fill, line width 2), whether given
        by full name (``edgecolor``, ``facecolor``, ``linewidth``) or by the
        short aliases ``ec``, ``fc``, ``lw``.

    Returns
    -------
    Axes
        The matplotlib axis with the covariance ellipse added.
    """
    if ax is None:
        ax = plt.gca()
    # Eigenvalue decomposition, sorted so the largest eigenvalue comes first
    evals, evecs = np.linalg.eigh(cov)
    order = evals.argsort()[::-1]
    evals, evecs = evals[order], evecs[:, order]
    # Round-off can make a (near-)singular matrix yield a tiny negative
    # eigenvalue; clamp so the square root below never produces NaN.
    evals = np.clip(evals, 0.0, None)
    # Orientation of the major axis: angle of the leading eigenvector.
    major_x, major_y = evecs[:, 0]
    theta = np.degrees(np.arctan2(major_y, major_x))
    width, height = 2 * n_std * np.sqrt(evals)
    # Defaults are merged with kwargs instead of being passed next to them, so a
    # caller overriding e.g. edgecolor no longer triggers a duplicate-keyword error.
    style: dict[str, Any] = {"edgecolor": "red", "facecolor": "None", "linewidth": 2}
    for alias, full_name in (("ec", "edgecolor"), ("fc", "facecolor"), ("lw", "linewidth")):
        if alias in kwargs:
            # The caller used the short alias: drop our default for that property.
            style.pop(full_name, None)
    style.update(kwargs)
    ellip = Ellipse(xy=mean, width=width, height=height, angle=theta, **style)
    ax.add_patch(ellip)
    ax.set_aspect("equal")
    ax.set_xlabel("X")
    ax.set_ylabel("Y")
    return ax


def plot_2d_data(data: np.ndarray, ax: Axes | None = None) -> Axes:
    """Draw a scatter plot of 2D samples.

    Parameters
    ----------
    data: np.ndarray
        Samples to draw; column 0 is used as the x coordinate and column 1 as
        the y coordinate.
    ax: Optional[Axes]
        Axis that receives the scatter plot. When None, the current matplotlib
        axis is used.

    Returns
    -------
    Axes
        The axis the points were drawn on, with equal aspect ratio and
        "X"/"Y" axis labels applied.
    """
    target = plt.gca() if ax is None else ax
    x_values = data[:, 0]
    y_values = data[:, 1]
    # Small fixed marker size (10).
    target.scatter(x_values, y_values, s=10)
    target.set_aspect("equal")
    target.set_xlabel("X")
    target.set_ylabel("Y")
    return target
30 changes: 29 additions & 1 deletion docs/index.md
Original file line number Diff line number Diff line change
@@ -1 +1,29 @@
Hi.
# [heavy<sub>tail</sub>]


[🌐 **GitHub**](https://github.com/quantfinlib/heavy-tail)
&nbsp;&nbsp;&nbsp; [🔗 **API**](heavytail)
&nbsp;&nbsp;&nbsp; [📖 **Docs**](https://quantfinlib.github.io/heavy-tail/)


## Getting Started

* [Tyler Covariance Estimator](TylerCovariance.html)


## Documentation

The documentation is available on [GitHub Pages](https://quantfinlib.github.io/heavy-tail/).
The [🔗 API documentation](heavytail) is generated using [pdoc3](https://pdoc3.github.io/pdoc/).

To manually generate the documentation, first, install the heavytail package with the doc dependencies using `uv`:

```bash
$ uv pip install -e .[docs]
```

Then build the API documentation:

```bash
$ uv run pdoc --html -c latex_math=True --output-dir docs --force heavytail
```
Loading