diff --git a/.gitignore b/.gitignore index fa842d1..bb41570 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,7 @@ __pycache__/ vmlinux.py ~* vmlinux.h + +# Documentation build artifacts +docs/_build/ +docs/_templates/ diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..53c2ad8 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,52 @@ +# PythonBPF Documentation + +This directory contains the Sphinx documentation for PythonBPF. + +## Building the Documentation + +### Prerequisites + +Install the documentation dependencies: + +**Using uv (recommended):** +```bash +uv pip install -r requirements.txt +# Or install the optional docs dependencies +uv pip install pythonbpf[docs] +``` + +**Using pip:** +```bash +pip install -r requirements.txt +# Or install the optional docs dependencies +pip install pythonbpf[docs] +``` + +### Build HTML Documentation + +```bash +make html +``` + +The generated documentation will be in `_build/html/`. Open `_build/html/index.html` in a browser to view. 
+ +### Other Build Formats + +```bash +make latexpdf # Build PDF documentation +make epub # Build ePub format +make clean # Clean build artifacts +``` + +## Documentation Structure + +- `index.md` - Main landing page +- `getting-started/` - Installation and quick start guides +- `user-guide/` - Comprehensive user documentation +- `api/` - API reference documentation +- `conf.py` - Sphinx configuration +- `_static/` - Static files (images, CSS, etc.) + +## Writing Documentation + +Documentation is written in Markdown using [MyST-Parser](https://myst-parser.readthedocs.io/). See the existing files for examples. diff --git a/docs/_static/.gitkeep b/docs/_static/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/docs/api/index.md b/docs/api/index.md new file mode 100644 index 0000000..46cb878 --- /dev/null +++ b/docs/api/index.md @@ -0,0 +1,471 @@ +# API Reference + +This section provides detailed API documentation for all PythonBPF modules, classes, and functions. + +## Module Overview + +PythonBPF is organized into several modules: + +* `pythonbpf` - Main module with decorators and compilation functions +* `pythonbpf.maps` - BPF map types +* `pythonbpf.helper` - BPF helper functions +* `pythonbpf.structs` - Struct type handling +* `pythonbpf.codegen` - Code generation and compilation + +## Public API + +The main `pythonbpf` module exports the following public API: + +```python +from pythonbpf import ( + # Decorators + bpf, + map, + section, + bpfglobal, + struct, + + # Compilation + compile_to_ir, + compile, + BPF, + + # Utilities + trace_pipe, + trace_fields, +) +``` + +## Decorators + +```{eval-rst} +.. automodule:: pythonbpf.decorators + :members: + :undoc-members: + :show-inheritance: +``` + +### bpf + +```python +@bpf +def my_function(): + pass +``` + +Decorator to mark a function or class for BPF compilation. Any function or class decorated with `@bpf` will be processed by the PythonBPF compiler. 
+ +**See also:** {doc}`../user-guide/decorators` + +### map + +```python +@bpf +@map +def my_map() -> HashMap: + return HashMap(key=c_uint32, value=c_uint64, max_entries=1024) +``` + +Decorator to mark a function as a BPF map definition. The function must return a map type. + +**See also:** {doc}`../user-guide/maps` + +### section + +```python +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def trace_open(ctx: c_void_p) -> c_int64: + return c_int64(0) +``` + +Decorator to specify which kernel hook to attach the BPF program to. + +**Parameters:** +* `name` (str) - The section name (e.g., "tracepoint/...", "kprobe/...", "xdp") + +**See also:** {doc}`../user-guide/decorators` + +### bpfglobal + +```python +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" +``` + +Decorator to mark a function as a BPF global variable definition. + +**See also:** {doc}`../user-guide/decorators` + +### struct + +```python +@bpf +@struct +class Event: + timestamp: c_uint64 + pid: c_uint32 +``` + +Decorator to mark a class as a BPF struct definition. + +**See also:** {doc}`../user-guide/structs` + +## Compilation Functions + +```{eval-rst} +.. automodule:: pythonbpf.codegen + :members: compile_to_ir, compile, BPF + :undoc-members: + :show-inheritance: +``` + +### compile_to_ir() + +```python +def compile_to_ir( + filename: str, + output: str, + loglevel=logging.WARNING +) -> None +``` + +Compile Python source to LLVM Intermediate Representation. + +**Parameters:** +* `filename` (str) - Path to the Python source file +* `output` (str) - Path for the output LLVM IR file (.ll) +* `loglevel` - Logging level (default: logging.WARNING) + +**See also:** {doc}`../user-guide/compilation` + +### compile() + +```python +def compile( + filename: str = None, + output: str = None, + loglevel=logging.WARNING +) -> None +``` + +Compile Python source to BPF object file. 
+ +**Parameters:** +* `filename` (str, optional) - Path to the Python source file (default: calling file) +* `output` (str, optional) - Path for the output object file (default: same name with .o extension) +* `loglevel` - Logging level (default: logging.WARNING) + +**See also:** {doc}`../user-guide/compilation` + +### BPF + +```python +class BPF: + def __init__( + self, + filename: str = None, + loglevel=logging.WARNING + ) + + def load(self) -> BpfObject + def attach_all(self) -> None + def load_and_attach(self) -> BpfObject +``` + +High-level interface to compile, load, and attach BPF programs. + +**Parameters:** +* `filename` (str, optional) - Path to Python source file (default: calling file) +* `loglevel` - Logging level (default: logging.WARNING) + +**Methods:** +* `load()` - Load the compiled BPF program into the kernel +* `attach_all()` - Attach all BPF programs to their hooks +* `load_and_attach()` - Convenience method that loads and attaches + +**See also:** {doc}`../user-guide/compilation` + +## Utilities + +```{eval-rst} +.. automodule:: pythonbpf.utils + :members: + :undoc-members: + :show-inheritance: +``` + +### trace_pipe() + +```python +def trace_pipe() -> None +``` + +Read and display output from the kernel trace pipe. + +Blocks until interrupted with Ctrl+C. Displays BPF program output from `print()` statements. + +**See also:** {doc}`../user-guide/helpers` + +### trace_fields() + +```python +def trace_fields() -> tuple +``` + +Parse one line from the trace pipe into structured fields. + +**Returns:** Tuple of `(task, pid, cpu, flags, timestamp, message)` +* `task` (str) - Task/process name +* `pid` (int) - Process ID +* `cpu` (int) - CPU number +* `flags` (bytes) - Trace flags +* `timestamp` (float) - Timestamp in seconds +* `message` (str) - The trace message + +**See also:** {doc}`../user-guide/helpers` + +## Map Types + +```{eval-rst} +.. 
automodule:: pythonbpf.maps.maps + :members: + :undoc-members: + :show-inheritance: +``` + +### HashMap + +```python +class HashMap: + def __init__( + self, + key, + value, + max_entries: int + ) + + def lookup(self, key) + def update(self, key, value, flags=None) + def delete(self, key) +``` + +Hash map for efficient key-value storage. + +**Parameters:** +* `key` - The type of the key (ctypes type or struct) +* `value` - The type of the value (ctypes type or struct) +* `max_entries` (int) - Maximum number of entries + +**Methods:** +* `lookup(key)` - Look up a value by key +* `update(key, value, flags=None)` - Update or insert a key-value pair +* `delete(key)` - Remove an entry from the map + +**See also:** {doc}`../user-guide/maps` + +### PerfEventArray + +```python +class PerfEventArray: + def __init__( + self, + key_size, + value_size + ) + + def output(self, data) +``` + +Perf event array for sending data to userspace. + +**Parameters:** +* `key_size` - Type for the key +* `value_size` - Type for the value + +**Methods:** +* `output(data)` - Send data to userspace + +**See also:** {doc}`../user-guide/maps` + +### RingBuffer + +```python +class RingBuffer: + def __init__(self, max_entries: int) + + def output(self, data, flags=0) + def reserve(self, size: int) + def submit(self, data, flags=0) + def discard(self, data, flags=0) +``` + +Ring buffer for efficient event delivery. + +**Parameters:** +* `max_entries` (int) - Maximum size in bytes (must be power of 2) + +**Methods:** +* `output(data, flags=0)` - Send data to the ring buffer +* `reserve(size)` - Reserve space in the buffer +* `submit(data, flags=0)` - Submit previously reserved space +* `discard(data, flags=0)` - Discard previously reserved space + +**See also:** {doc}`../user-guide/maps` + +## Helper Functions + +```{eval-rst} +.. 
automodule:: pythonbpf.helper.helpers + :members: + :undoc-members: + :show-inheritance: +``` + +### Process Information + +* `pid()` - Get current process ID +* `comm(buf)` - Get current process command name (requires buffer parameter) +* `uid()` - Get current user ID + +### Time + +* `ktime()` - Get current kernel time in nanoseconds + +### CPU + +* `smp_processor_id()` - Get current CPU ID + +### Memory + +* `probe_read(dst, size, src)` - Safely read kernel memory +* `probe_read_str(dst, src)` - Safely read string from kernel memory +* `deref(ptr)` - Dereference a pointer + +### Random + +* `random()` - Get pseudo-random number + +**See also:** {doc}`../user-guide/helpers` + +## Type System + +PythonBPF uses Python's `ctypes` module for type definitions: + +### Integer Types + +* `c_int8`, `c_int16`, `c_int32`, `c_int64` - Signed integers +* `c_uint8`, `c_uint16`, `c_uint32`, `c_uint64` - Unsigned integers + +### Other Types + +* `c_char`, `c_bool` - Characters and booleans +* `c_void_p` - Void pointers +* `str(N)` - Fixed-length strings + +## Examples + +### Basic Usage + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def hello(ctx: c_void_p) -> c_int64: + print("Hello, World!") + return 0 + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load_and_attach() +trace_pipe() +``` + +### With Maps + +```python +from pythonbpf import bpf, map, section, bpfglobal, BPF +from pythonbpf.maps import HashMap +from pythonbpf.helper import pid +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +@bpf +@map +def counters() -> HashMap: + return HashMap(key=c_uint32, value=c_uint64, max_entries=256) + +@bpf +@section("tracepoint/syscalls/sys_enter_clone") +def count_clones(ctx: c_void_p) -> c_int64: + process_id = pid() + count = counters.lookup(process_id) + + if count: + counters.update(process_id, count + 1) + else: + 
counters.update(process_id, 1) + + return 0 + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load_and_attach() +``` + +### With Structs + +```python +from pythonbpf import bpf, struct, map, section, bpfglobal, BPF +from pythonbpf.maps import RingBuffer +from pythonbpf.helper import pid, ktime, comm +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +@bpf +@struct +class Event: + timestamp: c_uint64 + pid: c_uint32 + comm: str(16) + +@bpf +@map +def events() -> RingBuffer: + return RingBuffer(max_entries=4096) + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def track_exec(ctx: c_void_p) -> c_int64: + event = Event() + event.timestamp = ktime() + event.pid = pid() + comm(event.comm) + + events.output(event) + return 0 + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load_and_attach() +``` + +## See Also + +* {doc}`../user-guide/index` - Comprehensive user guide +* {doc}`../getting-started/quickstart` - Quick start tutorial +* [GitHub Repository](https://github.com/pythonbpf/Python-BPF) - Source code and examples diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..8bddd93 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,105 @@ +# Configuration file for the Sphinx documentation builder. 
+# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +import os +import sys + +# Add the parent directory to the path so we can import pythonbpf +sys.path.insert(0, os.path.abspath("..")) + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "PythonBPF" +copyright = "2026, Pragyansh Chaturvedi, Varun Mallya" +author = "Pragyansh Chaturvedi, Varun Mallya" +release = "0.1.8" +version = "0.1.8" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + "myst_parser", + "sphinx.ext.autodoc", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "sphinx.ext.intersphinx", + "sphinx_copybutton", +] + +# MyST-Parser configuration +myst_enable_extensions = [ + "colon_fence", + "deflist", + "fieldlist", +] + +# Napoleon settings for Google/NumPy style docstrings +napoleon_google_docstring = True +napoleon_numpy_docstring = True +napoleon_include_init_with_doc = True +napoleon_include_private_with_doc = False +napoleon_include_special_with_doc = True +napoleon_use_admonition_for_examples = True +napoleon_use_admonition_for_notes = True +napoleon_use_admonition_for_references = False +napoleon_use_ivar = False +napoleon_use_param = True +napoleon_use_rtype = True +napoleon_type_aliases = None + +# Intersphinx mapping +intersphinx_mapping = { + "python": ("https://docs.python.org/3", None), + "llvmlite": ("https://llvmlite.readthedocs.io/en/latest/", None), +} + +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +# Source file suffixes +source_suffix = { + ".rst": "restructuredtext", + ".md": "markdown", +} + +# The master toctree document +master_doc = "index" + +# -- Options for HTML output 
------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "sphinx_rtd_theme" +html_static_path = ["_static"] + +# Theme options +html_theme_options = { + "logo_only": False, + "display_version": True, + "prev_next_buttons_location": "bottom", + "style_external_links": False, + "vcs_pageview_mode": "", + # Toc options + "collapse_navigation": False, + "sticky_navigation": True, + "navigation_depth": 4, + "includehidden": True, + "titles_only": False, +} + +# -- Options for autodoc ----------------------------------------------------- + +autodoc_default_options = { + "members": True, + "member-order": "bysource", + "special-members": "__init__", + "undoc-members": True, + "exclude-members": "__weakref__", +} + +autodoc_typehints = "description" + +exclude_patterns.append("README.md")  # extend the list defined above; reassigning it would stop excluding _build diff --git a/docs/getting-started/index.md b/docs/getting-started/index.md new file mode 100644 index 0000000..affc195 --- /dev/null +++ b/docs/getting-started/index.md @@ -0,0 +1,35 @@ +# Getting Started + +Welcome to PythonBPF! This section will help you get started with writing eBPF programs in Python. + +## What You'll Learn + +In this section, you'll learn how to: + +1. **Install PythonBPF** - Set up your development environment with all necessary dependencies +2. **Write Your First Program** - Create a simple BPF program to understand the basics +3. 
**Understand Core Concepts** - Learn about decorators, compilation, and program structure + +## Prerequisites + +Before you begin, make sure you have: + +* A Linux system (eBPF requires Linux kernel 4.15+) +* Python 3.10 or higher +* Root or sudo access (required for loading BPF programs) + +## Next Steps + +After completing the getting started guide, you can: + +* Explore the {doc}`../user-guide/index` for detailed information on features +* Check out the {doc}`../api/index` +* Browse the [examples directory](https://github.com/pythonbpf/Python-BPF/tree/master/examples) and the [BCC examples directory](https://github.com/pythonbpf/Python-BPF/tree/master/BCC-Examples) + +## Need Help? + +If you encounter any issues: + +* Check the [GitHub Issues](https://github.com/pythonbpf/Python-BPF/issues) for known problems +* Review the [README](https://github.com/pythonbpf/Python-BPF/blob/master/README.md) for additional information +* Reach out to the maintainers: [@r41k0u](https://github.com/r41k0u) and [@varun-r-mallya](https://github.com/varun-r-mallya) diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md new file mode 100644 index 0000000..23ee7af --- /dev/null +++ b/docs/getting-started/installation.md @@ -0,0 +1,182 @@ +# Installation + +This guide will walk you through installing PythonBPF and its dependencies. 
+ +## Prerequisites + +### System Requirements + +PythonBPF requires: + +* **Linux** - eBPF is a Linux kernel feature (kernel 4.15 or higher recommended) +* **Python 3.10+** - Python 3.10 or higher is required +* **Root/sudo access** - Loading BPF programs into the kernel requires elevated privileges + +### Required System Packages + +Before installing PythonBPF, you need to install the following system packages: + +#### On Ubuntu/Debian: + +```bash +sudo apt-get update +sudo apt-get install -y bpftool clang llvm +``` + +#### On Fedora/RHEL/CentOS: + +```bash +sudo dnf install -y bpftool clang llvm +``` + +#### On Arch Linux: + +```bash +sudo pacman -S bpf clang llvm +``` + +## Installing PythonBPF + +### From PyPI (Recommended) + +The easiest way to install PythonBPF is using uv or pip: + +**Using uv (recommended):** +```bash +uv pip install pythonbpf pylibbpf +``` + +**Using pip:** +```bash +pip install pythonbpf pylibbpf +``` + +This will install: +* `pythonbpf` - The main package for writing and compiling BPF programs +* `pylibbpf` - Python bindings for libbpf, used to load and attach BPF programs + +### Development Installation + +If you want to contribute to PythonBPF or work with the latest development version: + +1. Clone the repository: + +```bash +git clone https://github.com/pythonbpf/Python-BPF.git +cd Python-BPF +``` + +2. Create and activate a virtual environment: + +```bash +python3 -m venv .venv +source .venv/bin/activate # On Windows: .venv\Scripts\activate +``` + +3. Install in development mode: + +**Using uv (recommended):** +```bash +uv pip install -e . +uv pip install pylibbpf +``` + +**Using pip:** +```bash +pip install -e . +pip install pylibbpf +``` + +4. 
Install development dependencies: + +```bash +make install +``` + +### Installing Documentation Dependencies + +If you want to build the documentation locally: + +**Using uv (recommended):** +```bash +uv pip install pythonbpf[docs] +# Or from the repository root: +uv pip install -e .[docs] +``` + +**Using pip:** +```bash +pip install pythonbpf[docs] +# Or from the repository root: +pip install -e .[docs] +``` + +## Generating vmlinux.py + +`vmlinux.py` contains the running kernel's data structures and is analogous to `vmlinux.h` included in eBPF programs written in C. Some examples require access to it. To use these features, you need to generate a `vmlinux.py` file: + +1. Install additional dependencies: + +**Using uv (recommended):** +```bash +uv pip install ctypeslib2 +``` + +**Using pip:** +```bash +pip install ctypeslib2 +``` + +2. Generate the vmlinux.py file: + +```bash +sudo tools/vmlinux-gen.py +``` + +3. Copy the generated file to your working directory or the examples directory as needed. + +```{warning} +The `vmlinux.py` file is kernel-specific. If you upgrade your kernel, you may need to regenerate this file. 
+``` + +## Verifying Installation + +To verify that PythonBPF is installed correctly, run: + +```bash +python3 -c "import pythonbpf; print(pythonbpf.__all__)" +``` + +You should see output similar to: + +``` +['bpf', 'map', 'section', 'bpfglobal', 'struct', 'compile_to_ir', 'compile', 'BPF', 'trace_pipe', 'trace_fields'] +``` + +## Troubleshooting + +### Permission Errors + +If you encounter permission errors when running BPF programs: + +* Make sure you're running with `sudo` or as root +* Check that `/sys/kernel/tracing/` is accessible + +### LLVM/Clang Not Found + +If you get errors about `llc` or `clang` not being found: + +* Verify they're installed: `which llc` and `which clang` +* Check your PATH environment variable includes the LLVM bin directory + +### Import Errors + +If Python can't find the `pythonbpf` module: + +* Make sure you've activated your virtual environment +* Verify installation with `uv pip list | grep pythonbpf` or `pip list | grep pythonbpf` +* Try reinstalling: `uv pip install --force-reinstall pythonbpf` or `pip install --force-reinstall pythonbpf` + +## Next Steps + +Now that you have PythonBPF installed, continue to the {doc}`quickstart` guide to write your first BPF program! diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md new file mode 100644 index 0000000..2283adf --- /dev/null +++ b/docs/getting-started/quickstart.md @@ -0,0 +1,249 @@ +# Quick Start + +This guide will walk you through creating your first BPF program with PythonBPF. + +## Your First BPF Program + +Let's create a simple "Hello World" program that prints a message every time a process is executed on your system. 
+ +### Step 1: Create the Program + +Create a new file called `hello_world.py`: + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def hello_world(ctx: c_void_p) -> c_int64: + print("Hello, World!") + return 0 + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load() +b.attach_all() +trace_pipe() +``` + +### Step 2: Run the Program + +Run the program with sudo (required for BPF operations): + +```bash +sudo python3 hello_world.py +``` + +### Step 3: See it in Action + +Open another terminal and run any command: + +```bash +ls +echo "test" +date +``` + +You should see "Hello, World!" printed in the first terminal for each command executed! + +Press `Ctrl+C` to stop the program. + +## Understanding the Code + +Let's break down what each part does: + +### Imports + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from ctypes import c_void_p, c_int64 +``` + +* `bpf` - Decorator to mark functions for BPF compilation +* `section` - Decorator to specify which kernel event to attach to +* `bpfglobal` - Decorator for BPF global variables +* `BPF` - Class to compile, load, and attach BPF programs +* `trace_pipe` - Utility to read kernel trace output (similar to BCC) +* `c_void_p`, `c_int64` - C types for function signatures + +### The BPF Function + +```python +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def hello_world(ctx: c_void_p) -> c_int64: + print("Hello, World!") + return 0 +``` + +* `@bpf` - Marks this function to be compiled to BPF bytecode +* `@section("tracepoint/syscalls/sys_enter_execve")` - Attaches to the execve syscall tracepoint (called when processes start) +* `ctx: c_void_p` - Context parameter (required for all BPF functions) +* `print()` - the PythonBPF API for `bpf_printk` helper function +* `return 0` - BPF functions must return an integer + +### License Declaration + 
+```python +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" +``` + +* The Linux kernel requires BPF programs to declare a license +* Most kernel features require GPL-compatible licenses +* This is defined as a BPF global variable + +### Compilation and Execution + +```python +b = BPF() +b.load() +b.attach_all() +trace_pipe() +``` + +* `BPF()` - Creates a BPF object and compiles the current file +* `b.load()` - Loads the compiled BPF program into the kernel +* `b.attach_all()` - Attaches all BPF programs to their specified hooks +* `trace_pipe()` - Reads and displays output from the kernel trace buffer + +Alternatively, you can use the `compile()` function to compile the BPF code to an object file without loading it: + +```python +from pythonbpf import compile +``` + +This object file can then be loaded using any other userspace library in any language. + +## Next Example: Tracking Process IDs + +Let's make a more interesting program that tracks which processes are being created: + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from pythonbpf.helper import pid +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def track_exec(ctx: c_void_p) -> c_int64: + process_id = pid() + print(f"Process with PID: {process_id} is starting") + return 0 + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load() +b.attach_all() +trace_pipe() +``` + +This program uses a BPF helper function: + +* `pid()` - Gets the current process ID + +Save it as `track_exec.py`, run it with `sudo python3 track_exec.py`, and watch processes being created! + +## Common Patterns + +### Tracepoints + +Tracepoints are predefined hooks in the kernel. 
Common ones include: + +```python +# System calls +@section("tracepoint/syscalls/sys_enter_execve") +@section("tracepoint/syscalls/sys_enter_clone") +@section("tracepoint/syscalls/sys_enter_open") + +# Scheduler events +@section("tracepoint/sched/sched_process_fork") +@section("tracepoint/sched/sched_switch") +``` + +### Kprobes + +Kprobes allow you to attach to any kernel function: + +```python +@section("kprobe/do_sys_open") +def trace_open(ctx: c_void_p) -> c_int64: + print("File is being opened") + return 0 +``` + +### XDP (eXpress Data Path) + +For network packet processing: + +```python +from pythonbpf.helper import XDP_PASS + +@section("xdp") +def xdp_pass(ctx: c_void_p) -> c_int64: + return XDP_PASS +``` + +## Best Practices + +1. **Always include a LICENSE** - Required by the kernel +2. **Use type hints** - Required by PythonBPF to generate correct code +3. **Return the correct type** - Match the expected return type for your program type +4. **Test incrementally** - Start simple and add complexity gradually +5. 
**Check kernel logs** - Use `dmesg` to see BPF verifier messages if loading fails + +## Common Issues + +### Program Won't Load + +If your BPF program fails to load: + +* Check `dmesg` for verifier error messages +* Ensure your LICENSE is GPL-compatible +* Verify you're using supported BPF features +* Make sure return types match function signatures + +### No Output + +If you don't see output: + +* Verify the tracepoint/kprobe is being triggered +* Check that you're running with sudo +* Ensure `/sys/kernel/tracing/trace_pipe` is accessible + +### Compilation Errors + +If compilation fails: + +* Check that `llc` is installed and in your PATH +* Verify your Python syntax is correct +* Ensure all imported types are from `ctypes` +* In the worst case, compile object files manually using `compile_to_ir()` and `llc` to get detailed errors + +### Verification Failure + +If verification fails: + +* Compile the object file using the `compile()` function instead of loading directly +* Run `sudo check.sh check <program>.o` (replacing `<program>.o` with your compiled object file) to get detailed verification output + +## Next Steps + +Now that you understand the basics, explore: + +* {doc}`../user-guide/decorators` - Learn about all available decorators +* {doc}`../user-guide/maps` - Use BPF maps for data storage and communication +* {doc}`../user-guide/structs` - Define custom data structures +* {doc}`../user-guide/helpers` - Discover all available BPF helper functions +* [Examples directory](https://github.com/pythonbpf/Python-BPF/tree/master/examples) - See more complex examples diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..920edd2 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,95 @@ +# PythonBPF Documentation + +Welcome to **PythonBPF** - a Python frontend for writing eBPF programs without embedding C code. PythonBPF uses [llvmlite](https://github.com/numba/llvmlite) to generate LLVM IR and compiles directly to eBPF object files that can be loaded into the Linux kernel. 
+ +```{note} +This project is under active development. +``` + +## What is PythonBPF? + +PythonBPF is an LLVM IR generator for eBPF programs written in Python. It provides: + +* **Pure Python syntax** - Write eBPF programs in Python using familiar decorators and type annotations +* **Direct compilation** - Compile to LLVM object files without relying on BCC +* **Full eBPF features** - Support for maps, helpers, global definitions, and more +* **Integration with libbpf** - Works with [pylibbpf](https://github.com/pythonbpf/pylibbpf) for object loading and execution + +## Quick Example + +Here's a simple "Hello World" BPF program that traces process creation: + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def hello_world(ctx: c_void_p) -> c_int64: + print("Hello, World!") + return 0 + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load() +b.attach_all() +trace_pipe() +``` + +## Features + +* Generate eBPF programs directly using Python syntax +* Compile to LLVM object files for kernel execution +* Built with `llvmlite` for IR generation +* Supports maps, helpers, and global definitions for BPF +* Companion project: [pylibbpf](https://github.com/pythonbpf/pylibbpf), which provides bindings for libbpf + +## Table of Contents + +```{toctree} +:maxdepth: 2 +:caption: Getting Started + +getting-started/index +getting-started/installation +getting-started/quickstart +``` + +```{toctree} +:maxdepth: 2 +:caption: User Guide + +user-guide/index +user-guide/decorators +user-guide/maps +user-guide/structs +user-guide/compilation +user-guide/helpers +``` + +```{toctree} +:maxdepth: 2 +:caption: API Reference + +api/index +``` + +## Links + +* **GitHub Repository**: [pythonbpf/Python-BPF](https://github.com/pythonbpf/Python-BPF) +* **PyPI Package**: [pythonbpf](https://pypi.org/project/pythonbpf/) +* **Video Demo**: 
[YouTube](https://www.youtube.com/watch?v=eFVhLnWFxtE) + +## License + +PythonBPF is licensed under the Apache License 2.0. + +## Indices and tables + +* {ref}`genindex` +* {ref}`modindex` +* {ref}`search` diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..a05ddbc --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,4 @@ +myst-parser>=2.0 +sphinx>=7.0 +sphinx-copybutton +sphinx-rtd-theme>=2.0 diff --git a/docs/user-guide/compilation.md b/docs/user-guide/compilation.md new file mode 100644 index 0000000..0a58f2e --- /dev/null +++ b/docs/user-guide/compilation.md @@ -0,0 +1,432 @@ +# Compilation + +PythonBPF provides several functions and classes for compiling Python code into BPF bytecode and loading it into the kernel. + +## Overview + +The compilation process transforms Python code into executable BPF programs: + +1. **Python AST** → LLVM IR generation (using llvmlite) +2. **LLVM IR** → BPF bytecode (using llc) +3. **BPF Object** → Kernel loading (using libbpf) + +## Compilation Functions + +### compile_to_ir() + +Compile Python source to LLVM Intermediate Representation. + +#### Signature + +```python +def compile_to_ir(filename: str, output: str, loglevel=logging.WARNING) +``` + +#### Parameters + +* `filename` - Path to the Python source file to compile +* `output` - Path where the LLVM IR file (.ll) should be written +* `loglevel` - Logging level (default: `logging.WARNING`) + +#### Usage + +```python +from pythonbpf import compile_to_ir +import logging + +# Compile to LLVM IR +compile_to_ir( + filename="my_bpf_program.py", + output="my_bpf_program.ll", + loglevel=logging.DEBUG +) +``` + +#### Output + +This function generates an `.ll` file containing LLVM IR, which is human-readable assembly-like code. This is useful for: + +* Debugging compilation issues +* Understanding code generation + +### compile() + +Compile Python source to BPF object file. 
+ +#### Signature + +```python +def compile(filename: str = None, output: str = None, loglevel=logging.WARNING) +``` + +#### Parameters + +* `filename` - Path to the Python source file (default: calling file) +* `output` - Path for the output object file (default: same name with `.o` extension) +* `loglevel` - Logging level (default: `logging.WARNING`) + +#### Usage + +```python +from pythonbpf import compile +import logging + +# Compile current file +compile() + +# Compile specific file +compile(filename="my_program.py", output="my_program.o") + +# Compile with debug logging +compile(loglevel=logging.DEBUG) +``` + +#### Output + +This function generates a `.o` file containing BPF bytecode that can be: + +* Loaded into the kernel +* Inspected with `bpftool` +* Verified with the BPF verifier +* Distributed as a compiled binary + +### BPF Class + +The `BPF` class provides a high-level interface to compile, load, and attach BPF programs. + +#### Signature + +```python +class BPF: + def __init__(self, filename: str = None, loglevel=logging.WARNING) + def load(self) + def attach_all(self) + def load_and_attach(self) +``` + +#### Parameters + +* `filename` - Path to Python source file (default: calling file) +* `loglevel` - Logging level (default: `logging.WARNING`) + +#### Methods + +##### __init__() + +Create a BPF object and compile the source. + +```python +from pythonbpf import BPF + +# Compile current file +b = BPF() + +# Compile specific file +b = BPF(filename="my_program.py") +``` + +##### load() + +Load the compiled BPF program into the kernel. + +```python +b = BPF() +b.load() +``` + +This method: +* Loads the BPF object file into the kernel +* Creates maps +* Verifies the BPF program +* Returns a `BpfObject` instance + +##### attach_all() + +Attach all BPF programs to their specified hooks. 
+ +```python +b = BPF() +b.load() +b.attach_all() +``` + +This method: +* Attaches tracepoints +* Attaches kprobes/kretprobes +* Attaches XDP programs +* Enables all hooks + +##### load_and_attach() + +Convenience method that loads and attaches in one call. + +```python +b = BPF() +b.load_and_attach() +``` + +Equivalent to: +```python +b = BPF() +b.load() +b.attach_all() +``` + +## Complete Example + +Here's a complete example showing the compilation workflow: + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def trace_exec(ctx: c_void_p) -> c_int64: + print("Process started") + return 0 + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +if __name__ == "__main__": + # Method 1: Simple compilation and loading + b = BPF() + b.load_and_attach() + trace_pipe() + + # Method 2: Step-by-step + # b = BPF() + # b.load() + # b.attach_all() + # trace_pipe() + + # Method 3: Manual compilation + # from pythonbpf import compile + # compile(filename="my_program.py", output="my_program.o") + # # Then load with pylibbpf directly +``` + +## Compilation Pipeline Details + +### AST Parsing + +The Python `ast` module parses your source code: + +```python +import ast +tree = ast.parse(source_code, filename) +``` + +The AST is then walked to find: +* Functions decorated with `@bpf` +* Classes decorated with `@struct` +* Map definitions with `@map` +* Global variables with `@bpfglobal` + +### IR Generation + +PythonBPF uses `llvmlite` to generate LLVM IR: + +```python +from llvmlite import ir + +# Create module +module = ir.Module(name='bpf_module') +module.triple = 'bpf' + +# Generate IR for each BPF function +# ... 
+``` + +Key aspects of IR generation: + +* Type conversion (Python types → LLVM types) +* Function definitions +* Map declarations +* Global variable initialization +* Debug information + +### BPF Compilation + +The LLVM IR is compiled to BPF bytecode using `llc`: + +```bash +llc -march=bpf -filetype=obj input.ll -o output.o +``` + +### Kernel Loading + +The compiled object is loaded using `pylibbpf`: + +```python +from pylibbpf import BpfObject + +obj = BpfObject(path="program.o") +obj.load() +``` + +## Debugging Compilation + +### Logging + +Enable debug logging to see compilation details: + +```python +import logging +from pythonbpf import BPF + +b = BPF(loglevel=logging.DEBUG) +``` + +This will show: +* AST parsing details +* IR generation steps +* Compilation commands +* Loading status + +### Inspecting LLVM IR + +Generate and inspect the IR file: + +```python +from pythonbpf import compile_to_ir + +compile_to_ir("program.py", "program.ll") +``` + +Then examine `program.ll` to understand the generated code. + +### Using bpftool + +Inspect loaded programs and maps with `bpftool`: + +```bash +# Show program info +bpftool prog show + +# Dump program instructions +bpftool prog dump xlated id <PROG_ID> + +# Dump program JIT code +bpftool prog dump jited id <PROG_ID> + +# Show maps +bpftool map show + +# Dump map contents +bpftool map dump id <MAP_ID> +``` + +### Verifier Errors + +If the kernel verifier rejects your program: + +* Check `dmesg` for detailed error messages: + ```bash + sudo dmesg | tail -50 + ``` + +## Compilation Options + +### Optimization Levels + +While PythonBPF doesn't expose optimization flags directly, you can: + +1. Manually compile IR with specific flags: + ```bash + llc -march=bpf -O2 -filetype=obj program.ll -o program.o + ``` + +2. 
Modify the compilation pipeline in your code + +### Debug Information + +PythonBPF automatically generates debug information (DWARF) for: + +* Function names +* Variable names +* Type information + +This helps with: +* Stack traces +* Debugging with `bpftool` +* Source-level debugging + +## Working with Compiled Objects + +### Loading Pre-compiled Objects + +You can load previously compiled objects: + +```python +from pylibbpf import BpfObject + +# Load object file +obj = BpfObject(path="my_program.o") +obj.load() + +# Attach programs +# (specific attachment depends on program type) +``` + +### Distribution + +Distribute compiled BPF objects: + +1. Compile once: + ```python + from pythonbpf import compile + compile(filename="program.py", output="program.o") + ``` + +2. Ship `program.o` file + +3. Load on target systems: + ```python + from pylibbpf import BpfObject + obj = BpfObject(path="program.o") + obj.load() + ``` + +### Version Compatibility + +BPF objects are generally compatible across kernel versions, but: + +* Some features require specific kernel versions +* Helper functions may not be available on older kernels +* BTF (BPF Type Format) requirements vary + +## Troubleshooting + +### Compilation Fails + +If compilation fails: +* Check Python syntax +* Verify all decorators are correct +* Ensure type hints are present +* Check for unsupported Python features + +### Loading Fails + +If loading fails: +* Check `dmesg` for verifier errors +* Verify LICENSE is set correctly +* Ensure helper functions are valid +* Check map definitions + +### Programs Don't Attach + +If attachment fails: +* Verify section names are correct +* Check that hooks exist on your kernel +* Ensure you have sufficient permissions +* Verify kernel version supports the feature + +## Next Steps + +* Learn about {doc}`helpers` for available BPF helper functions +* Explore {doc}`maps` for data storage +* See {doc}`decorators` for compilation markers diff --git a/docs/user-guide/decorators.md 
b/docs/user-guide/decorators.md new file mode 100644 index 0000000..ff8b242 --- /dev/null +++ b/docs/user-guide/decorators.md @@ -0,0 +1,448 @@ +# Decorators + +Decorators are the primary way to mark Python code for BPF compilation. PythonBPF provides five core decorators that control how your code is transformed into eBPF bytecode. + +## @bpf + +The `@bpf` decorator marks functions or classes for BPF compilation. + +### Usage + +```python +from pythonbpf import bpf + +@bpf +def my_function(ctx): + # This function will be compiled to BPF bytecode + pass +``` + +### Description + +Any function or class decorated with `@bpf` will be processed by the PythonBPF compiler and transformed into LLVM IR, then compiled to BPF bytecode. This is the fundamental decorator that enables BPF compilation. + +### Rules + +* Must be used on top-level functions or classes +* The function must have proper type hints +* Return types must be BPF-compatible +* Only BPF-compatible operations are allowed inside + +### Example + +```python +from pythonbpf import bpf, section +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def trace_exec(ctx: c_void_p) -> c_int64: + print("Process started") + return c_int64(0) +``` + +## @section + +The `@section(name)` decorator specifies which kernel hook to attach the BPF program to. 
+ +### Usage + +```python +from pythonbpf import bpf, section + +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def trace_open(ctx): + pass +``` + +### Section Types + +#### Tracepoints + +Tracepoints are stable kernel hooks, listed under `/sys/kernel/tracing/events/`: + +```python +# System call tracepoints +@section("tracepoint/syscalls/sys_enter_execve") +@section("tracepoint/syscalls/sys_enter_clone") +@section("tracepoint/syscalls/sys_enter_open") +@section("tracepoint/syscalls/sys_exit_read") + +# Scheduler tracepoints +@section("tracepoint/sched/sched_process_fork") +@section("tracepoint/sched/sched_process_exit") +@section("tracepoint/sched/sched_switch") + +# Block I/O tracepoints +@section("tracepoint/block/block_rq_insert") +@section("tracepoint/block/block_rq_complete") +``` + +#### Kprobes + +Kprobes allow attaching to almost any kernel function (a few are blacklisted or inlined and cannot be probed): + +```python +@section("kprobe/do_sys_open") +def trace_sys_open(ctx): + pass + +@section("kprobe/__x64_sys_write") +def trace_write(ctx): + pass +``` + +#### Kretprobes + +Kretprobes trigger when a kernel function returns: + +```python +@section("kretprobe/do_sys_open") +def trace_open_return(ctx): + pass +``` + +#### XDP (eXpress Data Path) + +XDP programs process network packets at the earliest possible point, in the driver receive path before the kernel networking stack: + +```python +from pythonbpf.helper import XDP_PASS +from ctypes import c_void_p, c_int64 + +@section("xdp") +def xdp_prog(ctx: c_void_p) -> c_int64: + # XDP_PASS, XDP_DROP, XDP_ABORTED constants available from pythonbpf.helper + return XDP_PASS +``` + +### Finding Tracepoints + +To find available tracepoints on your system: + +```bash +# List all tracepoints +ls /sys/kernel/tracing/events/ + +# List syscall tracepoints +ls /sys/kernel/tracing/events/syscalls/ + +# View tracepoint format +cat /sys/kernel/tracing/events/syscalls/sys_enter_open/format +``` + +## @map + +The `@map` decorator marks a function as a BPF map definition. 
+ +### Usage + +```python +from pythonbpf import bpf, map +from pythonbpf.maps import HashMap +from ctypes import c_uint32, c_uint64 + +@bpf +@map +def my_map() -> HashMap: + return HashMap(key=c_uint32, value=c_uint64, max_entries=1024) +``` + +### Description + +Maps are BPF data structures used to: + +* Store state between BPF program invocations +* Communicate data between BPF programs +* Share data with userspace + +The function must return a map type (HashMap, PerfEventArray, RingBuffer) and the return type must be annotated. + +### Example + +```python +from pythonbpf import bpf, map, section +from pythonbpf.maps import HashMap +from pythonbpf.helper import pid +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +@bpf +@map +def process_count() -> HashMap: + return HashMap(key=c_uint32, value=c_uint64, max_entries=4096) + +@bpf +@section("tracepoint/syscalls/sys_enter_clone") +def count_clones(ctx: c_void_p) -> c_int64: + process_id = pid() + count = process_count.lookup(process_id) + if count: + process_count.update(process_id, count + 1) + else: + process_count.update(process_id, c_uint64(1)) + return 0 +``` + +See {doc}`maps` for more details on available map types. + +## @struct + +The `@struct` decorator marks a class as a BPF struct definition. + +### Usage + +```python +from pythonbpf import bpf, struct +from ctypes import c_uint64, c_uint32 + +@bpf +@struct +class Event: + timestamp: c_uint64 + pid: c_uint32 + cpu: c_uint32 +``` + +### Description + +Structs allow you to define custom data types for use in BPF programs. 
They can be used: + +* As map keys and values +* For perf event output +* In ring buffer submissions +* As local variables + +### Field Types + +Supported field types include: + +* **Integer types**: `c_int8`, `c_int16`, `c_int32`, `c_int64`, `c_uint8`, `c_uint16`, `c_uint32`, `c_uint64` +* **Pointers**: `c_void_p`, `c_char_p` +* **Fixed strings**: `str(N)` where N is the size (e.g., `str(16)`) +* **Nested structs**: Other `@struct` decorated classes + +### Example + +```python +from pythonbpf import bpf, struct, map, section +from pythonbpf.maps import RingBuffer +from pythonbpf.helper import pid, ktime +from ctypes import c_void_p, c_int64, c_uint64, c_uint32 + +@bpf +@struct +class ProcessEvent: + timestamp: c_uint64 + pid: c_uint32 + comm: str(16) + +@bpf +@map +def events() -> RingBuffer: + return RingBuffer(max_entries=4096) + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def track_processes(ctx: c_void_p) -> c_int64: + event = ProcessEvent() + event.timestamp = ktime() + event.pid = pid() + comm(event.comm) # Fills event.comm with process name + + events.output(event) + return 0 +``` + +See {doc}`structs` for more details on working with structs. + +## @bpfglobal + +The `@bpfglobal` decorator marks a function as a BPF global variable definition. 
+ +### Usage + +```python +from pythonbpf import bpf, bpfglobal + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" +``` + +### Description + +BPF global variables are values that: + +* Are initialized when the program loads +* Can be read by all BPF functions +* Must be constant (cannot be modified at runtime in current implementation) + +### Common Global Variables + +#### LICENSE (Required) + +Every BPF program must declare a license: + +```python +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" +``` + +Valid licenses include: +* `"GPL"` - GNU General Public License +* `"GPL v2"` - GPL version 2 +* `"Dual BSD/GPL"` - Dual licensed +* `"Dual MIT/GPL"` - Dual licensed + +```{warning} +Many BPF features require a GPL-compatible license. Using a non-GPL license may prevent your program from loading or accessing certain kernel features. +``` + +#### Custom Global Variables + +You can define other global variables: + +```python +@bpf +@bpfglobal +def DEBUG_MODE() -> int: + return 1 + +@bpf +@bpfglobal +def MAX_EVENTS() -> int: + return 1000 +``` + +These can be referenced in your BPF functions, though modifying them at runtime is currently not supported. + +## Combining Decorators + +Decorators are often used together. 
The order matters: + +### Correct Order + +```python +@bpf # Always first +@section("...") # Section before other decorators +def my_function(): + pass + +@bpf # Always first +@map # Map/struct/bpfglobal after @bpf +def my_map(): + pass + +@bpf # Always first +@struct # Map/struct/bpfglobal after @bpf +class MyStruct: + pass + +@bpf # Always first +@bpfglobal # Map/struct/bpfglobal after @bpf +def LICENSE(): + return "GPL" +``` + +### Examples by Use Case + +#### Simple Tracepoint + +```python +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def trace_open(ctx: c_void_p) -> c_int64: + return c_int64(0) +``` + +#### Map Definition + +```python +@bpf +@map +def counters() -> HashMap: + return HashMap(key=c_uint32, value=c_uint64, max_entries=256) +``` + +#### Struct Definition + +```python +@bpf +@struct +class Event: + timestamp: c_uint64 + value: c_uint32 +``` + +#### Global Variable + +```python +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" +``` + +## Best Practices + +1. **Always use @bpf first** - It must be the outermost decorator +2. **Provide type hints** - Required for proper code generation +3. 
**Test incrementally** - Verify each component works before combining + +## Common Errors + +### Missing @bpf Decorator + +```python +# Wrong - missing @bpf +@section("tracepoint/syscalls/sys_enter_open") +def my_func(ctx): + pass + +# Correct +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def my_func(ctx): + pass +``` + +### Wrong Decorator Order + +```python +# Wrong - @section before @bpf +@section("tracepoint/syscalls/sys_enter_open") +@bpf +def my_func(ctx): + pass + +# Correct +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def my_func(ctx): + pass +``` + +### Missing Type Hints + +```python +# Wrong - no type hints +@bpf +def my_func(ctx): + pass + +# Correct +@bpf +def my_func(ctx: c_void_p) -> c_int64: + pass +``` + +## Next Steps + +* Learn about {doc}`maps` for data storage and communication +* Explore {doc}`structs` for defining custom data types +* Understand {doc}`compilation` to see how code is transformed +* Check out {doc}`helpers` for available BPF helper functions diff --git a/docs/user-guide/helpers.md b/docs/user-guide/helpers.md new file mode 100644 index 0000000..acc0873 --- /dev/null +++ b/docs/user-guide/helpers.md @@ -0,0 +1,503 @@ +# Helper Functions and Utilities + +PythonBPF provides helper functions and utilities for BPF programs and userspace code. + +```{note} +**Work in Progress:** PythonBPF is under active development. We are constantly adding support for more helpers, kfuncs, and map types. Check back for updates! +``` +For comprehensive documentation on BPF helpers, see the [eBPF Helper Functions documentation on ebpf.io](https://ebpf.io/what-is-ebpf/#helper-calls). + +## BPF Helper Functions + +BPF helper functions are kernel-provided functions that BPF programs can call to interact with the system. PythonBPF exposes these through the `pythonbpf.helper` module. + +```python +from pythonbpf.helper import pid, ktime, comm +``` + +### Process and Task Information + +#### pid() + +Get the current process ID. 
+ +> **Linux Kernel Helper:** `bpf_get_current_pid_tgid()` + +```python +from pythonbpf.helper import pid + +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def trace_open(ctx: c_void_p) -> c_int64: + process_id = pid() + print(f"Process {process_id} opened a file") + return 0 +``` + +**Returns:** `c_int32` - The process ID of the current task + +#### comm() + +Get the current process command name. + +> **Linux Kernel Helper:** `bpf_get_current_comm()` + +**Parameters:** +* `buf` - Buffer to fill with the process command name + +**Returns:** `c_int64` - 0 on success, negative on error + +#### uid() + +Get the current user ID. + +> **Linux Kernel Helper:** `bpf_get_current_uid_gid()` + +```python +from pythonbpf.helper import uid + +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def trace_open(ctx: c_void_p) -> c_int64: + user_id = uid() + if user_id == 0: + print("Root user opened a file") + return 0 +``` + +**Returns:** `c_int32` - The user ID of the current task + +### Time and Timing + +#### ktime() + +Get the current kernel time in nanoseconds since system boot. + +> **Linux Kernel Helper:** `bpf_ktime_get_ns()` + +```python +from pythonbpf.helper import ktime + +@bpf +@section("tracepoint/syscalls/sys_enter_read") +def measure_latency(ctx: c_void_p) -> c_int64: + start_time = ktime() + # Store for later comparison + return 0 +``` + +**Returns:** `c_int64` - Current time in nanoseconds + +**Use cases:** +* Measuring latency +* Timestamping events +* Rate limiting +* Timeout detection + +### CPU Information + +#### smp_processor_id() + +Get the ID of the CPU on which the BPF program is running. 
+ +> **Linux Kernel Helper:** `bpf_get_smp_processor_id()` + +```python +from pythonbpf.helper import smp_processor_id + +@bpf +@section("tracepoint/sched/sched_switch") +def track_cpu(ctx: c_void_p) -> c_int64: + cpu = smp_processor_id() + print(f"Running on CPU {cpu}") + return 0 +``` + +**Returns:** `c_int32` - The current CPU ID + +**Use cases:** +* Per-CPU statistics +* Load balancing analysis +* CPU affinity tracking + +### Memory Operations + +#### probe_read() + +Safely read data from kernel memory. + +> **Linux Kernel Helper:** `bpf_probe_read()` + +```python +from pythonbpf.helper import probe_read + +@bpf +def read_kernel_data(ctx: c_void_p) -> c_int64: + dst = 0 + size = 8 + src = ctx # kernel address + + result = probe_read(dst, size, src) + if result == 0: + print(f"Read value: {dst}") + return 0 +``` + +**Parameters:** +* `dst` - Destination buffer +* `size` - Number of bytes to read +* `src` - Source kernel address + +**Returns:** `c_int64` - 0 on success, negative on error + +**Safety:** The kernel handles faults during the read, so an invalid source address produces a negative error code instead of crashing the kernel. + +#### probe_read_str() + +Safely read a null-terminated string from kernel memory. + +> **Linux Kernel Helper:** `bpf_probe_read_str()` + +**Parameters:** +* `dst` - Destination buffer (string) +* `src` - Source kernel address + +**Returns:** `c_int64` - Length of the string on success, negative on error + +### Random Numbers + +#### random() + +Generate a pseudo-random 32-bit number. + +> **Linux Kernel Helper:** `bpf_get_prandom_u32()` + +```python +from pythonbpf.helper import random + +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def sample_events(ctx: c_void_p) -> c_int64: + # Sample 1% of events + if (random() % 100) == 0: + print("Sampled event") + return 0 +``` + +**Returns:** `c_int32` - A pseudo-random number + +### Network Helpers + +#### skb_store_bytes() + +Store bytes into a socket buffer (for network programs). 
+ +> **Linux Kernel Helper:** `bpf_skb_store_bytes()` + +```python +from pythonbpf.helper import skb_store_bytes + +@bpf +@section("classifier") +def modify_packet(ctx: c_void_p) -> c_int32: + offset = 14 # Skip Ethernet header + data = b"\x00\x01\x02\x03" + size = len(data) + + result = skb_store_bytes(offset, data, size) + return 0 +``` + +**Parameters:** +* `offset` - Offset in the socket buffer +* `from_buf` - Data to write +* `size` - Number of bytes to write +* `flags` - Optional flags + +**Returns:** `c_int64` - 0 on success, negative on error + +## Userspace Utilities + +PythonBPF provides utilities for working with BPF programs from Python userspace code. + +### trace_pipe() + +Read and display output from the kernel trace pipe. + +```python +from pythonbpf import trace_pipe + +# After loading and attaching BPF programs +trace_pipe() +``` + +**Description:** + +The `trace_pipe()` function reads from `/sys/kernel/tracing/trace_pipe` and displays BPF program output to stdout. This is the output from `print()` statements in BPF programs. + +**Usage:** + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def trace_exec(ctx: c_void_p) -> c_int64: + print("Process started") # This goes to trace_pipe + return 0 + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load_and_attach() +trace_pipe() # Display BPF output +``` + +**Behavior:** + +* Blocks until Ctrl+C is pressed +* Displays output in real-time +* Shows task name, PID, CPU, timestamp, and message +* Automatically handles trace pipe access errors + +**Requirements:** + +* Root or sudo access +* Accessible `/sys/kernel/tracing/trace_pipe` + +### trace_fields() + +Parse one line from the trace pipe into structured fields. 
+ +```python +from pythonbpf import trace_fields + +# Read and parse trace output +task, pid, cpu, flags, ts, msg = trace_fields() +print(f"Task: {task}, PID: {pid}, CPU: {cpu}, Time: {ts}, Message: {msg}") +``` + +**Returns:** Tuple of `(task, pid, cpu, flags, timestamp, message)` + +* `task` - String: Task/process name (up to 16 chars) +* `pid` - Integer: Process ID +* `cpu` - Integer: CPU number +* `flags` - Bytes: Trace flags +* `timestamp` - Float: Timestamp in seconds +* `message` - String: The actual trace message + +**Description:** + +The `trace_fields()` function reads one line from the trace pipe and parses it into individual fields. This is useful when you need programmatic access to trace data rather than just displaying it. + +**Usage:** + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_fields +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def trace_exec(ctx: c_void_p) -> c_int64: + print(f"PID:{pid()}") + return 0 + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load_and_attach() + +# Process trace events +try: + while True: + task, pid, cpu, flags, ts, msg = trace_fields() + print(f"[{ts:.6f}] {task}({pid}) on CPU{cpu}: {msg}") +except KeyboardInterrupt: + print("Stopped") +``` + +**Error Handling:** + +* Raises `ValueError` if the line cannot be parsed +* Skips lines about lost events +* Blocks while waiting for the next line + +## Helper Function Examples + +### Example 1: Latency Measurement + +```python +from pythonbpf import bpf, map, section, bpfglobal, BPF, trace_pipe +from pythonbpf.maps import HashMap +from pythonbpf.helper import pid, ktime +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +@bpf +@map +def start_times() -> HashMap: + return HashMap(key=c_uint32, value=c_uint64, max_entries=4096) + +@bpf +@section("tracepoint/syscalls/sys_enter_read") +def read_start(ctx: c_void_p) -> c_int64: + process_id = pid() + start = ktime() + 
start_times.update(process_id, start) + return 0 + +@bpf +@section("tracepoint/syscalls/sys_exit_read") +def read_end(ctx: c_void_p) -> c_int64: + process_id = pid() + start = start_times.lookup(process_id) + + if start: + latency = ktime() - start + print(f"Read latency: {latency} ns") + start_times.delete(process_id) + + return 0 + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load_and_attach() +trace_pipe() +``` + +### Example 2: Process Tracking + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from pythonbpf.helper import pid, uid +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def track_exec(ctx: c_void_p) -> c_int64: + process_id = pid() + user_id = uid() + + print(f"User {user_id} started process (PID: {process_id})") + return 0 + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load_and_attach() +trace_pipe() +``` + +### Example 3: CPU Load Monitoring + +```python +from pythonbpf import bpf, map, section, bpfglobal, BPF +from pythonbpf.maps import HashMap +from pythonbpf.helper import smp_processor_id +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +@bpf +@map +def cpu_counts() -> HashMap: + return HashMap(key=c_uint32, value=c_uint64, max_entries=256) + +@bpf +@section("tracepoint/sched/sched_switch") +def count_switches(ctx: c_void_p) -> c_int64: + cpu = smp_processor_id() + count = cpu_counts.lookup(cpu) + + if count: + cpu_counts.update(cpu, count + 1) + else: + cpu_counts.update(cpu, 1) + + return 0 + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load_and_attach() + +import time +time.sleep(5) + +# Read results +from pylibbpf import BpfMap +map_obj = BpfMap(b, cpu_counts) +for cpu, count in map_obj.items(): + print(f"CPU {cpu}: {count} context switches") +``` + +### Example 4: Event Sampling + +```python +from pythonbpf import bpf, section, bpfglobal, BPF, trace_pipe +from pythonbpf.helper 
import random, pid +from ctypes import c_void_p, c_int64 + +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def sample_opens(ctx: c_void_p) -> c_int64: + # Sample 5% of events + if (random() % 100) < 5: + process_id = pid() + print(f"Sampled: PID {process_id} opening file") + + return 0 + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +b = BPF() +b.load_and_attach() +trace_pipe() +``` + +## Troubleshooting + +### Helper Not Available + +If a helper function doesn't work: +* Check your kernel version (some helpers are newer) +* Ensure your LICENSE is GPL-compatible + +### Trace Pipe Access Denied + +If `trace_pipe()` fails: +* Run with sudo/root +* Check `/sys/kernel/tracing/` is accessible +* Verify tracing is enabled in kernel config + +## Examples + +Check out these examples in the `BCC-Examples/` directory that demonstrate helper functions: + +* [hello_world.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/hello_world.py) - Basic tracing with `print()` +* [sync_timing.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/sync_timing.py) - Using `ktime()` for timing measurements +* [hello_perf_output.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/hello_perf_output.py) - Using `pid()`, `ktime()`, and `comm()` with perf events +* [vfsreadlat.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/vfsreadlat.py) - Latency measurement with `ktime()` in kprobes + +## Next Steps + +* Explore {doc}`maps` for data storage with helpers +* Learn about {doc}`compilation` to understand helper implementation +* See {doc}`decorators` for marking BPF functions diff --git a/docs/user-guide/index.md b/docs/user-guide/index.md new file mode 100644 index 0000000..dce1b5e --- /dev/null +++ b/docs/user-guide/index.md @@ -0,0 +1,87 @@ +# User Guide + +This user guide provides comprehensive documentation for all PythonBPF features. 
Whether you're building simple tracing tools or complex performance monitoring systems, this guide will help you master PythonBPF. + +## Overview + +PythonBPF transforms Python code into eBPF bytecode that runs in the Linux kernel. It provides a Pythonic interface to eBPF features through decorators, type annotations, and familiar programming patterns. + +## Core Concepts + +### Decorators + +PythonBPF uses decorators to mark code for BPF compilation: + +* `@bpf` - Mark functions and classes for BPF compilation +* `@map` - Define BPF maps for data storage +* `@struct` - Define custom data structures +* `@section(name)` - Specify attachment points +* `@bpfglobal` - Define global variables + +### Compilation Pipeline + +Your Python code goes through several stages: + +1. **IR Generation** - The Python AST is transformed into LLVM IR using llvmlite +2. **BPF Compilation** - LLVM IR is compiled to BPF bytecode using `llc` +3. **Loading** - The BPF object is loaded into the kernel using libbpf +4. **Attachment** - Programs are attached to kernel hooks (tracepoints, kprobes, etc.) + +## Code Organization + +When writing BPF programs with PythonBPF, we recommend: + +1. **Use type hints** - Required for proper code generation +2. 
**Test incrementally** - Verify each component works before adding complexity + +## Type System + +PythonBPF uses Python's `ctypes` module for type definitions: + +* `c_int8`, `c_int16`, `c_int32`, `c_int64` - Signed integers +* `c_uint8`, `c_uint16`, `c_uint32`, `c_uint64` - Unsigned integers +* `c_char`, `c_bool` - Characters and booleans +* `c_void_p` - Void pointers +* `str(N)` - Fixed-length strings (e.g., `str(16)` for a 16-byte string) + +## Example Structure + +A typical PythonBPF program follows this structure: + +```python +from pythonbpf import bpf, map, section, bpfglobal, BPF, compile +from pythonbpf.maps import HashMap +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +# Define maps +@bpf +@map +def my_map() -> HashMap: + return HashMap(key=c_uint32, value=c_uint64, max_entries=1024) + +# Define BPF function +@bpf +@section("tracepoint/...") +def my_function(ctx: c_void_p) -> c_int64: + # BPF logic here + return 0 + +# License (required) +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +# Compile, load, and run +if __name__ == "__main__": + b = BPF() + b.load_and_attach() + # Use the program... + +# Or, compile to an object file +compile() +``` + +## Next Steps + +Start with {doc}`decorators` to learn about all available decorators, then explore the other sections to master specific features. diff --git a/docs/user-guide/maps.md b/docs/user-guide/maps.md new file mode 100644 index 0000000..939783b --- /dev/null +++ b/docs/user-guide/maps.md @@ -0,0 +1,476 @@ +# BPF Maps + +Maps are BPF data structures that provide storage and communication mechanisms. They allow BPF programs to: + +* Store state between invocations +* Share data between multiple BPF programs +* Communicate with userspace applications + +```{note} +**Work in Progress:** PythonBPF is under active development. We are constantly adding support for more map types, helpers, and kfuncs. Check back for updates! 
+``` +For comprehensive documentation on BPF maps, see the [eBPF Maps documentation on ebpf.io](https://ebpf.io/what-is-ebpf/#maps). + +## Map Types + +PythonBPF supports several map types, each optimized for different use cases. + +### HashMap + +Hash maps provide efficient key-value storage with O(1) lookup time. + +> **Linux Kernel Map Type:** `BPF_MAP_TYPE_HASH` + +#### Definition + +```python +from pythonbpf import bpf, map +from pythonbpf.maps import HashMap +from ctypes import c_uint32, c_uint64 + +@bpf +@map +def my_map() -> HashMap: + return HashMap( + key=c_uint32, + value=c_uint64, + max_entries=1024 + ) +``` + +#### Parameters + +* `key` - The type of the key (must be a ctypes type or struct) +* `value` - The type of the value (must be a ctypes type or struct) +* `max_entries` - Maximum number of entries the map can hold + +#### Operations + +##### lookup(key) + +Look up a value by key. Returns the value if found, `None` otherwise. + +```python +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def trace_open(ctx: c_void_p) -> c_int64: + value = my_map.lookup(1) + if value: + print(f"Found value: {value}") + return 0 +``` + +##### update(key, value, flags=None) + +Update or insert a key-value pair. + +```python +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def track_opens(ctx: c_void_p) -> c_int64: + key = pid() + count = my_map.lookup(key) + if count: + my_map.update(key, count + 1) + else: + my_map.update(key, 1) + return 0 +``` + +##### delete(key) + +Remove an entry from the map. 
+ +```python +@bpf +def cleanup(ctx: c_void_p) -> c_int64: + my_map.delete(1) + return 0 +``` + +#### Use Cases + +* Counting events per process/CPU +* Storing timestamps for latency calculations +* Caching lookup results +* Implementing rate limiters + +#### Example: Process Counter + +```python +from pythonbpf import bpf, map, section, bpfglobal, BPF +from pythonbpf.maps import HashMap +from pythonbpf.helper import pid +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +@bpf +@map +def process_count() -> HashMap: + return HashMap(key=c_uint32, value=c_uint64, max_entries=4096) + +@bpf +@section("tracepoint/syscalls/sys_enter_clone") +def count_processes(ctx: c_void_p) -> c_int64: + process_id = pid() + count = process_count.lookup(process_id) + + if count: + new_count = count + 1 + process_count.update(process_id, new_count) + else: + process_count.update(process_id, 1) + + return 0 + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" + +if __name__ == "__main__": + b = BPF() + b.load_and_attach() + # Access map from userspace + from pylibbpf import BpfMap + map_obj = BpfMap(b, process_count) + # Read values... +``` + +### PerfEventArray + +Perf event arrays are used to send data from BPF programs to userspace with high throughput. + +> **Linux Kernel Map Type:** `BPF_MAP_TYPE_PERF_EVENT_ARRAY` + +#### Definition + +```python +from pythonbpf.maps import PerfEventArray + +@bpf +@map +def events() -> PerfEventArray: + return PerfEventArray( + key_size=c_uint32, + value_size=c_uint32 + ) +``` + +#### Parameters + +* `key_size` - Type for the key (typically `c_uint32`) +* `value_size` - Type for the value (typically `c_uint32`) + +#### Operations + +##### output(data) + +Send data to userspace. The data can be a struct or basic type. 
+ +```python +@bpf +@struct +class Event: + pid: c_uint32 + timestamp: c_uint64 + +@bpf +@map +def events() -> PerfEventArray: + return PerfEventArray(key_size=c_uint32, value_size=c_uint32) + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def send_event(ctx: c_void_p) -> c_int64: + event = Event() + event.pid = pid() + event.timestamp = ktime() + events.output(event) + return 0 +``` + +#### Use Cases + +* Sending detailed event data to userspace +* Real-time monitoring and alerting +* Collecting samples for analysis +* High-throughput data collection + +#### Example: Event Logging + +```python +from pythonbpf import bpf, map, struct, section, bpfglobal, BPF +from pythonbpf.maps import PerfEventArray +from pythonbpf.helper import pid, ktime, comm +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +@bpf +@struct +class ProcessEvent: + timestamp: c_uint64 + pid: c_uint32 + comm: str(16) + +@bpf +@map +def events() -> PerfEventArray: + return PerfEventArray(key_size=c_uint32, value_size=c_uint32) + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def log_exec(ctx: c_void_p) -> c_int64: + event = ProcessEvent() + event.timestamp = ktime() + event.pid = pid() + comm(event.comm) # Fills event.comm with process name + events.output(event) + return 0 + +@bpf +@bpfglobal +def LICENSE() -> str: + return "GPL" +``` + +### RingBuffer + +Ring buffers provide efficient, ordered event delivery with lower overhead than perf event arrays. + +> **Linux Kernel Map Type:** `BPF_MAP_TYPE_RINGBUF` + +#### Definition + +```python +from pythonbpf.maps import RingBuffer + +@bpf +@map +def events() -> RingBuffer: + return RingBuffer(max_entries=4096) +``` + +#### Parameters + +* `max_entries` - Maximum size of the ring buffer in bytes (must be power of 2) + +#### Operations + +##### output(data, flags=0) + +Send data to the ring buffer. 
+ +```python +@bpf +@section("tracepoint/syscalls/sys_enter_open") +def log_event(ctx: c_void_p) -> c_int64: + event = Event() + event.pid = pid() + events.output(event) + return 0 +``` + +##### reserve(size) + +Reserve space in the ring buffer. Returns a pointer to the reserved space or 0 if no space is available. + +```python +@bpf +def reserve_space(ctx: c_void_p) -> c_int64: + ptr = events.reserve(64) # Reserve 64 bytes + if ptr: + # Use the reserved space + events.submit(ptr) + return 0 +``` + +##### submit(data, flags=0) + +Submit previously reserved space. + +##### discard(data, flags=0) + +Discard previously reserved space without submitting. + +#### Use Cases + +* Modern event streaming (preferred over PerfEventArray) +* Lower overhead event delivery +* Ordered event processing +* Kernel 5.8+ systems + +#### Advantages over PerfEventArray + +* Lower memory overhead +* Better performance +* Simpler API +* Ordered delivery guarantees + +### BPFMapType Enum + +PythonBPF supports various BPF map types through the `BPFMapType` enum: + +```python +from pythonbpf.maps import BPFMapType + +# Common map types +BPFMapType.BPF_MAP_TYPE_HASH # Hash map +BPFMapType.BPF_MAP_TYPE_ARRAY # Array map +BPFMapType.BPF_MAP_TYPE_PERF_EVENT_ARRAY # Perf event array +BPFMapType.BPF_MAP_TYPE_RINGBUF # Ring buffer +BPFMapType.BPF_MAP_TYPE_STACK_TRACE # Stack trace storage +BPFMapType.BPF_MAP_TYPE_LRU_HASH # LRU hash map +``` + +## Using Maps with Structs + +Maps can store complex data types using structs as values: + +```python +from pythonbpf import bpf, map, struct, section +from pythonbpf.maps import HashMap +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +@bpf +@struct +class Stats: + count: c_uint64 + total_time: c_uint64 + max_time: c_uint64 + +@bpf +@map +def process_stats() -> HashMap: + return HashMap( + key=c_uint32, # PID as key + value=Stats, # Struct as value + max_entries=1024 + ) + +@bpf +@section("tracepoint/syscalls/sys_enter_read") +def track_stats(ctx: c_void_p) -> 
c_int64: + process_id = pid() + stats = process_stats.lookup(process_id) + + if stats: + stats.count = stats.count + 1 + process_stats.update(process_id, stats) + else: + new_stats = Stats() + new_stats.count = 1 + new_stats.total_time = 0 + new_stats.max_time = 0 + process_stats.update(process_id, new_stats) + + return 0 +``` + +## Accessing Maps from Userspace + +After loading a BPF program, you can access maps from Python using `pylibbpf`: + +```python +from pythonbpf import BPF +from pylibbpf import BpfMap + +# Load BPF program +b = BPF() +b.load_and_attach() + +# Get map reference +map_obj = BpfMap(b, my_map) + +# Read all key-value pairs +for key, value in map_obj.items(): + print(f"Key: {key}, Value: {value}") + +# Get all keys +keys = list(map_obj.keys()) + +# Get all values +values = list(map_obj.values()) + +# Lookup specific key +value = map_obj[key] + +# Update from userspace +map_obj[key] = new_value + +# Delete from userspace +del map_obj[key] +``` + +## Common Patterns + +### Counter Pattern + +```python +count = my_map.lookup(key) +if count: + my_map.update(key, count + 1) +else: + my_map.update(key, 1) +``` + +### Latency Tracking + +```python +# Store start time +start = ktime() +start_map.update(key, start) + +# Later: calculate latency +start_time = start_map.lookup(key) +if start_time: + latency = ktime() - start_time + latency_map.update(key, latency) + start_map.delete(key) +``` + +### Event Sampling + +```python +# Only process every Nth event +count = counter.lookup(key) +if count and (count % 100) == 0: + events.output(data) +counter.update(key, count + 1 if count else 1) +``` + +## Troubleshooting + +### Map Not Found + +If you get "map not found" errors: +* Ensure the map is defined with `@bpf` and `@map` +* Check that the map name matches exactly +* Verify the BPF program loaded successfully + +### Map Full + +If updates fail due to map being full: +* Increase `max_entries` +* Use LRU maps for automatic eviction +* Add cleanup logic to 
delete old entries + +### Type Errors + +If you get type-related errors: +* Verify key and value types match the definition +* Check that structs are properly defined + +## Examples + +Check out these examples in the `BCC-Examples/` directory that demonstrate map usage: + +* [sync_timing.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/sync_timing.py) - HashMap for storing timestamps +* [sync_count.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/sync_count.py) - HashMap for counting events +* [hello_perf_output.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/hello_perf_output.py) - PerfEventArray for sending structs to userspace +* [sync_perf_output.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/sync_perf_output.py) - PerfEventArray with timing data +* [disksnoop.py](https://github.com/pythonbpf/Python-BPF/blob/main/BCC-Examples/disksnoop.py) - HashMap for tracking disk I/O + +## Next Steps + +* Learn about {doc}`structs` for defining custom value types +* Explore {doc}`helpers` for BPF helper functions +* See {doc}`compilation` to understand how maps are compiled diff --git a/docs/user-guide/structs.md b/docs/user-guide/structs.md new file mode 100644 index 0000000..5c68c23 --- /dev/null +++ b/docs/user-guide/structs.md @@ -0,0 +1,413 @@ +# BPF Structs + +Structs allow you to define custom data types for use in BPF programs. They provide a way to group related fields together and can be used as map values, event payloads, or local variables. + +## Defining Structs + +Use the `@bpf` and `@struct` decorators to define a BPF struct: + +```python +from pythonbpf import bpf, struct +from ctypes import c_uint64, c_uint32 + +@bpf +@struct +class Event: + timestamp: c_uint64 + pid: c_uint32 + cpu: c_uint32 +``` + +## Field Types + +Structs support various field types from Python's `ctypes` module. 
+ +### Integer Types + +```python +from ctypes import ( + c_int8, c_int16, c_int32, c_int64, + c_uint8, c_uint16, c_uint32, c_uint64 +) + +@bpf +@struct +class Numbers: + small_int: c_int8 # -128 to 127 + short_int: c_int16 # -32768 to 32767 + int_val: c_int32 # -2^31 to 2^31-1 + long_int: c_int64 # -2^63 to 2^63-1 + + byte: c_uint8 # 0 to 255 + word: c_uint16 # 0 to 65535 + dword: c_uint32 # 0 to 2^32-1 + qword: c_uint64 # 0 to 2^64-1 +``` + +### String Types + +Fixed-length strings are defined using `str(N)` where N is the size: + +```python +@bpf +@struct +class ProcessInfo: + name: str(16) # 16-byte string + path: str(256) # 256-byte string +``` + +```{note} +Strings in BPF are fixed-length and null-terminated. The size includes the null terminator. +``` + +### Pointer Types + +```python +from ctypes import c_void_p, c_char_p + +@bpf +@struct +class Pointers: + ptr: c_void_p # Generic pointer + str_ptr: c_char_p # Character pointer +``` + +### Nested Structs + +Structs can contain other structs as fields: + +```python +@bpf +@struct +class Address: + street: str(64) + city: str(32) + zip_code: c_uint32 + +@bpf +@struct +class Person: + name: str(32) + age: c_uint32 + address: Address # Nested struct +``` + +## Using Structs + +### As Local Variables + +Create and use struct instances within BPF functions: + +```python +from pythonbpf import bpf, struct, section +from pythonbpf.helper import pid, ktime, comm +from ctypes import c_void_p, c_int64, c_uint64, c_uint32 + +@bpf +@struct +class Event: + timestamp: c_uint64 + pid: c_uint32 + comm: str(16) + +@bpf +@section("tracepoint/syscalls/sys_enter_execve") +def capture_event(ctx: c_void_p) -> c_int64: + # Create an instance + event = Event() + + # Set fields + event.timestamp = ktime() + event.pid = pid() + comm(event.comm) # Fills event.comm with process name + + # Use the struct + print(f"Process with PID {event.pid}") + + return 0 +``` + +### As Map Keys and Values + +Use structs as keys and values in maps for 
complex state storage: + +```python +from pythonbpf import bpf, struct, map, section +from pythonbpf.maps import HashMap +from ctypes import c_uint32, c_uint64 + +@bpf +@struct +class ProcessStats: + syscall_count: c_uint64 + total_time: c_uint64 + max_latency: c_uint64 + +@bpf +@map +def stats() -> HashMap: + return HashMap( + key=c_uint32, + value=ProcessStats, + max_entries=1024 + ) + +@bpf +@section("tracepoint/syscalls/sys_enter_read") +def track_syscalls(ctx: c_void_p) -> c_int64: + process_id = pid() + + # Lookup existing stats + s = stats.lookup(process_id) + + if s: + # Update existing stats + s.syscall_count = s.syscall_count + 1 + stats.update(process_id, s) + else: + # Create new stats + new_stats = ProcessStats() + new_stats.syscall_count = 1 + new_stats.total_time = 0 + new_stats.max_latency = 0 + stats.update(process_id, new_stats) + + return 0 +``` + +### With Perf Events + +Send struct data to userspace using PerfEventArray: + +```python +from pythonbpf import bpf, struct, map, section +from pythonbpf.maps import PerfEventArray +from pythonbpf.helper import pid, ktime, comm +from ctypes import c_void_p, c_int64, c_uint32, c_uint64 + +@bpf +@struct +class ProcessEvent: + timestamp: c_uint64 + pid: c_uint32 + ppid: c_uint32 + comm: str(16) + +@bpf +@map +def events() -> PerfEventArray: + return PerfEventArray(key_size=c_uint32, value_size=c_uint32) + +@bpf +@section("tracepoint/sched/sched_process_fork") +def trace_fork(ctx: c_void_p) -> c_int64: + event = ProcessEvent() + event.timestamp = ktime() + event.pid = pid() + comm(event.comm) # Fills event.comm with process name + + # Send to userspace + events.output(event) + + return 0 +``` + +### With Ring Buffers + +```python +from pythonbpf import bpf, struct, map, section +from pythonbpf.maps import RingBuffer + +@bpf +@struct +class FileEvent: + timestamp: c_uint64 + pid: c_uint32 + filename: str(256) + +@bpf +@map +def events() -> RingBuffer: + return RingBuffer(max_entries=4096) + +@bpf 
+@section("tracepoint/syscalls/sys_enter_openat") +def trace_open(ctx: c_void_p) -> c_int64: + event = FileEvent() + event.timestamp = ktime() + event.pid = pid() + + events.output(event) + + return 0 +``` + +## Field Access and Modification + +### Reading Fields + +Access struct fields using dot notation: + +```python +event = Event() +ts = event.timestamp +process_id = event.pid +``` + +### Writing Fields + +Assign values to fields: + +```python +event = Event() +event.timestamp = ktime() +event.pid = pid() +comm(event.comm) +``` + +## StructType Class + +PythonBPF provides a `StructType` class for working with struct metadata: + +```python +from pythonbpf.structs import StructType + +# Define a struct +@bpf +@struct +class MyStruct: + field1: c_uint64 + field2: c_uint32 + +# Access struct information (from userspace) +# This is typically used internally by the compiler +``` + +## Complex Examples + +### Network Packet Event + +```python +from pythonbpf import bpf, struct, map, section +from pythonbpf.maps import RingBuffer +from pythonbpf.helper import ktime, XDP_PASS +from ctypes import c_void_p, c_int64, c_uint8, c_uint16, c_uint32, c_uint64 + +@bpf +@struct +class PacketEvent: + timestamp: c_uint64 + src_ip: c_uint32 + dst_ip: c_uint32 + src_port: c_uint16 + dst_port: c_uint16 + protocol: c_uint8 + length: c_uint16 + +@bpf +@map +def packets() -> RingBuffer: + return RingBuffer(max_entries=8192) + +@bpf +@section("xdp") +def capture_packets(ctx: c_void_p) -> c_int64: + pkt = PacketEvent() + pkt.timestamp = ktime() + # Parse packet data from ctx... 
+ + packets.output(pkt) + + return XDP_PASS +``` + +### Process Lifecycle Tracking + +```python +@bpf +@struct +class ProcessLifecycle: + pid: c_uint32 + ppid: c_uint32 + start_time: c_uint64 + exit_time: c_uint64 + exit_code: c_int32 + comm: str(16) + +@bpf +@map +def process_info() -> HashMap: + return HashMap( + key=c_uint32, + value=ProcessLifecycle, + max_entries=4096 + ) + +@bpf +@section("tracepoint/sched/sched_process_fork") +def track_fork(ctx: c_void_p) -> c_int64: + process_id = pid() + + info = ProcessLifecycle() + info.pid = process_id + info.start_time = ktime() + + process_info.update(process_id, info) + + return 0 + +@bpf +@section("tracepoint/sched/sched_process_exit") +def track_exit(ctx: c_void_p) -> c_int64: + process_id = pid() + + info = process_info.lookup(process_id) + if info: + info.exit_time = ktime() + process_info.update(process_id, info) + + return 0 +``` + +## Troubleshooting + +### Struct Size Issues + +If you encounter size-related errors: +* Check for excessive padding +* Verify field types are correct +* Consider reordering fields + +### Initialization Problems + +If fields aren't initialized correctly: +* Always initialize all fields explicitly +* Set default values where appropriate +* Use helper functions for dynamic values + +### Type Mismatch Errors + +If you get type errors: +* Ensure field types match assignments +* Check that imported types are from `ctypes` +* Verify nested struct definitions + +## Reading Struct Data in Userspace + +After capturing struct data, read it in Python: + +```python +from pylibbpf import BpfMap + +# Read from map +map_obj = BpfMap(b, process_info) +for key, value_bytes in map_obj.items(): + value = ProcessLifecycle.from_buffer_copy(value_bytes) + print(f"PID: {value.pid}, Comm: {value.comm.decode()}") +``` + +## Next Steps + +* Learn about {doc}`maps` for storing struct data +* Explore {doc}`helpers` for populating struct fields +* See {doc}`compilation` to understand how structs are compiled diff --git 
a/pyproject.toml b/pyproject.toml index 851906b..c548ca7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,14 @@ dependencies = [ "pylibbpf" ] +[project.optional-dependencies] +docs = [ + "sphinx>=7.0", + "myst-parser>=2.0", + "sphinx-rtd-theme>=2.0", + "sphinx-copybutton", +] + [tool.setuptools.packages.find] where = ["."] include = ["pythonbpf*"]