Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions python/cuda/bench/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,7 @@
)

# Unbind the helper's name from this module's namespace.
# NOTE(review): its import and use happen above this view; presumably it is
# removed here so it is not re-exported by the package -- confirm.
del load_nvidia_dynamic_lib

# The package docstring is assigned explicitly here rather than written as a
# leading string literal at the top of the module.
__doc__ = """
CUDA Kernel Benchmarking Library Python API
"""
319 changes: 63 additions & 256 deletions python/cuda/bench/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -29,284 +29,91 @@ from collections.abc import Callable, Sequence
from typing import Optional, Self, SupportsFloat, SupportsInt, Union

class CudaStream:
"""Represents CUDA stream

Note
----
The class is not user-constructible.
"""
def __cuda_stream__(self) -> tuple[int, int]:
"""
Special method implement CUDA stream protocol
from `cuda.core`. Returns a pair of integers:
(protocol_version, integral_value_of_cudaStream_t pointer)

Example
-------
import cuda.core.experimental as core
import cuda.bench as bench

def bench(state: bench.State):
dev = core.Device(state.get_device())
dev.set_current()
# converts CudaString to core.Stream
# using __cuda_stream__ protocol
dev.create_stream(state.get_stream())
"""
...

def addressof(self) -> int:
"Integral value of address of driver's CUDA stream struct"
...
def __cuda_stream__(self) -> tuple[int, int]: ...
def addressof(self) -> int: ...

class Benchmark:
"""Represents NVBench benchmark.

Note
----
The class is not user-constructible.

Use `~register` function to create Benchmark and register
it with NVBench.
"""
def get_name(self) -> str:
"Get benchmark name"
...
def add_int64_axis(self, name: str, values: Sequence[SupportsInt]) -> Self:
"Add integral type parameter axis with given name and values to sweep over"
...
def get_name(self) -> str: ...
def add_int64_axis(self, name: str, values: Sequence[SupportsInt]) -> Self: ...
def add_int64_power_of_two_axis(
self, name: str, values: Sequence[SupportsInt]
) -> Self:
"Add integral type parameter axis with given name and values to sweep over"
...
def add_float64_axis(self, name: str, values: Sequence[SupportsFloat]) -> Self:
"Add floating-point type parameter axis with given name and values to sweep over"
...
def add_string_axis(self, name: str, values: Sequence[str]) -> Self:
"Add string type parameter axis with given name and values to sweep over"
...
def set_name(self, name: str) -> Self:
"Set benchmark name"
...
def set_is_cpu_only(self, is_cpu_only: bool) -> Self:
"Set whether this benchmark only executes on CPU"
...
def set_run_once(self, v: bool) -> Self:
"Set whether all benchmark configurations are executed only once"
...
def set_skip_time(self, duration_seconds: SupportsFloat) -> Self:
"Set run durations, in seconds, that should be skipped"
...
def set_throttle_recovery_delay(self, delay_seconds: SupportsFloat) -> Self:
"Set throttle recovery delay, in seconds"
...
def set_throttle_threshold(self, threshold: SupportsFloat) -> Self:
"Set throttle threshold, as a fraction of maximal GPU frequency"
...
def set_timeout(self, duration_seconds: SupportsFloat) -> Self:
"Set benchmark run duration timeout value, in seconds"
...
def set_stopping_criterion(self, criterion: str) -> Self:
"Set stopping criterion to be used"
...
def set_criterion_param_float64(self, name: str, value: SupportsFloat) -> Self:
"Set stopping criterion floating point parameter value"
...
def set_criterion_param_int64(self, name: str, value: SupportsInt) -> Self:
"Set stopping criterion integer parameter value"
...
def set_criterion_param_string(self, name: str, value: str) -> Self:
"Set stopping criterion string parameter value"
...
def set_min_samples(self, count: SupportsInt) -> Self:
"Set minimal samples count before stopping criterion applies"
...
) -> Self: ...
def add_float64_axis(self, name: str, values: Sequence[SupportsFloat]) -> Self: ...
def add_string_axis(self, name: str, values: Sequence[str]) -> Self: ...
def set_name(self, name: str) -> Self: ...
def set_run_once(self, v: bool) -> Self: ...
def set_skip_time(self, duration_seconds: SupportsFloat) -> Self: ...
def set_throttle_recovery_delay(self, delay_seconds: SupportsFloat) -> Self: ...
def set_throttle_threshold(self, threshold: SupportsFloat) -> Self: ...
def set_timeout(self, duration_seconds: SupportsFloat) -> Self: ...
def set_stopping_criterion(self, criterion: str) -> Self: ...
def set_criterion_param_float64(self, name: str, value: SupportsFloat) -> Self: ...
def set_criterion_param_int64(self, name: str, value: SupportsInt) -> Self: ...
def set_criterion_param_string(self, name: str, value: str) -> Self: ...
def set_min_samples(self, count: SupportsInt) -> Self: ...

class Launch:
    """Configuration object for function launch.

    Note
    ----
    The class is not user-constructible.
    """

    def get_stream(self) -> CudaStream:
        """Get CUDA stream of this configuration."""
        ...

class State:
    """Represent benchmark configuration state.

    Note
    ----
    The class is not user-constructible.
    """

    def has_device(self) -> bool:
        """True if configuration has a device."""
        ...

    def has_printers(self) -> bool:
        """True if configuration has a printer."""
        ...

    def get_device(self) -> Union[int, None]:
        """Get device_id of the device from this configuration."""
        ...

    def get_stream(self) -> CudaStream:
        """CudaStream object from this configuration."""
        ...

    def get_int64(self, name: str) -> int:
        """Get value for given Int64 axis from this configuration."""
        ...

    def get_int64_or_default(self, name: str, default_value: SupportsInt) -> int:
        """Get value for given Int64 axis from this configuration.

        Falls back to `default_value` when the axis value is not available.
        """
        ...

    def get_float64(self, name: str) -> float:
        """Get value for given Float64 axis from this configuration."""
        ...

    def get_float64_or_default(
        self, name: str, default_value: SupportsFloat
    ) -> float:
        """Get value for given Float64 axis from this configuration.

        Falls back to `default_value` when the axis value is not available.
        """
        ...

    def get_string(self, name: str) -> str:
        """Get value for given String axis from this configuration."""
        ...

    def get_string_or_default(self, name: str, default_value: str) -> str:
        """Get value for given String axis from this configuration.

        Falls back to `default_value` when the axis value is not available.
        """
        ...

    def add_element_count(
        self, count: SupportsInt, column_name: Optional[str] = None
    ) -> None:
        """Add element count."""
        ...

    def set_element_count(self, count: SupportsInt) -> None:
        """Set element count."""
        ...

    def get_element_count(self) -> int:
        """Get element count."""
        ...

    def skip(self, reason: str) -> None:
        """Skip this configuration."""
        ...

    def is_skipped(self) -> bool:
        """Has this configuration been skipped."""
        ...

    def get_skip_reason(self) -> str:
        """Get reason provided for skipping this configuration."""
        ...

    def add_global_memory_reads(
        self, nbytes: SupportsInt, /, column_name: str = ""
    ) -> None:
        """Inform NVBench that given amount of bytes is being read by the benchmark from global memory."""
        ...

    def add_global_memory_writes(
        self, nbytes: SupportsInt, /, column_name: str = ""
    ) -> None:
        """Inform NVBench that given amount of bytes is being written by the benchmark into global memory."""
        ...

    def get_benchmark(self) -> Benchmark:
        """Get Benchmark this configuration is a part of."""
        ...

    def get_throttle_threshold(self) -> float:
        """Get throttle threshold value, as fraction of maximal frequency."""
        ...

    def set_throttle_threshold(self, threshold_fraction: SupportsFloat) -> None:
        """Set throttle threshold fraction to specified value, expected to be between 0 and 1."""
        ...

    def get_min_samples(self) -> int:
        """Get the number of benchmark timings NVBench performs before stopping criterion begins being used."""
        ...

    def set_min_samples(self, min_samples_count: SupportsInt) -> None:
        """Set the number of benchmark timings for NVBench to perform before stopping criterion begins being used."""
        ...

    def get_disable_blocking_kernel(self) -> bool:
        """True if use of blocking kernel by NVBench is disabled, False otherwise."""
        ...

    def set_disable_blocking_kernel(self, flag: bool) -> None:
        """Use flag = True to disable use of blocking kernel by NVBench."""
        ...

    def get_run_once(self) -> bool:
        """Boolean flag whether configuration should only run once."""
        ...

    def set_run_once(self, run_once_flag: bool) -> None:
        """Set run-once flag for this configuration."""
        ...

    def get_timeout(self) -> float:
        """Get time-out value for benchmark execution of this configuration, in seconds."""
        ...

    def set_timeout(self, duration: SupportsFloat) -> None:
        """Set time-out value for benchmark execution of this configuration, in seconds."""
        ...

    def get_blocking_kernel_timeout(self) -> float:
        """Get time-out value for execution of blocking kernel, in seconds."""
        ...

    def set_blocking_kernel_timeout(self, duration: SupportsFloat) -> None:
        """Set time-out value for execution of blocking kernel, in seconds."""
        ...

    def collect_cupti_metrics(self) -> None:
        """Request NVBench to record CUPTI metrics while running benchmark for this configuration."""
        ...

    def is_cupti_required(self) -> bool:
        """True if (some) CUPTI metrics are being collected."""
        ...

    def exec(
        self,
        fn: Callable[[Launch], None],
        /,
        *,
        batched: Optional[bool] = True,
        sync: Optional[bool] = False,
    ) -> None:
        """Execute callable running the benchmark.

        The callable may be executed multiple times.

        Parameters
        ----------
        fn: Callable
            Python callable with signature fn(Launch) -> None that executes the benchmark.
        batched: bool, optional
            If `True`, no cache flushing is performed between callable invocations.
            Default: `True`.
        sync: bool, optional
            True value indicates that callable performs device synchronization.
            NVBench disables use of blocking kernel in this case.
            Default: `False`.
        """
        ...

    def get_short_description(self) -> str:
        """Get short description for this configuration."""
        ...

    def add_summary(
        self, column_name: str, value: Union[SupportsInt, SupportsFloat, str]
    ) -> None:
        """Add summary column with a value."""
        ...

    def get_axis_values(self) -> dict[str, int | float | str]:
        """Get dictionary with axis values for this configuration."""
        ...

    def get_axis_values_as_string(self) -> str:
        """Get string of space-separated name=value pairs for this configuration."""
        ...

    def get_stopping_criterion(self) -> str:
        """Get string name of stopping criterion used."""
        ...

def register(fn: Callable[[State], None]) -> Benchmark:
    """Register given benchmarking function with NVBench.

    Parameters
    ----------
    fn: Callable
        Python callable with signature fn(State) -> None.

    Returns
    -------
    Benchmark
        The benchmark object created for `fn`.
    """
    ...


def run_all_benchmarks(argv: Sequence[str]) -> None:
    """Run all benchmarks registered with NVBench.

    Parameters
    ----------
    argv: Sequence[str]
        Sequence of CLI arguments controlling NVBench. Usually, it is `sys.argv`.
    """
    ...


class NVBenchRuntimeError(RuntimeError):
    """An exception raised if running benchmarks encounters an error."""

    ...
Loading