From e4abc208d110235787b324e7bd3986f861090bba Mon Sep 17 00:00:00 2001 From: Matthew Mckee Date: Wed, 3 Dec 2025 14:05:37 +0000 Subject: [PATCH 01/13] Parallel collection --- Cargo.lock | 19 + Cargo.toml | 7 + crates/karva_benchmark/benches/karva.rs | 2 +- crates/karva_benchmark/src/walltime.rs | 5 +- crates/karva_cli/src/args.rs | 2 +- crates/karva_cli/src/lib.rs | 5 +- crates/karva_cli/src/logging.rs | 2 +- crates/karva_core/src/diagnostic/mod.rs | 2 +- crates/karva_core/src/discovery/discoverer.rs | 4 +- .../src/discovery/models/function.rs | 2 +- crates/karva_core/src/name.rs | 2 +- crates/karva_core/src/utils.rs | 2 +- crates/karva_core/tests/integration/basic.rs | 2 +- .../extensions/fixtures/dynamic.rs | 2 +- crates/karva_diff/src/main.rs | 2 +- crates/karva_project/Cargo.toml | 9 + crates/karva_project/src/db.rs | 6 + crates/karva_project/src/envs.rs | 19 + crates/karva_project/src/lib.rs | 20 +- crates/karva_project/src/path/mod.rs | 2 +- crates/karva_project/src/system.rs | 480 +++++++++++++++ crates/karva_project/src/system/os.rs | 575 ++++++++++++++++++ .../src/system/walk_directory.rs | 324 ++++++++++ 23 files changed, 1469 insertions(+), 26 deletions(-) create mode 100644 crates/karva_project/src/db.rs create mode 100644 crates/karva_project/src/envs.rs create mode 100644 crates/karva_project/src/system.rs create mode 100644 crates/karva_project/src/system/os.rs create mode 100644 crates/karva_project/src/system/walk_directory.rs diff --git a/Cargo.lock b/Cargo.lock index 52b8d64b..58e5018d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -706,6 +706,16 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "etcetera" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de48cc4d1c1d97a20fd819def54b890cadde72ed3ad0c614822a0a433361be96" +dependencies = [ + "cfg-if", + "windows-sys 0.61.2", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -1202,9 +1212,18 @@ name = "karva_project" version = "0.0.0" dependencies = [ "camino", + "dunce", + "etcetera", + "filetime", + "glob", + "ignore", "karva_test", "ruff_db", "ruff_python_ast", + "rustc-hash", + "tempfile", + "thiserror", + "tracing", "tracing-subscriber", ] diff --git a/Cargo.toml b/Cargo.toml index b68cdfd8..cdb73643 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,11 @@ ctor = "0.6.1" ctrlc = { version = "3.5.1" } divan = { package = "codspeed-divan-compat", version = "4.1.0" } dunce = { version = "1.0.5" } +etcetera = { version = "0.11.0" } +# globset = { version = "0.4.14" } +# globwalk = { version = "0.9.1" } +filetime = { version = "0.2.23" } +glob = { version = "0.3.1" } ignore = { version = "0.4.25" } indexmap = { version = "2.12.1" } insta = { version = "1.44.3" } @@ -46,9 +51,11 @@ ruff_python_parser = { git = "https://github.com/astral-sh/ruff/", branch = "mai ruff_python_trivia = { git = "https://github.com/astral-sh/ruff/", branch = "main" } ruff_source_file = { git = "https://github.com/astral-sh/ruff/", branch = "main" } ruff_text_size = { git = "https://github.com/astral-sh/ruff/", branch = "main" } +rustc-hash = { version = "2.0.0" } serde = { version = "1.0.228" } serde_json = { version = "1.0.145" } tempfile = "3.23" +thiserror = { version = "2.0.0" } tracing = { version = "0.1.41", features = ["release_max_level_debug"] } tracing-flame = { version = "0.2.0" } tracing-subscriber = { version = "0.3.20", default-features = false, features = [ diff --git a/crates/karva_benchmark/benches/karva.rs b/crates/karva_benchmark/benches/karva.rs index 8eab5b29..ad808e8e 100644 --- a/crates/karva_benchmark/benches/karva.rs +++ b/crates/karva_benchmark/benches/karva.rs @@ -8,7 +8,7 @@ use karva_benchmark::{ criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}, }; use karva_core::{DummyReporter, TestRunner, testing::setup_module}; -use karva_project::{path::absolute, project::Project}; +use karva_project::{Project, absolute}; fn create_test_cases() -> Vec { vec![ diff --git a/crates/karva_benchmark/src/walltime.rs b/crates/karva_benchmark/src/walltime.rs index 4c38d9d2..03cbba09 100644 --- a/crates/karva_benchmark/src/walltime.rs +++ b/crates/karva_benchmark/src/walltime.rs @@ -2,10 +2,7 @@ use std::sync::Once; use divan::Bencher; use karva_core::{TestRunner, testing::setup_module}; -use karva_project::{ - path::absolute, - project::{Project, ProjectOptions}, -}; +use karva_project::{Project, ProjectOptions, absolute}; use karva_test::{InstalledProject, RealWorldProject}; pub struct ProjectBenchmark<'a> { diff --git a/crates/karva_cli/src/args.rs b/crates/karva_cli/src/args.rs index 8db14e81..3f80130c 100644 --- a/crates/karva_cli/src/args.rs +++ b/crates/karva_cli/src/args.rs @@ -6,7 +6,7 @@ use clap::{ styling::{AnsiColor, Effects}, }, }; -use karva_project::project::{ProjectOptions, TestPrefix}; +use karva_project::{ProjectOptions, TestPrefix}; use ruff_db::diagnostic::DiagnosticFormat; use crate::logging::Verbosity; diff --git a/crates/karva_cli/src/lib.rs b/crates/karva_cli/src/lib.rs index 40cd0a95..550ca0bd 100644 --- a/crates/karva_cli/src/lib.rs +++ b/crates/karva_cli/src/lib.rs @@ -11,10 +11,7 @@ use colored::Colorize; use karva_core::{ DummyReporter, Reporter, TestCaseReporter, TestRunner, utils::current_python_version, }; -use karva_project::{ - path::absolute, - project::{Project, ProjectMetadata}, -}; +use karva_project::{Project, ProjectMetadata, absolute}; use crate::{ args::{Command, TerminalColor, TestCommand}, diff --git a/crates/karva_cli/src/logging.rs b/crates/karva_cli/src/logging.rs index 881ddecc..3c628a64 100644 --- a/crates/karva_cli/src/logging.rs +++ b/crates/karva_cli/src/logging.rs @@ -1,7 +1,7 @@ use std::{fmt, fs::File, io::BufWriter}; use colored::Colorize; -use karva_project::verbosity::VerbosityLevel; +use karva_project::VerbosityLevel; use tracing::{Event, Subscriber}; use tracing_subscriber::{ EnvFilter, diff --git a/crates/karva_core/src/diagnostic/mod.rs b/crates/karva_core/src/diagnostic/mod.rs index 787347e6..72faeac1 100644 --- a/crates/karva_core/src/diagnostic/mod.rs +++ b/crates/karva_core/src/diagnostic/mod.rs @@ -6,7 +6,7 @@ use std::collections::HashMap; -use karva_project::path::TestPathError; +use karva_project::TestPathError; use pyo3::{Py, PyAny, PyErr, Python}; use ruff_db::diagnostic::{ Annotation, Diagnostic, Severity, Span, SubDiagnostic, SubDiagnosticSeverity, diff --git a/crates/karva_core/src/discovery/discoverer.rs b/crates/karva_core/src/discovery/discoverer.rs index 13ab3ed2..fe16017a 100644 --- a/crates/karva_core/src/discovery/discoverer.rs +++ b/crates/karva_core/src/discovery/discoverer.rs @@ -1,6 +1,6 @@ use camino::{Utf8Path, Utf8PathBuf}; use ignore::WalkBuilder; -use karva_project::path::TestPath; +use karva_project::TestPath; use pyo3::prelude::*; #[cfg(test)] @@ -264,7 +264,7 @@ impl DiscoveryMode { mod tests { use insta::{allow_duplicates, assert_snapshot}; - use karva_project::{Project, project::ProjectOptions}; + use karva_project::{Project, ProjectOptions}; use karva_test::TestContext; use super::*; diff --git a/crates/karva_core/src/discovery/models/function.rs b/crates/karva_core/src/discovery/models/function.rs index b24c5655..5e770017 100644 --- a/crates/karva_core/src/discovery/models/function.rs +++ b/crates/karva_core/src/discovery/models/function.rs @@ -64,7 +64,7 @@ impl RequiresFixtures for TestFunction { #[cfg(test)] mod tests { - use karva_project::project::Project; + use karva_project::Project; use karva_test::TestContext; use crate::{ diff --git a/crates/karva_core/src/name.rs b/crates/karva_core/src/name.rs index 8b110299..8a094906 100644 --- a/crates/karva_core/src/name.rs +++ b/crates/karva_core/src/name.rs @@ -1,5 +1,5 @@ use camino::Utf8PathBuf; -use karva_project::utils::module_name; +use karva_project::module_name; /// Represents a fully qualified function name including its module path. #[derive(Clone, Debug, PartialEq, Eq, Hash)] diff --git a/crates/karva_core/src/utils.rs b/crates/karva_core/src/utils.rs index d163696c..6b7912e7 100644 --- a/crates/karva_core/src/utils.rs +++ b/crates/karva_core/src/utils.rs @@ -1,7 +1,7 @@ use std::{collections::HashMap, fmt::Write}; use camino::Utf8Path; -use karva_project::project::{Project, ProjectOptions}; +use karva_project::{Project, ProjectOptions}; use pyo3::{PyResult, Python, prelude::*, types::PyAnyMethods}; use ruff_python_ast::PythonVersion; use ruff_source_file::{SourceFile, SourceFileBuilder}; diff --git a/crates/karva_core/tests/integration/basic.rs b/crates/karva_core/tests/integration/basic.rs index 5b85dd84..dcb668d4 100644 --- a/crates/karva_core/tests/integration/basic.rs +++ b/crates/karva_core/tests/integration/basic.rs @@ -1,7 +1,7 @@ use camino::Utf8PathBuf; use insta::assert_snapshot; use karva_core::{StandardTestRunner, TestRunner, testing::setup_module}; -use karva_project::{Project, project::ProjectOptions}; +use karva_project::{Project, ProjectOptions}; use karva_test::TestContext; use crate::common::TestRunnerExt; diff --git a/crates/karva_core/tests/integration/extensions/fixtures/dynamic.rs b/crates/karva_core/tests/integration/extensions/fixtures/dynamic.rs index c18d3b2a..acd1f570 100644 --- a/crates/karva_core/tests/integration/extensions/fixtures/dynamic.rs +++ b/crates/karva_core/tests/integration/extensions/fixtures/dynamic.rs @@ -1,6 +1,6 @@ use insta::{allow_duplicates, assert_snapshot}; use karva_core::{StandardTestRunner, TestRunner}; -use karva_project::{Project, project::ProjectOptions}; +use karva_project::{Project, ProjectOptions}; use karva_test::TestContext; use rstest::rstest; diff --git a/crates/karva_diff/src/main.rs b/crates/karva_diff/src/main.rs index fcd5ef78..2f359d24 100644 --- a/crates/karva_diff/src/main.rs +++ b/crates/karva_diff/src/main.rs @@ -8,7 +8,7 @@ use std::{ use anyhow::{Context, Result}; use camino::Utf8PathBuf; use clap::Parser; -use karva_project::path::absolute; +use karva_project::absolute; use karva_test::{RealWorldProject, all_projects}; use tempfile::NamedTempFile; diff --git a/crates/karva_project/Cargo.toml b/crates/karva_project/Cargo.toml index 99607dd1..88ea9df7 100644 --- a/crates/karva_project/Cargo.toml +++ b/crates/karva_project/Cargo.toml @@ -15,9 +15,18 @@ camino = { workspace = true } ruff_python_ast = { workspace = true } tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] } ruff_db = { workspace = true } +tracing = { workspace = true } +filetime = { workspace = true } +glob = { workspace = true } +rustc-hash = { workspace = true } +dunce = { workspace = true } +etcetera = { workspace = true } +ignore = { workspace = true } +thiserror = { workspace = true } [dev-dependencies] karva_test = { workspace = true } +tempfile = { workspace = true } [lints] workspace = true diff --git a/crates/karva_project/src/db.rs b/crates/karva_project/src/db.rs new file mode 100644 index 00000000..af5fbe63 --- /dev/null +++ b/crates/karva_project/src/db.rs @@ -0,0 +1,6 @@ +use crate::{Project, System}; + +pub trait Db { + fn system(&self) -> &dyn System; + fn project(&self) -> &Project; +} diff --git a/crates/karva_project/src/envs.rs b/crates/karva_project/src/envs.rs new file mode 100644 index 00000000..907c7e93 --- /dev/null +++ b/crates/karva_project/src/envs.rs @@ -0,0 +1,19 @@ +use std::num::NonZeroUsize; + +pub struct EnvVars; + +impl EnvVars { + // Externally defined environment variables + + /// This is a standard Rayon environment variable. + pub const RAYON_NUM_THREADS: &'static str = "RAYON_NUM_THREADS"; +} + +pub fn max_parallelism() -> NonZeroUsize { + std::env::var(EnvVars::RAYON_NUM_THREADS) + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or_else(|| { + std::thread::available_parallelism().unwrap_or_else(|_| NonZeroUsize::new(1).unwrap()) + }) +} diff --git a/crates/karva_project/src/lib.rs b/crates/karva_project/src/lib.rs index a7cfc5fa..0c1309b3 100644 --- a/crates/karva_project/src/lib.rs +++ b/crates/karva_project/src/lib.rs @@ -1,6 +1,16 @@ -pub mod path; -pub mod project; -pub mod utils; -pub mod verbosity; +mod db; +mod envs; +mod path; +mod project; +pub(crate) mod system; +mod utils; +mod verbosity; -pub use project::Project; +pub use db::Db; +pub use envs::EnvVars; +pub(crate) use envs::max_parallelism; +pub use path::{TestPath, TestPathError, absolute}; +pub use project::{Project, ProjectMetadata, ProjectOptions, TestPrefix}; +pub use system::{OsSystem, System}; +pub use utils::module_name; +pub use verbosity::VerbosityLevel; diff --git a/crates/karva_project/src/path/mod.rs b/crates/karva_project/src/path/mod.rs index 042ffec0..489ec58f 100644 --- a/crates/karva_project/src/path/mod.rs +++ b/crates/karva_project/src/path/mod.rs @@ -1,4 +1,4 @@ -pub(crate) mod test_path; +pub mod test_path; mod utils; pub use test_path::{TestPath, TestPathError}; diff --git a/crates/karva_project/src/system.rs b/crates/karva_project/src/system.rs new file mode 100644 index 00000000..39263986 --- /dev/null +++ b/crates/karva_project/src/system.rs @@ -0,0 +1,480 @@ +use std::{ + error::Error, + ffi::OsStr, + fmt::Debug, + io, + path::{Path, PathBuf}, +}; + +use camino::{Utf8Path, Utf8PathBuf}; +use filetime::FileTime; +use glob::PatternError; + +mod os; +pub mod walk_directory; + +pub use os::OsSystem; +use walk_directory::WalkDirectoryBuilder; + +pub type Result = std::io::Result; + +/// The system on which Karva runs. +/// +/// Karva supports running on the CLI. +/// +/// Abstracting the system enables tests to use a more efficient in-memory file system. +pub trait System: Debug + Sync + Send { + /// Reads the metadata of the file or directory at `path`. + /// + /// This function will traverse symbolic links to query information about the destination file. + fn path_metadata(&self, path: &Utf8Path) -> Result; + + /// Returns the canonical, absolute form of a path with all intermediate components normalized + /// and symbolic links resolved. + /// + /// # Errors + /// This function will return an error in the following situations, but is not limited to just these cases: + /// * `path` does not exist. + /// * A non-final component in `path` is not a directory. + /// * the symlink target path is not valid Unicode. + /// + /// ## Windows long-paths + /// Unlike `std::fs::canonicalize`, this function does remove UNC prefixes if possible. + /// See [`dunce::canonicalize`] for more information. + fn canonicalize_path(&self, path: &Utf8Path) -> Result; + + /// Returns the source type for `path` if known or `None`. + /// + /// The default is to always return `None`, assuming the system + /// has no additional information and that the caller should + /// rely on the file extension instead. + /// + /// This is primarily used for the LSP integration to respect + /// the chosen language (or the fact that it is a notebook) in + /// the editor. + fn source_type(&self, path: &Utf8Path) -> Option { + let _ = path; + None + } + + /// Reads the content of the file at `path` into a [`String`]. + fn read_to_string(&self, path: &Utf8Path) -> Result; + + /// Returns `true` if `path` exists. + fn path_exists(&self, path: &Utf8Path) -> bool { + self.path_metadata(path).is_ok() + } + + /// Returns `true` if `path` exists and is a directory. + fn is_directory(&self, path: &Utf8Path) -> bool { + self.path_metadata(path) + .is_ok_and(|metadata| metadata.file_type.is_directory()) + } + + /// Returns `true` if `path` exists and is a file. + fn is_file(&self, path: &Utf8Path) -> bool { + self.path_metadata(path) + .is_ok_and(|metadata| metadata.file_type.is_file()) + } + + /// Returns the current working directory + fn current_directory(&self) -> &Utf8Path; + + /// Returns the directory path where user configurations are stored. + /// + /// Returns `None` if no such convention exists for the system. + fn user_config_directory(&self) -> Option; + + /// Returns the directory path where cached files are stored. + /// + /// Returns `None` if no such convention exists for the system. + fn cache_dir(&self) -> Option; + + /// Iterate over the contents of the directory at `path`. + /// + /// The returned iterator must have the following properties: + /// - It only iterates over the top level of the directory, + /// i.e., it does not recurse into subdirectories. + /// - It skips the current and parent directories (`.` and `..` + /// respectively). + /// - The iterator yields `std::io::Result` instances. + /// For each instance, an `Err` variant may signify that the path + /// of the entry was not valid UTF8, in which case it should be an + /// [`std::io::Error`] with the `ErrorKind` set to + /// [`std::io::ErrorKind::InvalidData`] and the payload set to a + /// [`camino::FromPathBufError`]. It may also indicate that + /// "some sort of intermittent IO error occurred during iteration" + /// (language taken from the [`std::fs::read_dir`] documentation). + /// + /// # Errors + /// Returns an error: + /// - if `path` does not exist in the system, + /// - if `path` does not point to a directory, + /// - if the process does not have sufficient permissions to + /// view the contents of the directory at `path` + /// - May also return an error in some other situations as well. + fn read_directory<'a>( + &'a self, + path: &Utf8Path, + ) -> Result> + 'a>>; + + /// Recursively walks the content of `path`. + /// + /// It is allowed to pass a `path` that points to a file. In this case, the walker + /// yields a single entry for that file. + fn walk_directory(&self, path: &Utf8Path) -> WalkDirectoryBuilder; + + /// Return an iterator that produces all the `Path`s that match the given + /// pattern using default match options, which may be absolute or relative to + /// the current working directory. + /// + /// This may return an error if the pattern is invalid. + fn glob( + &self, + pattern: &str, + ) -> std::result::Result< + Box> + '_>, + PatternError, + >; + + /// Fetches the environment variable `key` from the current process. + /// + /// # Errors + /// + /// Returns [`std::env::VarError::NotPresent`] if: + /// - The variable is not set. + /// - The variable's name contains an equal sign or NUL (`'='` or `'\0'`). + /// + /// Returns [`std::env::VarError::NotUnicode`] if the variable's value is not valid + /// Unicode. + fn env_var(&self, name: &str) -> std::result::Result { + let _ = name; + Err(std::env::VarError::NotPresent) + } + + /// Returns a handle to a [`WritableSystem`] if this system is writeable. + fn as_writable(&self) -> Option<&dyn WritableSystem>; + + fn as_any(&self) -> &dyn std::any::Any; + + fn as_any_mut(&mut self) -> &mut dyn std::any::Any; + + fn dyn_clone(&self) -> Box; +} + +/// System trait for non-readonly systems. +pub trait WritableSystem: System { + /// Creates a file at the given path. + /// + /// Returns an error if the file already exists. + fn create_new_file(&self, path: &Utf8Path) -> Result<()>; + + /// Writes the given content to the file at the given path. + fn write_file(&self, path: &Utf8Path, content: &str) -> Result<()>; + + /// Creates a directory at `path` as well as any intermediate directories. + fn create_directory_all(&self, path: &Utf8Path) -> Result<()>; + + /// Reads the provided file from the system cache, or creates the file if necessary. + /// + /// Returns `Ok(None)` if the system does not expose a suitable cache directory. + fn get_or_cache( + &self, + path: &Utf8Path, + read_contents: &dyn Fn() -> Result, + ) -> Result> { + let Some(cache_dir) = self.cache_dir() else { + return Ok(None); + }; + + let cache_path = cache_dir.join(path); + + // The file has already been cached. + if self.is_file(&cache_path) { + return Ok(Some(cache_path)); + } + + // Read the file contents. + let contents = read_contents()?; + + // Create the parent directory. + self.create_directory_all(cache_path.parent().unwrap())?; + + // Create and write to the file on the system. + // + // Note that `create_new_file` will fail if the file has already been created. This + // ensures that only one thread/process ever attempts to write to it to avoid corrupting + // the cache. + self.create_new_file(&cache_path)?; + self.write_file(&cache_path, &contents)?; + + Ok(Some(cache_path)) + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct Metadata { + revision: FileRevision, + permissions: Option, + file_type: FileType, +} + +impl Metadata { + pub const fn new( + revision: FileRevision, + permissions: Option, + file_type: FileType, + ) -> Self { + Self { + revision, + permissions, + file_type, + } + } + + pub const fn revision(&self) -> FileRevision { + self.revision + } + + pub const fn permissions(&self) -> Option { + self.permissions + } + + pub const fn file_type(&self) -> FileType { + self.file_type + } +} + +#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)] +pub enum FileType { + File, + Directory, + Symlink, +} + +impl FileType { + pub const fn is_file(self) -> bool { + matches!(self, Self::File) + } + + pub const fn is_directory(self) -> bool { + matches!(self, Self::Directory) + } + + pub const fn is_symlink(self) -> bool { + matches!(self, Self::Symlink) + } +} + +impl From for FileType { + fn from(file_type: std::fs::FileType) -> Self { + if file_type.is_file() { + Self::File + } else if file_type.is_dir() { + Self::Directory + } else { + Self::Symlink + } + } +} + +/// A number representing the revision of a file. +/// +/// Two revisions that don't compare equal signify that the file has been modified. +/// Revisions aren't guaranteed to be monotonically increasing or in any specific order. +/// +/// Possible revisions are: +/// * The last modification time of the file. +/// * The hash of the file's content. +/// * The revision as it comes from an external system, for example the LSP. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Default)] +pub struct FileRevision(u128); + +impl FileRevision { + pub const fn new(value: u128) -> Self { + Self(value) + } + + pub fn now() -> Self { + Self::from(file_time_now()) + } + + pub const fn zero() -> Self { + Self(0) + } + + #[must_use] + pub const fn as_u128(self) -> u128 { + self.0 + } +} + +impl From for FileRevision { + fn from(value: u128) -> Self { + Self(value) + } +} + +impl From for FileRevision { + fn from(value: u64) -> Self { + Self(u128::from(value)) + } +} + +impl From for FileRevision { + fn from(value: filetime::FileTime) -> Self { + #[allow(clippy::cast_sign_loss)] + let seconds = value.seconds() as u128; + let seconds = seconds << 64; + let nanos = u128::from(value.nanoseconds()); + + Self(seconds | nanos) + } +} + +pub fn file_time_now() -> FileTime { + FileTime::now() +} + +#[derive(Debug, PartialEq, Eq)] +pub struct DirectoryEntry { + path: Utf8PathBuf, + file_type: FileType, +} + +impl DirectoryEntry { + pub const fn new(path: Utf8PathBuf, file_type: FileType) -> Self { + Self { path, file_type } + } + + pub fn into_path(self) -> Utf8PathBuf { + self.path + } + + pub fn path(&self) -> &Utf8Path { + &self.path + } + + pub const fn file_type(&self) -> FileType { + self.file_type + } +} + +/// A glob iteration error. +/// +/// This is typically returned when a particular path cannot be read +/// to determine if its contents match the glob pattern. This is possible +/// if the program lacks the appropriate permissions, for example. +#[derive(Debug)] +pub struct GlobError { + path: PathBuf, + error: GlobErrorKind, +} + +impl GlobError { + /// The Path that the error corresponds to. + pub fn path(&self) -> &Path { + &self.path + } + + pub const fn kind(&self) -> &GlobErrorKind { + &self.error + } +} + +impl Error for GlobError {} + +impl std::fmt::Display for GlobError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match &self.error { + GlobErrorKind::IOError(error) => { + write!( + f, + "attempting to read `{}` resulted in an error: {error}", + self.path.display(), + ) + } + GlobErrorKind::NonUtf8Path => { + write!(f, "`{}` is not a valid UTF-8 path", self.path.display(),) + } + } + } +} + +impl From for GlobError { + fn from(value: glob::GlobError) -> Self { + Self { + path: value.path().to_path_buf(), + error: GlobErrorKind::IOError(value.into_error()), + } + } +} + +#[derive(Debug)] +pub enum GlobErrorKind { + IOError(io::Error), + NonUtf8Path, +} + +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] +pub enum PySourceType { + /// The source is a Python file (`.py`, `.pyw`). + /// Note: `.pyw` files contain Python code, but do not represent importable namespaces. + /// Consider adding a separate source type later if combining the two causes issues. + #[default] + Python, + /// The source is a Python stub file (`.pyi`). + Stub, + /// The source is a Jupyter notebook (`.ipynb`). + Ipynb, +} + +impl PySourceType { + /// Infers the source type from the file extension. + /// + /// Falls back to `Python` if the extension is not recognized. + pub fn from_extension(extension: &str) -> Self { + Self::try_from_extension(extension).unwrap_or_default() + } + + /// Infers the source type from the file extension. + pub fn try_from_extension(extension: &str) -> Option { + let ty = match extension { + "pyi" => Self::Stub, + "py" | "pyw" => Self::Python, + "ipynb" => Self::Ipynb, + _ => return None, + }; + + Some(ty) + } + + pub fn try_from_path(path: impl AsRef) -> Option { + path.as_ref() + .extension() + .and_then(OsStr::to_str) + .and_then(Self::try_from_extension) + } + + pub const fn is_py_file(self) -> bool { + matches!(self, Self::Python) + } + + pub const fn is_stub(self) -> bool { + matches!(self, Self::Stub) + } + + pub const fn is_py_file_or_stub(self) -> bool { + matches!(self, Self::Python | Self::Stub) + } + + pub const fn is_ipynb(self) -> bool { + matches!(self, Self::Ipynb) + } +} + +impl> From

for PySourceType { + fn from(path: P) -> Self { + Self::try_from_path(path).unwrap_or_default() + } +} diff --git a/crates/karva_project/src/system/os.rs b/crates/karva_project/src/system/os.rs new file mode 100644 index 00000000..c26bff4c --- /dev/null +++ b/crates/karva_project/src/system/os.rs @@ -0,0 +1,575 @@ +use std::{any::Any, num::NonZeroUsize, path::PathBuf, sync::Arc}; + +use camino::{Utf8Path, Utf8PathBuf}; +use filetime::FileTime; + +use crate::{ + max_parallelism, + system::{ + DirectoryEntry, GlobError, GlobErrorKind, Metadata, Result, System, WritableSystem, + walk_directory::{ + self, DirectoryWalker, WalkDirectoryBuilder, WalkDirectoryConfiguration, + WalkDirectoryVisitorBuilder, WalkState, + }, + }, +}; + +/// A system implementation that uses the OS file system. +#[derive(Debug, Clone)] +pub struct OsSystem { + inner: Arc, +} + +#[derive(Default, Debug)] +struct OsSystemInner { + cwd: Utf8PathBuf, +} + +impl OsSystem { + pub fn new(cwd: impl AsRef) -> Self { + let cwd = cwd.as_ref(); + assert!(cwd.is_absolute()); + + tracing::debug!( + "Architecture: {}, OS: {}", + std::env::consts::ARCH, + std::env::consts::OS, + ); + + Self { + // Spreading `..Default` because it isn't possible to feature gate the initializer of a single field. + inner: Arc::new(OsSystemInner { + cwd: cwd.to_path_buf(), + }), + } + } + + #[cfg(unix)] + #[allow(clippy::unnecessary_wraps)] + fn permissions(metadata: &std::fs::Metadata) -> Option { + use std::os::unix::fs::PermissionsExt; + + Some(metadata.permissions().mode()) + } + + #[cfg(not(unix))] + fn permissions(_metadata: &std::fs::Metadata) -> Option { + None + } +} + +impl System for OsSystem { + fn path_metadata(&self, path: &Utf8Path) -> Result { + let metadata = path.as_std_path().metadata()?; + let last_modified = FileTime::from_last_modification_time(&metadata); + + Ok(Metadata { + revision: last_modified.into(), + permissions: Self::permissions(&metadata), + file_type: metadata.file_type().into(), + }) + } + + fn canonicalize_path(&self, path: &Utf8Path) -> Result { + path.canonicalize_utf8().map(|path| { + Utf8PathBuf::from_path_buf(dunce::simplified(path.as_std_path()).to_path_buf()).unwrap() + }) + } + + fn read_to_string(&self, path: &Utf8Path) -> Result { + std::fs::read_to_string(path.as_std_path()) + } + + fn path_exists(&self, path: &Utf8Path) -> bool { + path.as_std_path().exists() + } + + fn current_directory(&self) -> &Utf8Path { + &self.inner.cwd + } + + fn user_config_directory(&self) -> Option { + use etcetera::BaseStrategy as _; + + let strategy = etcetera::base_strategy::choose_base_strategy().ok()?; + Utf8PathBuf::from_path_buf(strategy.config_dir()).ok() + } + + fn cache_dir(&self) -> Option { + use etcetera::BaseStrategy as _; + + let cache_dir = etcetera::base_strategy::choose_base_strategy() + .ok() + .map(|dirs| dirs.cache_dir().join("ty")) + .map(|cache_dir| { + if cfg!(windows) { + cache_dir.join("cache") + } else { + cache_dir + } + }) + .and_then(|path| Utf8PathBuf::from_path_buf(path).ok()) + .unwrap_or_else(|| Utf8PathBuf::from(".ty_cache")); + + Some(cache_dir) + } + + fn read_directory( + &self, + path: &Utf8Path, + ) -> Result>>> { + Ok(Box::new(path.read_dir_utf8()?.map(|res| { + let res = res?; + + let file_type = res.file_type()?; + Ok(DirectoryEntry { + path: res.into_path(), + file_type: file_type.into(), + }) + }))) + } + + /// Creates a builder to recursively walk `path`. + /// + /// The walker ignores files according to [`ignore::WalkBuilder::standard_filters`] + /// when setting [`WalkDirectoryBuilder::standard_filters`] to true. + fn walk_directory(&self, path: &Utf8Path) -> WalkDirectoryBuilder { + WalkDirectoryBuilder::new( + path, + OsDirectoryWalker { + cwd: self.current_directory().to_path_buf(), + }, + ) + } + + fn glob( + &self, + pattern: &str, + ) -> std::result::Result< + Box>>, + glob::PatternError, + > { + glob::glob(pattern).map(|inner| { + let iterator = inner.map(|result| { + let path = result?; + + let system_path = Utf8PathBuf::from_path_buf(path).map_err(|path| GlobError { + path, + error: GlobErrorKind::NonUtf8Path, + })?; + + Ok(system_path) + }); + + let boxed: Box> = Box::new(iterator); + boxed + }) + } + + fn env_var(&self, name: &str) -> std::result::Result { + std::env::var(name) + } + + fn as_writable(&self) -> Option<&dyn WritableSystem> { + Some(self) + } + + fn as_any(&self) -> &dyn Any { + self + } + + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + fn dyn_clone(&self) -> Box { + Box::new(self.clone()) + } +} + +impl WritableSystem for OsSystem { + fn create_new_file(&self, path: &Utf8Path) -> Result<()> { + std::fs::File::create_new(path).map(drop) + } + + fn write_file(&self, path: &Utf8Path, content: &str) -> Result<()> { + std::fs::write(path.as_std_path(), content) + } + + fn create_directory_all(&self, path: &Utf8Path) -> Result<()> { + std::fs::create_dir_all(path.as_std_path()) + } +} + +#[derive(Debug)] +struct OsDirectoryWalker { + cwd: Utf8PathBuf, +} + +impl DirectoryWalker for OsDirectoryWalker { + fn walk( + &self, + visitor_builder: &mut dyn WalkDirectoryVisitorBuilder, + configuration: WalkDirectoryConfiguration, + ) { + let WalkDirectoryConfiguration { + paths, + ignore_hidden: hidden, + standard_filters, + } = configuration; + + let Some((first, additional)) = paths.split_first() else { + return; + }; + + let mut builder = ignore::WalkBuilder::new(first.as_std_path()); + builder.current_dir(self.cwd.as_std_path()); + + builder.standard_filters(standard_filters); + builder.hidden(hidden); + + for additional_path in additional { + builder.add(additional_path.as_std_path()); + } + + builder.threads(max_parallelism().min(NonZeroUsize::new(12).unwrap()).get()); + + builder.build_parallel().run(|| { + let mut visitor = visitor_builder.build(); + + Box::new(move |entry| { + match entry { + Ok(entry) => { + // SAFETY: The walkdir crate supports `stdin` files and `file_type` can be `None` for these files. + // We don't make use of this feature, which is why unwrapping here is ok. + let file_type = entry.file_type().unwrap(); + let depth = entry.depth(); + + // `walkdir` reports errors related to parsing ignore files as part of the entry. + // These aren't fatal for us. We should keep going even if an ignore file contains a syntax error. + // But we log the error here for better visibility (same as ripgrep, Ruff ignores it) + if let Some(error) = entry.error() { + tracing::warn!("{error}"); + } + + match Utf8PathBuf::from_path_buf(entry.into_path()) { + Ok(path) => { + let directory_entry = walk_directory::DirectoryEntry { + path, + file_type: file_type.into(), + depth, + }; + + visitor.visit(Ok(directory_entry)).into() + } + Err(path) => { + visitor.visit(Err(walk_directory::Error { + depth: Some(depth), + kind: walk_directory::ErrorKind::NonUtf8Path { path }, + })); + + // Skip the entire directory because all the paths won't be UTF-8 paths. + ignore::WalkState::Skip + } + } + } + Err(error) => match ignore_to_walk_directory_error(error, None, None) { + Ok(error) => visitor.visit(Err(error)).into(), + Err(error) => { + // This should only be reached when the error is a `.ignore` file related error + // (which, should not be reported here but the `ignore` crate doesn't distinguish between ignore and IO errors). + // Let's log the error to at least make it visible. + tracing::warn!("Failed to traverse directory: {error}."); + ignore::WalkState::Continue + } + }, + } + }) + }); + } +} + +#[cold] +fn ignore_to_walk_directory_error( + error: ignore::Error, + path: Option, + depth: Option, +) -> std::result::Result { + use ignore::Error; + + match error { + Error::WithPath { path, err } => ignore_to_walk_directory_error(*err, Some(path), depth), + Error::WithDepth { err, depth } => ignore_to_walk_directory_error(*err, path, Some(depth)), + Error::WithLineNumber { err, .. } => ignore_to_walk_directory_error(*err, path, depth), + Error::Loop { child, ancestor } => { + match ( + Utf8PathBuf::from_path_buf(child), + Utf8PathBuf::from_path_buf(ancestor), + ) { + (Ok(child), Ok(ancestor)) => Ok(walk_directory::Error { + depth, + kind: walk_directory::ErrorKind::Loop { child, ancestor }, + }), + (Err(child), _) => Ok(walk_directory::Error { + depth, + kind: walk_directory::ErrorKind::NonUtf8Path { path: child }, + }), + // We should never reach this because we should never traverse into a non UTF8 path but handle it anyway. + (_, Err(ancestor)) => Ok(walk_directory::Error { + depth, + kind: walk_directory::ErrorKind::NonUtf8Path { path: ancestor }, + }), + } + } + + Error::Io(err) => match path.map(Utf8PathBuf::from_path_buf).transpose() { + Ok(path) => Ok(walk_directory::Error { + depth, + kind: walk_directory::ErrorKind::Io { path, err }, + }), + Err(path) => Ok(walk_directory::Error { + depth, + kind: walk_directory::ErrorKind::NonUtf8Path { path }, + }), + }, + + // Ignore related errors, we warn about them but we don't abort iteration because of them. + error @ (Error::Glob { .. } + | Error::UnrecognizedFileType(_) + | Error::InvalidDefinition + | Error::Partial(..)) => Err(error), + } +} + +impl From for ignore::WalkState { + fn from(value: WalkState) -> Self { + match value { + WalkState::Continue => Self::Continue, + WalkState::Skip => Self::Skip, + WalkState::Quit => Self::Quit, + } + } +} + +#[cfg(test)] +mod tests { + use tempfile::TempDir; + + use super::*; + use crate::system::{DirectoryEntry, FileType, walk_directory::tests::DirectoryEntryToString}; + + #[test] + fn read_directory() { + let tempdir = TempDir::new().unwrap(); + let tempdir_path = tempdir.path(); + std::fs::create_dir_all(tempdir_path.join("a/foo")).unwrap(); + let files = &["b.ts", "a/bar.py", "d.rs", "a/foo/bar.py", "a/baz.pyi"]; + for path in files { + std::fs::File::create(tempdir_path.join(path)).unwrap(); + } + + let tempdir_path = Utf8Path::from_path(tempdir_path).unwrap(); + let fs = OsSystem::new(tempdir_path); + + let mut sorted_contents: Vec = fs + .read_directory(&tempdir_path.join("a")) + .unwrap() + .map(Result::unwrap) + .collect(); + sorted_contents.sort_by(|a, b| a.path.cmp(&b.path)); + + let expected_contents = vec![ + DirectoryEntry::new(tempdir_path.join("a/bar.py"), FileType::File), + DirectoryEntry::new(tempdir_path.join("a/baz.pyi"), FileType::File), + DirectoryEntry::new(tempdir_path.join("a/foo"), FileType::Directory), + ]; + assert_eq!(sorted_contents, expected_contents); + } + + #[test] + fn read_directory_nonexistent() { + let tempdir = TempDir::new().unwrap(); + + let fs = OsSystem::new(Utf8Path::from_path(tempdir.path()).unwrap()); + let result = fs.read_directory(Utf8Path::new("doesnt_exist")); + assert!(result.is_err_and(|error| error.kind() == std::io::ErrorKind::NotFound)); + } + + #[test] + fn read_directory_on_file() { + let tempdir = TempDir::new().unwrap(); + let tempdir_path = tempdir.path(); + std::fs::File::create(tempdir_path.join("a.py")).unwrap(); + + let tempdir_path = Utf8Path::from_path(tempdir_path).unwrap(); + let fs = OsSystem::new(tempdir_path); + let result = fs.read_directory(&tempdir_path.join("a.py")); + let Err(error) = result else { + panic!("Expected the read_dir() call to fail!"); + }; + + // We can't assert the error kind here because it's apparently an unstable feature! + // https://github.com/rust-lang/rust/issues/86442 + // assert_eq!(error.kind(), std::io::ErrorKind::NotADirectory); + + // We can't even assert the error message on all platforms, as it's different on Windows, + // where the message is "The directory name is invalid" rather than "Not a directory". + if cfg!(unix) { + assert!(error.to_string().contains("Not a directory")); + } + } + + #[test] + fn walk_directory() -> std::io::Result<()> { + let tempdir = TempDir::new()?; + + let root = tempdir.path(); + std::fs::create_dir_all(root.join("a/b"))?; + std::fs::write(root.join("foo.py"), "print('foo')")?; + std::fs::write(root.join("a/bar.py"), "print('bar')")?; + std::fs::write(root.join("a/baz.py"), "print('baz')")?; + std::fs::write(root.join("a/b/c.py"), "print('c')")?; + + let root_sys = Utf8Path::from_path(root).unwrap(); + let system = OsSystem::new(root_sys); + + let writer = DirectoryEntryToString::new(root_sys.to_path_buf()); + + system.walk_directory(root_sys).run(|| { + Box::new(|entry| { + writer.write_entry(entry); + + WalkState::Continue + }) + }); + + assert_eq!( + writer.to_string(), + r#"{ + "": ( + Directory, + 0, + ), + "a": ( + Directory, + 1, + ), + "a/b": ( + Directory, + 2, + ), + "a/b/c.py": ( + File, + 3, + ), + "a/bar.py": ( + File, + 2, + ), + "a/baz.py": ( + File, + 2, + ), + "foo.py": ( + File, + 1, + ), +}"# + ); + + Ok(()) + } + + #[test] + fn walk_directory_ignore() -> std::io::Result<()> { + let tempdir = TempDir::new()?; + + let root = tempdir.path(); + std::fs::create_dir_all(root.join("a/b"))?; + std::fs::write(root.join("foo.py"), "print('foo')\n")?; + std::fs::write(root.join("a/bar.py"), "print('bar')\n")?; + std::fs::write(root.join("a/baz.py"), "print('baz')\n")?; + + // Exclude the `b` directory. + std::fs::write(root.join("a/.ignore"), "b/\n")?; + std::fs::write(root.join("a/b/c.py"), "print('c')\n")?; + + let root_sys = Utf8Path::from_path(root).unwrap(); + let system = OsSystem::new(root_sys); + + let writer = DirectoryEntryToString::new(root_sys.to_path_buf()); + + system + .walk_directory(root_sys) + .standard_filters(true) + .run(|| { + Box::new(|entry| { + writer.write_entry(entry); + WalkState::Continue + }) + }); + + assert_eq!( + writer.to_string(), + r#"{ + "": ( + Directory, + 0, + ), + "a": ( + Directory, + 1, + ), + "a/bar.py": ( + File, + 2, + ), + "a/baz.py": ( + File, + 2, + ), + "foo.py": ( + File, + 1, + ), +}"# + ); + + Ok(()) + } + + #[test] + fn walk_directory_file() -> std::io::Result<()> { + let tempdir = TempDir::new()?; + + let root = tempdir.path(); + std::fs::write(root.join("foo.py"), "print('foo')\n")?; + + let root_sys = Utf8Path::from_path(root).unwrap(); + let system = OsSystem::new(root_sys); + + let writer = DirectoryEntryToString::new(root_sys.to_path_buf()); + + system + .walk_directory(&root_sys.join("foo.py")) + .standard_filters(true) + .run(|| { + Box::new(|entry| { + writer.write_entry(entry); + WalkState::Continue + }) + }); + + assert_eq!( + writer.to_string(), + r#"{ + "foo.py": ( + File, + 0, + ), +}"# + ); + + Ok(()) + } +} diff --git a/crates/karva_project/src/system/walk_directory.rs b/crates/karva_project/src/system/walk_directory.rs new file mode 100644 index 00000000..2b599125 --- /dev/null +++ b/crates/karva_project/src/system/walk_directory.rs @@ -0,0 +1,324 @@ +use std::{ + fmt::{Display, Formatter}, + path::PathBuf, +}; + +use camino::{Utf8Path, Utf8PathBuf}; + +use super::FileType; + +/// A builder for constructing a directory recursive traversal. +pub struct WalkDirectoryBuilder { + /// The implementation that does the directory walking. + walker: Box, + + /// The paths that should be walked. + paths: Vec, + + ignore_hidden: bool, + + standard_filters: bool, +} + +impl WalkDirectoryBuilder { + pub fn new(path: impl AsRef, walker: W) -> Self + where + W: DirectoryWalker + 'static, + { + Self { + walker: Box::new(walker), + paths: vec![path.as_ref().to_path_buf()], + ignore_hidden: true, + standard_filters: true, + } + } + + /// Adds a path that should be traversed recursively. + /// + /// Each additional path is traversed recursively. + /// This should be preferred over building multiple + /// walkers since it enables reusing resources. + #[expect(clippy::should_implement_trait)] + pub fn add(mut self, path: impl AsRef) -> Self { + self.paths.push(path.as_ref().to_path_buf()); + self + } + + /// Whether hidden files should be ignored. + /// + /// The definition of what a hidden file depends on the [`System`](super::System) and can be platform-dependent. + /// + /// This is enabled by default. + pub fn ignore_hidden(mut self, hidden: bool) -> Self { + self.ignore_hidden = hidden; + self + } + + /// Enables all the standard ignore filters. + /// + /// This toggles, as a group, all the filters that are enabled by default: + /// * [`hidden`](Self::ignore_hidden) + /// * Any [`System`](super::System) specific filters according (e.g., respecting `.ignore`, `.gitignore`, files). + /// + /// Defaults to `true`. + pub fn standard_filters(mut self, standard_filters: bool) -> Self { + self.standard_filters = standard_filters; + self.ignore_hidden = standard_filters; + + self + } + + /// Runs the directory traversal and calls the passed `builder` to create visitors + /// that do the visiting. The walker may run multiple threads to visit the directories. + pub fn run<'s, F>(self, builder: F) + where + F: FnMut() -> FnVisitor<'s>, + { + self.visit(&mut FnBuilder { builder }); + } + + /// Runs the directory traversal and calls the passed `builder` to create visitors + /// that do the visiting. The walker may run multiple threads to visit the directories. + pub fn visit(self, builder: &mut dyn WalkDirectoryVisitorBuilder) { + let configuration = WalkDirectoryConfiguration { + paths: self.paths, + ignore_hidden: self.ignore_hidden, + standard_filters: self.standard_filters, + }; + + self.walker.walk(builder, configuration); + } +} + +/// Concrete walker that performs the directory walking. +pub trait DirectoryWalker { + fn walk( + &self, + builder: &mut dyn WalkDirectoryVisitorBuilder, + configuration: WalkDirectoryConfiguration, + ); +} + +/// Creates a visitor for each thread that does the visiting. +pub trait WalkDirectoryVisitorBuilder<'s> { + fn build(&mut self) -> Box; +} + +/// Visitor handling the individual directory entries. +pub trait WalkDirectoryVisitor: Send { + fn visit(&mut self, entry: std::result::Result) -> WalkState; +} + +struct FnBuilder { + builder: F, +} + +impl<'s, F> WalkDirectoryVisitorBuilder<'s> for FnBuilder +where + F: FnMut() -> FnVisitor<'s>, +{ + fn build(&mut self) -> Box { + let visitor = (self.builder)(); + Box::new(FnVisitorImpl(visitor)) + } +} + +type FnVisitor<'s> = + Box) -> WalkState + Send + 's>; + +struct FnVisitorImpl<'s>(FnVisitor<'s>); + +impl WalkDirectoryVisitor for FnVisitorImpl<'_> { + fn visit(&mut self, entry: std::result::Result) -> WalkState { + (self.0)(entry) + } +} + +pub struct WalkDirectoryConfiguration { + pub paths: Vec, + pub ignore_hidden: bool, + pub standard_filters: bool, +} + +/// An entry in a directory. +#[derive(Debug, Clone)] +pub struct DirectoryEntry { + pub(super) path: Utf8PathBuf, + pub(super) file_type: FileType, + pub(super) depth: usize, +} + +impl DirectoryEntry { + /// The full path that this entry represents. + pub fn path(&self) -> &Utf8Path { + &self.path + } + + /// The full path that this entry represents. + /// Analogous to [`DirectoryEntry::path`], but moves ownership of the path. + pub fn into_path(self) -> Utf8PathBuf { + self.path + } + + /// Return the file type for the file that this entry points to. + pub fn file_type(&self) -> FileType { + self.file_type + } + + /// Returns the depth at which this entry was created relative to the root. + pub fn depth(&self) -> usize { + self.depth + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum WalkState { + /// Continue walking as normal + Continue, + + /// If the entry given is a directory, don't descend into it. + /// In all other cases, this has no effect. + Skip, + + /// Quit the entire iterator as soon as possible. + /// + /// Note: This is an inherently asynchronous action. It's possible + /// for more entries to be yielded even after instructing the iterator to quit. + Quit, +} + +pub struct Error { + pub(super) depth: Option, + pub(super) kind: ErrorKind, +} + +impl Error { + pub fn depth(&self) -> Option { + self.depth + } + + pub fn kind(&self) -> &ErrorKind { + &self.kind + } +} + +impl Display for Error { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match &self.kind { + ErrorKind::Loop { ancestor, child } => { + write!( + f, + "File system loop found: {child} points to an ancestor {ancestor}", + ) + } + ErrorKind::Io { + path: Some(path), + err, + } => { + write!(f, "IO error for operation on {path}: {err}") + } + ErrorKind::Io { path: None, err } => err.fmt(f), + ErrorKind::NonUtf8Path { path } => { + write!(f, "Non-UTF8 path: {}", path.display()) + } + } + } +} + +impl std::fmt::Debug for Error { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(self, f) + } +} + +impl std::error::Error for Error {} + +#[derive(Debug)] +pub enum ErrorKind { + /// An error that occurs when a file loop is detected when traversing + /// symbolic links. + Loop { + ancestor: Utf8PathBuf, + child: Utf8PathBuf, + }, + + /// An error that occurs when doing I/O + Io { + path: Option, + err: std::io::Error, + }, + + /// A path is not a valid UTF-8 path. + NonUtf8Path { path: PathBuf }, +} + +#[cfg(test)] +pub(super) mod tests { + use std::collections::BTreeMap; + + use crate::system::{ + FileType, Utf8PathBuf, + walk_directory::{DirectoryEntry, Error}, + }; + + /// Test helper that creates a visual representation of the visited directory entries. + pub(crate) struct DirectoryEntryToString { + root_path: Utf8PathBuf, + inner: std::sync::Mutex, + } + + impl DirectoryEntryToString { + pub(crate) fn new(root_path: Utf8PathBuf) -> Self { + Self { + root_path, + inner: std::sync::Mutex::new(DirectoryEntryToStringInner::default()), + } + } + + pub(crate) fn write_entry(&self, entry: Result) { + let mut inner = self.inner.lock().unwrap(); + let DirectoryEntryToStringInner { errors, visited } = &mut *inner; + + match entry { + Ok(entry) => { + let relative_path = entry + .path() + .strip_prefix(&self.root_path) + .unwrap_or(entry.path()); + + let unix_path = relative_path + .components() + .map(|component| component.as_str()) + .collect::>() + .join("/"); + + visited.insert(unix_path, (entry.file_type, entry.depth)); + } + Err(error) => { + errors.push_str(&error.to_string()); + errors.push('\n'); + } + } + } + } + + impl std::fmt::Display for DirectoryEntryToString { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let inner = self.inner.lock().unwrap(); + write!(f, "{paths:#?}", paths = inner.visited)?; + + if !inner.errors.is_empty() { + writeln!(f, "\n\n{errors}", errors = inner.errors).unwrap(); + } + + Ok(()) + } + } + + #[derive(Default)] + struct DirectoryEntryToStringInner { + errors: String, + /// Stores the visited path. The key is the relative path to the root, using `/` as path separator. + visited: BTreeMap, + } +} From 37fc19b2b4054f129473f0a4a30ffef8f9a993eb Mon Sep 17 00:00:00 2001 From: Matthew Mckee Date: Wed, 3 Dec 2025 14:24:33 +0000 Subject: [PATCH 02/13] Parallel collection --- Cargo.lock | 31 + crates/karva_core/Cargo.toml | 2 + crates/karva_core/src/collection/collector.rs | 241 ++++++++ crates/karva_core/src/collection/mod.rs | 5 + crates/karva_core/src/collection/models.rs | 254 ++++++++ crates/karva_core/src/discovery/discoverer.rs | 272 +++------ crates/karva_core/src/lib.rs | 1 + crates/karva_project/src/db.rs | 6 - crates/karva_project/src/envs.rs | 19 - crates/karva_project/src/lib.rs | 7 - crates/karva_project/src/system.rs | 480 --------------- crates/karva_project/src/system/os.rs | 575 ------------------ .../src/system/walk_directory.rs | 324 ---------- 13 files changed, 604 insertions(+), 1613 deletions(-) create mode 100644 crates/karva_core/src/collection/collector.rs create mode 100644 crates/karva_core/src/collection/mod.rs create mode 100644 crates/karva_core/src/collection/models.rs delete mode 100644 crates/karva_project/src/db.rs delete mode 100644 crates/karva_project/src/envs.rs delete mode 100644 crates/karva_project/src/system.rs delete mode 100644 crates/karva_project/src/system/os.rs delete mode 100644 crates/karva_project/src/system/walk_directory.rs diff --git a/Cargo.lock b/Cargo.lock index 58e5018d..6a8b6092 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -501,6 +501,15 @@ dependencies = [ "itertools 0.10.5", ] +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -1162,6 +1171,7 @@ version = "0.0.0" dependencies = [ "camino", "colored 3.0.0", + "crossbeam-channel", "ctor", "ignore", "indexmap", @@ -1169,6 +1179,7 @@ dependencies = [ "karva_project", "karva_test", "pyo3", + "rayon", "regex", "rstest", "ruff_db", @@ -1728,6 +1739,26 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.18" diff --git a/crates/karva_core/Cargo.toml b/crates/karva_core/Cargo.toml index 1aa8feef..d4d774a0 100644 --- a/crates/karva_core/Cargo.toml +++ b/crates/karva_core/Cargo.toml @@ -27,6 +27,8 @@ tracing = { workspace = true } ignore = { workspace = true } regex = { workspace = true } indexmap = { workspace = true } +rayon = "1.11.0" +crossbeam-channel = "0.5.15" [dev-dependencies] regex = { workspace = true } diff --git a/crates/karva_core/src/collection/collector.rs b/crates/karva_core/src/collection/collector.rs new file mode 100644 index 00000000..281b182e --- /dev/null +++ b/crates/karva_core/src/collection/collector.rs @@ -0,0 +1,241 @@ +use std::{sync::Arc, thread}; + +use camino::{Utf8Path, Utf8PathBuf}; +use crossbeam_channel::unbounded; +use ignore::WalkBuilder; +use ruff_python_ast::Stmt; +use ruff_python_parser::{Mode, ParseOptions, parse_unchecked}; + +use super::models::{CollectedModule, CollectedPackage}; +use crate::{Context, diagnostic::report_invalid_path, discovery::ModuleType, name::ModulePath}; + +pub struct ParallelCollector<'ctx, 'proj, 'rep> { + context: &'ctx Context<'proj, 'rep>, +} + +impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { + pub const fn new(context: &'ctx Context<'proj, 'rep>) -> Self { + Self { context } + } + + /// Collect all function definitions from a single file. + fn collect_file(&self, path: &Utf8PathBuf) -> Option { + Self::collect_file_static( + path, + self.context.project().cwd(), + self.context.project().metadata().python_version(), + ) + } + + /// Collect from a single test file path. + pub fn collect_test_file(&self, path: &Utf8PathBuf) -> Option { + self.collect_file(path) + } + + /// Collect from a directory in parallel using `WalkParallel`. + pub fn collect_directory(&self, path: &Utf8PathBuf) -> CollectedPackage { + let mut package = CollectedPackage::new(path.clone()); + + // Create channels for communication + let (tx, rx) = unbounded(); + + // Spawn receiver thread to collect results + let receiver_handle = thread::spawn(move || { + let mut modules = Vec::new(); + for module in rx { + modules.push(module); + } + modules + }); + + // Walk directory in parallel and process files + let walker = self.create_parallel_walker(path); + let cwd = self.context.project().cwd().clone(); + let python_version = self.context.project().metadata().python_version(); + + walker.run(|| { + let tx = tx.clone(); + let cwd = cwd.clone(); + + Box::new(move |entry| { + let Ok(entry) = entry else { + return ignore::WalkState::Continue; + }; + + // Only process files + if !entry.file_type().is_some_and(|ft| ft.is_file()) { + return ignore::WalkState::Continue; + } + + let Ok(file_path) = Utf8PathBuf::from_path_buf(entry.path().to_path_buf()) else { + return ignore::WalkState::Continue; + }; + + // Collect the module + if let Some(module) = Self::collect_file_static(&file_path, &cwd, python_version) { + let _ = tx.send(module); + } + + ignore::WalkState::Continue + }) + }); + + // Drop the sender so receiver knows we're done + drop(tx); + + // Wait for receiver to finish + let modules = receiver_handle.join().unwrap(); + + // Add all collected modules to the package + for collected_module in modules { + match collected_module.module_type() { + ModuleType::Test => { + package.add_module(collected_module); + } + ModuleType::Configuration => { + package.add_configuration_module(collected_module); + } + } + } + + package + } + + /// Static version of `collect_file` that doesn't need self reference. + /// Used for parallel processing where we can't share &self across threads. + fn collect_file_static( + path: &Utf8PathBuf, + cwd: &Utf8PathBuf, + python_version: ruff_python_ast::PythonVersion, + ) -> Option { + let module_path = ModulePath::new(path, cwd)?; + + let Ok(source_text) = std::fs::read_to_string(path) else { + return None; + }; + + let module_type: ModuleType = path.into(); + + let mut collected_module = + CollectedModule::new(module_path, path.clone(), module_type, source_text.clone()); + + // Parse the file to collect function definitions + let parsed = parse_unchecked( + &source_text, + ParseOptions::from(Mode::Module).with_target_version(python_version), + ) + .try_into_module()?; + + // Collect all top-level function definitions + for stmt in &parsed.syntax().body { + if let Stmt::FunctionDef(function_def) = stmt { + collected_module.add_function_def(Arc::new(function_def.clone())); + } + } + + Some(collected_module) + } + + /// Collect from all paths and build a complete package structure. + pub fn collect_all(&self) -> CollectedPackage { + let mut session_package = CollectedPackage::new(self.context.project().cwd().clone()); + + // Process all test paths + for path_result in self.context.project().test_paths() { + let Ok(path) = path_result else { + // Report invalid path errors + if let Err(error) = path_result { + report_invalid_path(self.context, &error); + } + continue; + }; + + // Clone the path for parent configuration lookup + let path_for_config = path.path().to_owned(); + + match path { + karva_project::TestPath::File(file_path) => { + if let Some(module) = self.collect_test_file(&file_path) { + session_package.add_module(module); + } + } + karva_project::TestPath::Directory(dir_path) => { + let package = self.collect_directory(&dir_path); + session_package.add_package(package); + } + karva_project::TestPath::Function { + path: file_path, .. + } => { + // For specific function paths, still collect the whole module + if let Some(module) = self.collect_test_file(&file_path) { + session_package.add_module(module); + } + } + } + + // Collect parent configuration files + self.collect_parent_configuration(&path_for_config, &mut session_package); + } + + session_package.shrink(); + session_package + } + + /// Collect parent configuration files (conftest.py). + fn collect_parent_configuration( + &self, + path: &Utf8Path, + session_package: &mut CollectedPackage, + ) { + let mut current_path = if path.is_dir() { + path + } else { + match path.parent() { + Some(parent) => parent, + None => return, + } + }; + + loop { + let conftest_path = current_path.join("conftest.py"); + if conftest_path.exists() { + let mut package = CollectedPackage::new(current_path.to_path_buf()); + + if let Some(module) = self.collect_test_file(&conftest_path) { + package.add_configuration_module(module); + session_package.add_package(package); + } + } + + if current_path == self.context.project().cwd() { + break; + } + + current_path = match current_path.parent() { + Some(parent) => parent, + None => break, + }; + } + } + + /// Creates a configured parallel directory walker for Python file discovery. + fn create_parallel_walker(&self, path: &Utf8PathBuf) -> ignore::WalkParallel { + WalkBuilder::new(path) + .standard_filters(true) + .require_git(false) + .git_global(false) + .parents(true) + .git_ignore(!self.context.project().options().no_ignore()) + .types({ + let mut types = ignore::types::TypesBuilder::new(); + types.add("python", "*.py").unwrap(); + types.select("python"); + types.build().unwrap() + }) + .filter_entry(|entry| { + let file_name = entry.file_name(); + file_name != "__pycache__" + }) + .build_parallel() + } +} diff --git a/crates/karva_core/src/collection/mod.rs b/crates/karva_core/src/collection/mod.rs new file mode 100644 index 00000000..f04f8c14 --- /dev/null +++ b/crates/karva_core/src/collection/mod.rs @@ -0,0 +1,5 @@ +mod collector; +mod models; + +pub use collector::ParallelCollector; +pub use models::CollectedPackage; diff --git a/crates/karva_core/src/collection/models.rs b/crates/karva_core/src/collection/models.rs new file mode 100644 index 00000000..02f48579 --- /dev/null +++ b/crates/karva_core/src/collection/models.rs @@ -0,0 +1,254 @@ +use std::collections::HashMap; +use std::sync::Arc; + +use camino::Utf8PathBuf; +use ruff_python_ast::StmtFunctionDef; + +use crate::discovery::ModuleType; +use crate::name::ModulePath; + +/// A collected module containing raw AST function definitions. +/// This is populated during the parallel collection phase. +#[derive(Debug, Clone)] +pub struct CollectedModule { + path: ModulePath, + file_path: Utf8PathBuf, + module_type: ModuleType, + source_text: String, + /// All function definitions found in this module (both tests and fixtures) + function_defs: Vec>, +} + +impl CollectedModule { + pub const fn new( + path: ModulePath, + file_path: Utf8PathBuf, + module_type: ModuleType, + source_text: String, + ) -> Self { + Self { + path, + file_path, + module_type, + source_text, + function_defs: Vec::new(), + } + } + + pub fn add_function_def(&mut self, function_def: Arc) { + self.function_defs.push(function_def); + } + + pub const fn path(&self) -> &ModulePath { + &self.path + } + + pub const fn file_path(&self) -> &Utf8PathBuf { + &self.file_path + } + + pub const fn module_type(&self) -> ModuleType { + self.module_type + } + + #[allow(dead_code)] + pub fn source_text(&self) -> &str { + &self.source_text + } + + #[allow(dead_code)] + pub fn function_defs(&self) -> &[Arc] { + &self.function_defs + } + + pub fn is_empty(&self) -> bool { + self.function_defs.is_empty() + } +} + +/// A collected package containing collected modules and subpackages. +/// This is populated during the parallel collection phase. +#[derive(Debug, Clone)] +pub struct CollectedPackage { + path: Utf8PathBuf, + modules: HashMap, + packages: HashMap, + configuration_module_path: Option, +} + +impl CollectedPackage { + pub fn new(path: Utf8PathBuf) -> Self { + Self { + path, + modules: HashMap::new(), + packages: HashMap::new(), + configuration_module_path: None, + } + } + + pub const fn path(&self) -> &Utf8PathBuf { + &self.path + } + + pub const fn modules(&self) -> &HashMap { + &self.modules + } + + pub const fn packages(&self) -> &HashMap { + &self.packages + } + + #[allow(dead_code)] + pub const fn configuration_module_path(&self) -> Option<&Utf8PathBuf> { + self.configuration_module_path.as_ref() + } + + /// Add a module to this package. + /// + /// If the module path does not start with our path, do nothing. + /// + /// If the module path equals our path, use update method. + /// + /// Otherwise, strip the current path from the start and add the module to the appropriate sub-package. + pub fn add_module(&mut self, module: CollectedModule) { + if !module.file_path().starts_with(self.path()) { + return; + } + + if module.is_empty() { + return; + } + + let Some(parent_path) = module.file_path().parent() else { + return; + }; + + if parent_path == self.path() { + if let Some(existing_module) = self.modules.get_mut(module.file_path()) { + existing_module.update(module); + } else { + if module.module_type() == ModuleType::Configuration { + self.configuration_module_path = Some(module.file_path().clone()); + } + self.modules.insert(module.file_path().clone(), module); + } + return; + } + + let Ok(relative_path) = module.file_path().strip_prefix(self.path()) else { + return; + }; + + let components: Vec<_> = relative_path.components().collect(); + + if components.is_empty() { + return; + } + + let first_component = components[0]; + let intermediate_path = self.path().join(first_component); + + if let Some(existing_package) = self.packages.get_mut(&intermediate_path) { + existing_package.add_module(module); + } else { + let mut new_package = Self::new(intermediate_path); + new_package.add_module(module); + self.packages + .insert(new_package.path().clone(), new_package); + } + } + + pub fn add_configuration_module(&mut self, module: CollectedModule) { + self.configuration_module_path = Some(module.file_path().clone()); + self.add_module(module); + } + + /// Add a package to this package. + /// + /// If the package path equals our path, use update method. + /// + /// Otherwise, strip the current path from the start and add the package to the appropriate sub-package. + pub fn add_package(&mut self, package: Self) { + if !package.path().starts_with(self.path()) { + return; + } + + if package.path() == self.path() { + self.update(package); + return; + } + + let Ok(relative_path) = package.path().strip_prefix(self.path()) else { + return; + }; + + let components: Vec<_> = relative_path.components().collect(); + + if components.is_empty() { + return; + } + + let first_component = components[0]; + let intermediate_path = self.path().join(first_component); + + if let Some(existing_package) = self.packages.get_mut(&intermediate_path) { + existing_package.add_package(package); + } else { + let mut new_package = Self::new(intermediate_path); + new_package.add_package(package); + self.packages + .insert(new_package.path().clone(), new_package); + } + } + + pub fn update(&mut self, package: Self) { + for (_, module) in package.modules { + self.add_module(module); + } + for (_, package) in package.packages { + self.add_package(package); + } + + if let Some(module_path) = package.configuration_module_path { + self.configuration_module_path = Some(module_path); + } + } + + pub fn is_empty(&self) -> bool { + self.modules.is_empty() && self.packages.is_empty() + } + + pub fn shrink(&mut self) { + self.modules.retain(|_, module| !module.is_empty()); + + if let Some(configuration_module) = self.configuration_module_path.as_ref() { + if !self.modules.contains_key(configuration_module) { + self.configuration_module_path = None; + } + } + + self.packages.retain(|_, package| !package.is_empty()); + + for package in self.packages.values_mut() { + package.shrink(); + } + } +} + +impl CollectedModule { + /// Update this module with another module. + /// Merges function definitions from the other module into this one. + pub fn update(&mut self, module: Self) { + if self.path == module.path { + for function_def in module.function_defs { + if !self + .function_defs + .iter() + .any(|existing| existing.name == function_def.name) + { + self.function_defs.push(function_def); + } + } + } + } +} diff --git a/crates/karva_core/src/discovery/discoverer.rs b/crates/karva_core/src/discovery/discoverer.rs index fe16017a..8cbcaea8 100644 --- a/crates/karva_core/src/discovery/discoverer.rs +++ b/crates/karva_core/src/discovery/discoverer.rs @@ -1,5 +1,4 @@ -use camino::{Utf8Path, Utf8PathBuf}; -use ignore::WalkBuilder; +use camino::Utf8PathBuf; use karva_project::TestPath; use pyo3::prelude::*; @@ -7,9 +6,8 @@ use pyo3::prelude::*; use crate::utils::attach_with_project; use crate::{ Context, - diagnostic::report_invalid_path, + collection::{CollectedPackage, ParallelCollector}, discovery::{DiscoveredModule, DiscoveredPackage, ModuleType, visitor::DiscoveredFunctions}, - name::ModulePath, utils::add_to_sys_path, }; @@ -28,235 +26,105 @@ impl<'ctx, 'proj, 'rep> StandardDiscoverer<'ctx, 'proj, 'rep> { } pub(crate) fn discover_with_py(self, py: Python<'_>) -> DiscoveredPackage { - let mut session_package = DiscoveredPackage::new(self.context.project().cwd().clone()); - let cwd = self.context.project().cwd(); if add_to_sys_path(py, cwd, 0).is_err() { - return session_package; + return DiscoveredPackage::new(cwd.clone()); } - tracing::info!("Discovering tests..."); + tracing::info!("Collecting test files in parallel..."); + + // Collect function-specific paths for filtering and check for shadowing + let mut function_filters = std::collections::HashMap::new(); + let mut all_file_paths = std::collections::HashSet::new(); + let mut all_directory_paths = std::collections::HashSet::new(); - for path in self.context.project().test_paths() { + for path in self.context.project().test_paths().into_iter().flatten() { match path { - Ok(path) => { - match &path { - TestPath::File(path) => { - let Some(module) = - self.discover_test_file(py, path, DiscoveryMode::All) - else { - continue; - }; - - session_package.add_module(module); - } - TestPath::Directory(path) => { - let package = self.discover_directory(py, path, DiscoveryMode::All); - - session_package.add_package(package); - } - TestPath::Function { - path, - function_name, - } => { - let Some(mut module) = - self.discover_test_file(py, path, DiscoveryMode::All) - else { - continue; - }; - - module.filter_test_functions(function_name); - - if !module.test_functions().is_empty() { - session_package.add_module(module); - } - } - } - - self.add_parent_configuration_packages(py, path.path(), &mut session_package); + TestPath::File(file_path) => { + all_file_paths.insert(file_path); + } + TestPath::Directory(dir_path) => { + all_directory_paths.insert(dir_path); } - Err(error) => { - report_invalid_path(self.context, &error); + TestPath::Function { path, function_name } => { + function_filters.insert(path, function_name); } } } - session_package.shrink(); - - session_package - } - - // Parse and run discovery on a single file - fn discover_test_file( - &self, - py: Python, - path: &Utf8PathBuf, - discovery_mode: DiscoveryMode, - ) -> Option { - let module_path = ModulePath::new(path, self.context.project().cwd())?; - - let mut module = DiscoveredModule::new(module_path, path.into()); - - let DiscoveredFunctions { - functions, - fixtures, - } = super::visitor::discover(self.context, py, &module); - - if !discovery_mode.is_configuration_only() { - module.extend_test_functions(functions); - } - - module.extend_fixtures(fixtures); - - Some(module) - } - - // This should look from the parent of path to the cwd for configuration files - fn add_parent_configuration_packages( - &self, - py: Python, - path: &Utf8Path, - session_package: &mut DiscoveredPackage, - ) { - let mut current_path = if path.is_dir() { - path - } else { - match path.parent() { - Some(parent) => parent, - None => return, + // Remove function filters if the file is explicitly listed or is in a listed directory + function_filters.retain(|file_path, _| { + // If the file itself is in the test paths, don't filter + if all_file_paths.contains(file_path) { + return false; } - }; - - loop { - let conftest_path = current_path.join("conftest.py"); - if conftest_path.exists() { - let mut package = DiscoveredPackage::new(current_path.to_path_buf()); + // If any parent directory is in the test paths, don't filter + for dir_path in &all_directory_paths { + if file_path.starts_with(dir_path) { + return false; + } + } + true + }); - let Some(module) = - self.discover_test_file(py, &conftest_path, DiscoveryMode::ConfigurationOnly) - else { - break; - }; + // Phase 1: Collection (parallel) - collect all AST function definitions + let collector = ParallelCollector::new(self.context); + let collected_package = collector.collect_all(); - package.add_configuration_module(module); + tracing::info!("Discovering test functions and fixtures..."); - session_package.add_package(package); - } + // Phase 2: Discovery (single-threaded) - convert collected AST to test functions and fixtures + let mut session_package = self.convert_collected_to_discovered(py, &collected_package, &function_filters); - if current_path == *self.context.project().cwd() { - break; - } + session_package.shrink(); - current_path = match current_path.parent() { - Some(parent) => parent, - None => break, - }; - } + session_package } - /// Discovers test files and packages within a directory. - /// - /// This method recursively walks through a directory structure to find Python - /// test files and subdirectories. It respects .gitignore files and filters - /// out common non-test directories like __pycache__. - fn discover_directory( + /// Convert a collected package to a discovered package by importing Python modules + /// and resolving test functions and fixtures. + fn convert_collected_to_discovered( &self, - py: Python, - path: &Utf8PathBuf, - discovery_mode: DiscoveryMode, + py: Python<'_>, + collected_package: &CollectedPackage, + function_filters: &std::collections::HashMap, ) -> DiscoveredPackage { - let walker = self.create_directory_walker(path); + let mut discovered_package = DiscoveredPackage::new(collected_package.path().clone()); - let mut package = DiscoveredPackage::new(path.clone()); + // Convert all modules + for collected_module in collected_package.modules().values() { + let mut module = DiscoveredModule::new( + collected_module.path().clone(), + collected_module.file_path().into(), + ); - for entry in walker { - let Ok(entry) = entry else { - continue; - }; - let current_path = Utf8PathBuf::from_path_buf(entry.path().to_path_buf()) - .expect("Path is not valid UTF-8"); - - // Skip the package directory itself - if package.path() == ¤t_path { - continue; - } + let DiscoveredFunctions { functions, fixtures } = + super::visitor::discover(self.context, py, &module); - match entry.file_type() { - Some(file_type) if file_type.is_dir() => { - if discovery_mode.is_configuration_only() { - continue; - } + module.extend_test_functions(functions); + module.extend_fixtures(fixtures); - let subpackage = self.discover_directory(py, ¤t_path, discovery_mode); + // Apply function filtering if this module has a function filter + if let Some(function_name) = function_filters.get(collected_module.file_path()) { + module.filter_test_functions(function_name); + } - package.add_package(subpackage); - } - Some(file_type) if file_type.is_file() => match (¤t_path).into() { - ModuleType::Test => { - if discovery_mode.is_configuration_only() { - continue; - } - - let Some(module) = - self.discover_test_file(py, ¤t_path, DiscoveryMode::All) - else { - continue; - }; - - package.add_module(module); - } - ModuleType::Configuration => { - let Some(module) = self.discover_test_file( - py, - ¤t_path, - DiscoveryMode::ConfigurationOnly, - ) else { - continue; - }; - - package.add_configuration_module(module); - } - }, - _ => {} + if collected_module.module_type() == ModuleType::Configuration { + discovered_package.add_configuration_module(module); + } else { + discovered_package.add_module(module); } } - package - } - - /// Creates a configured directory walker for Python file discovery. - fn create_directory_walker(&self, path: &Utf8PathBuf) -> ignore::Walk { - WalkBuilder::new(path) - .max_depth(Some(1)) - .standard_filters(true) - .require_git(false) - .git_global(false) - .parents(true) - .git_ignore(!self.context.project().options().no_ignore()) - .types({ - let mut types = ignore::types::TypesBuilder::new(); - types.add("python", "*.py").unwrap(); - types.select("python"); - types.build().unwrap() - }) - .filter_entry(|entry| { - let file_name = entry.file_name(); - file_name != "__pycache__" - }) - .build() - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub(super) enum DiscoveryMode { - All, - ConfigurationOnly, -} + // Convert all subpackages recursively + for collected_subpackage in collected_package.packages().values() { + let discovered_subpackage = + self.convert_collected_to_discovered(py, collected_subpackage, function_filters); + discovered_package.add_package(discovered_subpackage); + } -impl DiscoveryMode { - pub const fn is_configuration_only(self) -> bool { - matches!(self, Self::ConfigurationOnly) + discovered_package } } diff --git a/crates/karva_core/src/lib.rs b/crates/karva_core/src/lib.rs index 96f9a372..aca74bc7 100644 --- a/crates/karva_core/src/lib.rs +++ b/crates/karva_core/src/lib.rs @@ -1,3 +1,4 @@ +mod collection; mod context; pub(crate) mod diagnostic; pub(crate) mod discovery; diff --git a/crates/karva_project/src/db.rs b/crates/karva_project/src/db.rs deleted file mode 100644 index af5fbe63..00000000 --- a/crates/karva_project/src/db.rs +++ /dev/null @@ -1,6 +0,0 @@ -use crate::{Project, System}; - -pub trait Db { - fn system(&self) -> &dyn System; - fn project(&self) -> &Project; -} diff --git a/crates/karva_project/src/envs.rs b/crates/karva_project/src/envs.rs deleted file mode 100644 index 907c7e93..00000000 --- a/crates/karva_project/src/envs.rs +++ /dev/null @@ -1,19 +0,0 @@ -use std::num::NonZeroUsize; - -pub struct EnvVars; - -impl EnvVars { - // Externally defined environment variables - - /// This is a standard Rayon environment variable. - pub const RAYON_NUM_THREADS: &'static str = "RAYON_NUM_THREADS"; -} - -pub fn max_parallelism() -> NonZeroUsize { - std::env::var(EnvVars::RAYON_NUM_THREADS) - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or_else(|| { - std::thread::available_parallelism().unwrap_or_else(|_| NonZeroUsize::new(1).unwrap()) - }) -} diff --git a/crates/karva_project/src/lib.rs b/crates/karva_project/src/lib.rs index 0c1309b3..9d0e3c5a 100644 --- a/crates/karva_project/src/lib.rs +++ b/crates/karva_project/src/lib.rs @@ -1,16 +1,9 @@ -mod db; -mod envs; mod path; mod project; -pub(crate) mod system; mod utils; mod verbosity; -pub use db::Db; -pub use envs::EnvVars; -pub(crate) use envs::max_parallelism; pub use path::{TestPath, TestPathError, absolute}; pub use project::{Project, ProjectMetadata, ProjectOptions, TestPrefix}; -pub use system::{OsSystem, System}; pub use utils::module_name; pub use verbosity::VerbosityLevel; diff --git a/crates/karva_project/src/system.rs b/crates/karva_project/src/system.rs deleted file mode 100644 index 39263986..00000000 --- a/crates/karva_project/src/system.rs +++ /dev/null @@ -1,480 +0,0 @@ -use std::{ - error::Error, - ffi::OsStr, - fmt::Debug, - io, - path::{Path, PathBuf}, -}; - -use camino::{Utf8Path, Utf8PathBuf}; -use filetime::FileTime; -use glob::PatternError; - -mod os; -pub mod walk_directory; - -pub use os::OsSystem; -use walk_directory::WalkDirectoryBuilder; - -pub type Result = std::io::Result; - -/// The system on which Karva runs. -/// -/// Karva supports running on the CLI. -/// -/// Abstracting the system enables tests to use a more efficient in-memory file system. -pub trait System: Debug + Sync + Send { - /// Reads the metadata of the file or directory at `path`. - /// - /// This function will traverse symbolic links to query information about the destination file. - fn path_metadata(&self, path: &Utf8Path) -> Result; - - /// Returns the canonical, absolute form of a path with all intermediate components normalized - /// and symbolic links resolved. - /// - /// # Errors - /// This function will return an error in the following situations, but is not limited to just these cases: - /// * `path` does not exist. - /// * A non-final component in `path` is not a directory. - /// * the symlink target path is not valid Unicode. - /// - /// ## Windows long-paths - /// Unlike `std::fs::canonicalize`, this function does remove UNC prefixes if possible. - /// See [`dunce::canonicalize`] for more information. - fn canonicalize_path(&self, path: &Utf8Path) -> Result; - - /// Returns the source type for `path` if known or `None`. - /// - /// The default is to always return `None`, assuming the system - /// has no additional information and that the caller should - /// rely on the file extension instead. - /// - /// This is primarily used for the LSP integration to respect - /// the chosen language (or the fact that it is a notebook) in - /// the editor. - fn source_type(&self, path: &Utf8Path) -> Option { - let _ = path; - None - } - - /// Reads the content of the file at `path` into a [`String`]. - fn read_to_string(&self, path: &Utf8Path) -> Result; - - /// Returns `true` if `path` exists. - fn path_exists(&self, path: &Utf8Path) -> bool { - self.path_metadata(path).is_ok() - } - - /// Returns `true` if `path` exists and is a directory. - fn is_directory(&self, path: &Utf8Path) -> bool { - self.path_metadata(path) - .is_ok_and(|metadata| metadata.file_type.is_directory()) - } - - /// Returns `true` if `path` exists and is a file. - fn is_file(&self, path: &Utf8Path) -> bool { - self.path_metadata(path) - .is_ok_and(|metadata| metadata.file_type.is_file()) - } - - /// Returns the current working directory - fn current_directory(&self) -> &Utf8Path; - - /// Returns the directory path where user configurations are stored. - /// - /// Returns `None` if no such convention exists for the system. - fn user_config_directory(&self) -> Option; - - /// Returns the directory path where cached files are stored. - /// - /// Returns `None` if no such convention exists for the system. - fn cache_dir(&self) -> Option; - - /// Iterate over the contents of the directory at `path`. - /// - /// The returned iterator must have the following properties: - /// - It only iterates over the top level of the directory, - /// i.e., it does not recurse into subdirectories. - /// - It skips the current and parent directories (`.` and `..` - /// respectively). - /// - The iterator yields `std::io::Result` instances. - /// For each instance, an `Err` variant may signify that the path - /// of the entry was not valid UTF8, in which case it should be an - /// [`std::io::Error`] with the `ErrorKind` set to - /// [`std::io::ErrorKind::InvalidData`] and the payload set to a - /// [`camino::FromPathBufError`]. It may also indicate that - /// "some sort of intermittent IO error occurred during iteration" - /// (language taken from the [`std::fs::read_dir`] documentation). - /// - /// # Errors - /// Returns an error: - /// - if `path` does not exist in the system, - /// - if `path` does not point to a directory, - /// - if the process does not have sufficient permissions to - /// view the contents of the directory at `path` - /// - May also return an error in some other situations as well. - fn read_directory<'a>( - &'a self, - path: &Utf8Path, - ) -> Result> + 'a>>; - - /// Recursively walks the content of `path`. - /// - /// It is allowed to pass a `path` that points to a file. In this case, the walker - /// yields a single entry for that file. - fn walk_directory(&self, path: &Utf8Path) -> WalkDirectoryBuilder; - - /// Return an iterator that produces all the `Path`s that match the given - /// pattern using default match options, which may be absolute or relative to - /// the current working directory. - /// - /// This may return an error if the pattern is invalid. - fn glob( - &self, - pattern: &str, - ) -> std::result::Result< - Box> + '_>, - PatternError, - >; - - /// Fetches the environment variable `key` from the current process. - /// - /// # Errors - /// - /// Returns [`std::env::VarError::NotPresent`] if: - /// - The variable is not set. - /// - The variable's name contains an equal sign or NUL (`'='` or `'\0'`). - /// - /// Returns [`std::env::VarError::NotUnicode`] if the variable's value is not valid - /// Unicode. - fn env_var(&self, name: &str) -> std::result::Result { - let _ = name; - Err(std::env::VarError::NotPresent) - } - - /// Returns a handle to a [`WritableSystem`] if this system is writeable. - fn as_writable(&self) -> Option<&dyn WritableSystem>; - - fn as_any(&self) -> &dyn std::any::Any; - - fn as_any_mut(&mut self) -> &mut dyn std::any::Any; - - fn dyn_clone(&self) -> Box; -} - -/// System trait for non-readonly systems. -pub trait WritableSystem: System { - /// Creates a file at the given path. - /// - /// Returns an error if the file already exists. - fn create_new_file(&self, path: &Utf8Path) -> Result<()>; - - /// Writes the given content to the file at the given path. - fn write_file(&self, path: &Utf8Path, content: &str) -> Result<()>; - - /// Creates a directory at `path` as well as any intermediate directories. - fn create_directory_all(&self, path: &Utf8Path) -> Result<()>; - - /// Reads the provided file from the system cache, or creates the file if necessary. - /// - /// Returns `Ok(None)` if the system does not expose a suitable cache directory. - fn get_or_cache( - &self, - path: &Utf8Path, - read_contents: &dyn Fn() -> Result, - ) -> Result> { - let Some(cache_dir) = self.cache_dir() else { - return Ok(None); - }; - - let cache_path = cache_dir.join(path); - - // The file has already been cached. - if self.is_file(&cache_path) { - return Ok(Some(cache_path)); - } - - // Read the file contents. - let contents = read_contents()?; - - // Create the parent directory. - self.create_directory_all(cache_path.parent().unwrap())?; - - // Create and write to the file on the system. - // - // Note that `create_new_file` will fail if the file has already been created. This - // ensures that only one thread/process ever attempts to write to it to avoid corrupting - // the cache. - self.create_new_file(&cache_path)?; - self.write_file(&cache_path, &contents)?; - - Ok(Some(cache_path)) - } -} - -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Metadata { - revision: FileRevision, - permissions: Option, - file_type: FileType, -} - -impl Metadata { - pub const fn new( - revision: FileRevision, - permissions: Option, - file_type: FileType, - ) -> Self { - Self { - revision, - permissions, - file_type, - } - } - - pub const fn revision(&self) -> FileRevision { - self.revision - } - - pub const fn permissions(&self) -> Option { - self.permissions - } - - pub const fn file_type(&self) -> FileType { - self.file_type - } -} - -#[derive(Copy, Clone, Eq, PartialEq, Debug, Hash)] -pub enum FileType { - File, - Directory, - Symlink, -} - -impl FileType { - pub const fn is_file(self) -> bool { - matches!(self, Self::File) - } - - pub const fn is_directory(self) -> bool { - matches!(self, Self::Directory) - } - - pub const fn is_symlink(self) -> bool { - matches!(self, Self::Symlink) - } -} - -impl From for FileType { - fn from(file_type: std::fs::FileType) -> Self { - if file_type.is_file() { - Self::File - } else if file_type.is_dir() { - Self::Directory - } else { - Self::Symlink - } - } -} - -/// A number representing the revision of a file. -/// -/// Two revisions that don't compare equal signify that the file has been modified. -/// Revisions aren't guaranteed to be monotonically increasing or in any specific order. -/// -/// Possible revisions are: -/// * The last modification time of the file. -/// * The hash of the file's content. -/// * The revision as it comes from an external system, for example the LSP. -#[derive(Copy, Clone, Debug, Eq, PartialEq, Default)] -pub struct FileRevision(u128); - -impl FileRevision { - pub const fn new(value: u128) -> Self { - Self(value) - } - - pub fn now() -> Self { - Self::from(file_time_now()) - } - - pub const fn zero() -> Self { - Self(0) - } - - #[must_use] - pub const fn as_u128(self) -> u128 { - self.0 - } -} - -impl From for FileRevision { - fn from(value: u128) -> Self { - Self(value) - } -} - -impl From for FileRevision { - fn from(value: u64) -> Self { - Self(u128::from(value)) - } -} - -impl From for FileRevision { - fn from(value: filetime::FileTime) -> Self { - #[allow(clippy::cast_sign_loss)] - let seconds = value.seconds() as u128; - let seconds = seconds << 64; - let nanos = u128::from(value.nanoseconds()); - - Self(seconds | nanos) - } -} - -pub fn file_time_now() -> FileTime { - FileTime::now() -} - -#[derive(Debug, PartialEq, Eq)] -pub struct DirectoryEntry { - path: Utf8PathBuf, - file_type: FileType, -} - -impl DirectoryEntry { - pub const fn new(path: Utf8PathBuf, file_type: FileType) -> Self { - Self { path, file_type } - } - - pub fn into_path(self) -> Utf8PathBuf { - self.path - } - - pub fn path(&self) -> &Utf8Path { - &self.path - } - - pub const fn file_type(&self) -> FileType { - self.file_type - } -} - -/// A glob iteration error. -/// -/// This is typically returned when a particular path cannot be read -/// to determine if its contents match the glob pattern. This is possible -/// if the program lacks the appropriate permissions, for example. -#[derive(Debug)] -pub struct GlobError { - path: PathBuf, - error: GlobErrorKind, -} - -impl GlobError { - /// The Path that the error corresponds to. - pub fn path(&self) -> &Path { - &self.path - } - - pub const fn kind(&self) -> &GlobErrorKind { - &self.error - } -} - -impl Error for GlobError {} - -impl std::fmt::Display for GlobError { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - match &self.error { - GlobErrorKind::IOError(error) => { - write!( - f, - "attempting to read `{}` resulted in an error: {error}", - self.path.display(), - ) - } - GlobErrorKind::NonUtf8Path => { - write!(f, "`{}` is not a valid UTF-8 path", self.path.display(),) - } - } - } -} - -impl From for GlobError { - fn from(value: glob::GlobError) -> Self { - Self { - path: value.path().to_path_buf(), - error: GlobErrorKind::IOError(value.into_error()), - } - } -} - -#[derive(Debug)] -pub enum GlobErrorKind { - IOError(io::Error), - NonUtf8Path, -} - -#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] -pub enum PySourceType { - /// The source is a Python file (`.py`, `.pyw`). - /// Note: `.pyw` files contain Python code, but do not represent importable namespaces. - /// Consider adding a separate source type later if combining the two causes issues. - #[default] - Python, - /// The source is a Python stub file (`.pyi`). - Stub, - /// The source is a Jupyter notebook (`.ipynb`). - Ipynb, -} - -impl PySourceType { - /// Infers the source type from the file extension. - /// - /// Falls back to `Python` if the extension is not recognized. - pub fn from_extension(extension: &str) -> Self { - Self::try_from_extension(extension).unwrap_or_default() - } - - /// Infers the source type from the file extension. - pub fn try_from_extension(extension: &str) -> Option { - let ty = match extension { - "pyi" => Self::Stub, - "py" | "pyw" => Self::Python, - "ipynb" => Self::Ipynb, - _ => return None, - }; - - Some(ty) - } - - pub fn try_from_path(path: impl AsRef) -> Option { - path.as_ref() - .extension() - .and_then(OsStr::to_str) - .and_then(Self::try_from_extension) - } - - pub const fn is_py_file(self) -> bool { - matches!(self, Self::Python) - } - - pub const fn is_stub(self) -> bool { - matches!(self, Self::Stub) - } - - pub const fn is_py_file_or_stub(self) -> bool { - matches!(self, Self::Python | Self::Stub) - } - - pub const fn is_ipynb(self) -> bool { - matches!(self, Self::Ipynb) - } -} - -impl> From

for PySourceType { - fn from(path: P) -> Self { - Self::try_from_path(path).unwrap_or_default() - } -} diff --git a/crates/karva_project/src/system/os.rs b/crates/karva_project/src/system/os.rs deleted file mode 100644 index c26bff4c..00000000 --- a/crates/karva_project/src/system/os.rs +++ /dev/null @@ -1,575 +0,0 @@ -use std::{any::Any, num::NonZeroUsize, path::PathBuf, sync::Arc}; - -use camino::{Utf8Path, Utf8PathBuf}; -use filetime::FileTime; - -use crate::{ - max_parallelism, - system::{ - DirectoryEntry, GlobError, GlobErrorKind, Metadata, Result, System, WritableSystem, - walk_directory::{ - self, DirectoryWalker, WalkDirectoryBuilder, WalkDirectoryConfiguration, - WalkDirectoryVisitorBuilder, WalkState, - }, - }, -}; - -/// A system implementation that uses the OS file system. -#[derive(Debug, Clone)] -pub struct OsSystem { - inner: Arc, -} - -#[derive(Default, Debug)] -struct OsSystemInner { - cwd: Utf8PathBuf, -} - -impl OsSystem { - pub fn new(cwd: impl AsRef) -> Self { - let cwd = cwd.as_ref(); - assert!(cwd.is_absolute()); - - tracing::debug!( - "Architecture: {}, OS: {}", - std::env::consts::ARCH, - std::env::consts::OS, - ); - - Self { - // Spreading `..Default` because it isn't possible to feature gate the initializer of a single field. - inner: Arc::new(OsSystemInner { - cwd: cwd.to_path_buf(), - }), - } - } - - #[cfg(unix)] - #[allow(clippy::unnecessary_wraps)] - fn permissions(metadata: &std::fs::Metadata) -> Option { - use std::os::unix::fs::PermissionsExt; - - Some(metadata.permissions().mode()) - } - - #[cfg(not(unix))] - fn permissions(_metadata: &std::fs::Metadata) -> Option { - None - } -} - -impl System for OsSystem { - fn path_metadata(&self, path: &Utf8Path) -> Result { - let metadata = path.as_std_path().metadata()?; - let last_modified = FileTime::from_last_modification_time(&metadata); - - Ok(Metadata { - revision: last_modified.into(), - permissions: Self::permissions(&metadata), - file_type: metadata.file_type().into(), - }) - } - - fn canonicalize_path(&self, path: &Utf8Path) -> Result { - path.canonicalize_utf8().map(|path| { - Utf8PathBuf::from_path_buf(dunce::simplified(path.as_std_path()).to_path_buf()).unwrap() - }) - } - - fn read_to_string(&self, path: &Utf8Path) -> Result { - std::fs::read_to_string(path.as_std_path()) - } - - fn path_exists(&self, path: &Utf8Path) -> bool { - path.as_std_path().exists() - } - - fn current_directory(&self) -> &Utf8Path { - &self.inner.cwd - } - - fn user_config_directory(&self) -> Option { - use etcetera::BaseStrategy as _; - - let strategy = etcetera::base_strategy::choose_base_strategy().ok()?; - Utf8PathBuf::from_path_buf(strategy.config_dir()).ok() - } - - fn cache_dir(&self) -> Option { - use etcetera::BaseStrategy as _; - - let cache_dir = etcetera::base_strategy::choose_base_strategy() - .ok() - .map(|dirs| dirs.cache_dir().join("ty")) - .map(|cache_dir| { - if cfg!(windows) { - cache_dir.join("cache") - } else { - cache_dir - } - }) - .and_then(|path| Utf8PathBuf::from_path_buf(path).ok()) - .unwrap_or_else(|| Utf8PathBuf::from(".ty_cache")); - - Some(cache_dir) - } - - fn read_directory( - &self, - path: &Utf8Path, - ) -> Result>>> { - Ok(Box::new(path.read_dir_utf8()?.map(|res| { - let res = res?; - - let file_type = res.file_type()?; - Ok(DirectoryEntry { - path: res.into_path(), - file_type: file_type.into(), - }) - }))) - } - - /// Creates a builder to recursively walk `path`. - /// - /// The walker ignores files according to [`ignore::WalkBuilder::standard_filters`] - /// when setting [`WalkDirectoryBuilder::standard_filters`] to true. - fn walk_directory(&self, path: &Utf8Path) -> WalkDirectoryBuilder { - WalkDirectoryBuilder::new( - path, - OsDirectoryWalker { - cwd: self.current_directory().to_path_buf(), - }, - ) - } - - fn glob( - &self, - pattern: &str, - ) -> std::result::Result< - Box>>, - glob::PatternError, - > { - glob::glob(pattern).map(|inner| { - let iterator = inner.map(|result| { - let path = result?; - - let system_path = Utf8PathBuf::from_path_buf(path).map_err(|path| GlobError { - path, - error: GlobErrorKind::NonUtf8Path, - })?; - - Ok(system_path) - }); - - let boxed: Box> = Box::new(iterator); - boxed - }) - } - - fn env_var(&self, name: &str) -> std::result::Result { - std::env::var(name) - } - - fn as_writable(&self) -> Option<&dyn WritableSystem> { - Some(self) - } - - fn as_any(&self) -> &dyn Any { - self - } - - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - fn dyn_clone(&self) -> Box { - Box::new(self.clone()) - } -} - -impl WritableSystem for OsSystem { - fn create_new_file(&self, path: &Utf8Path) -> Result<()> { - std::fs::File::create_new(path).map(drop) - } - - fn write_file(&self, path: &Utf8Path, content: &str) -> Result<()> { - std::fs::write(path.as_std_path(), content) - } - - fn create_directory_all(&self, path: &Utf8Path) -> Result<()> { - std::fs::create_dir_all(path.as_std_path()) - } -} - -#[derive(Debug)] -struct OsDirectoryWalker { - cwd: Utf8PathBuf, -} - -impl DirectoryWalker for OsDirectoryWalker { - fn walk( - &self, - visitor_builder: &mut dyn WalkDirectoryVisitorBuilder, - configuration: WalkDirectoryConfiguration, - ) { - let WalkDirectoryConfiguration { - paths, - ignore_hidden: hidden, - standard_filters, - } = configuration; - - let Some((first, additional)) = paths.split_first() else { - return; - }; - - let mut builder = ignore::WalkBuilder::new(first.as_std_path()); - builder.current_dir(self.cwd.as_std_path()); - - builder.standard_filters(standard_filters); - builder.hidden(hidden); - - for additional_path in additional { - builder.add(additional_path.as_std_path()); - } - - builder.threads(max_parallelism().min(NonZeroUsize::new(12).unwrap()).get()); - - builder.build_parallel().run(|| { - let mut visitor = visitor_builder.build(); - - Box::new(move |entry| { - match entry { - Ok(entry) => { - // SAFETY: The walkdir crate supports `stdin` files and `file_type` can be `None` for these files. - // We don't make use of this feature, which is why unwrapping here is ok. - let file_type = entry.file_type().unwrap(); - let depth = entry.depth(); - - // `walkdir` reports errors related to parsing ignore files as part of the entry. - // These aren't fatal for us. We should keep going even if an ignore file contains a syntax error. - // But we log the error here for better visibility (same as ripgrep, Ruff ignores it) - if let Some(error) = entry.error() { - tracing::warn!("{error}"); - } - - match Utf8PathBuf::from_path_buf(entry.into_path()) { - Ok(path) => { - let directory_entry = walk_directory::DirectoryEntry { - path, - file_type: file_type.into(), - depth, - }; - - visitor.visit(Ok(directory_entry)).into() - } - Err(path) => { - visitor.visit(Err(walk_directory::Error { - depth: Some(depth), - kind: walk_directory::ErrorKind::NonUtf8Path { path }, - })); - - // Skip the entire directory because all the paths won't be UTF-8 paths. - ignore::WalkState::Skip - } - } - } - Err(error) => match ignore_to_walk_directory_error(error, None, None) { - Ok(error) => visitor.visit(Err(error)).into(), - Err(error) => { - // This should only be reached when the error is a `.ignore` file related error - // (which, should not be reported here but the `ignore` crate doesn't distinguish between ignore and IO errors). - // Let's log the error to at least make it visible. - tracing::warn!("Failed to traverse directory: {error}."); - ignore::WalkState::Continue - } - }, - } - }) - }); - } -} - -#[cold] -fn ignore_to_walk_directory_error( - error: ignore::Error, - path: Option, - depth: Option, -) -> std::result::Result { - use ignore::Error; - - match error { - Error::WithPath { path, err } => ignore_to_walk_directory_error(*err, Some(path), depth), - Error::WithDepth { err, depth } => ignore_to_walk_directory_error(*err, path, Some(depth)), - Error::WithLineNumber { err, .. } => ignore_to_walk_directory_error(*err, path, depth), - Error::Loop { child, ancestor } => { - match ( - Utf8PathBuf::from_path_buf(child), - Utf8PathBuf::from_path_buf(ancestor), - ) { - (Ok(child), Ok(ancestor)) => Ok(walk_directory::Error { - depth, - kind: walk_directory::ErrorKind::Loop { child, ancestor }, - }), - (Err(child), _) => Ok(walk_directory::Error { - depth, - kind: walk_directory::ErrorKind::NonUtf8Path { path: child }, - }), - // We should never reach this because we should never traverse into a non UTF8 path but handle it anyway. - (_, Err(ancestor)) => Ok(walk_directory::Error { - depth, - kind: walk_directory::ErrorKind::NonUtf8Path { path: ancestor }, - }), - } - } - - Error::Io(err) => match path.map(Utf8PathBuf::from_path_buf).transpose() { - Ok(path) => Ok(walk_directory::Error { - depth, - kind: walk_directory::ErrorKind::Io { path, err }, - }), - Err(path) => Ok(walk_directory::Error { - depth, - kind: walk_directory::ErrorKind::NonUtf8Path { path }, - }), - }, - - // Ignore related errors, we warn about them but we don't abort iteration because of them. - error @ (Error::Glob { .. } - | Error::UnrecognizedFileType(_) - | Error::InvalidDefinition - | Error::Partial(..)) => Err(error), - } -} - -impl From for ignore::WalkState { - fn from(value: WalkState) -> Self { - match value { - WalkState::Continue => Self::Continue, - WalkState::Skip => Self::Skip, - WalkState::Quit => Self::Quit, - } - } -} - -#[cfg(test)] -mod tests { - use tempfile::TempDir; - - use super::*; - use crate::system::{DirectoryEntry, FileType, walk_directory::tests::DirectoryEntryToString}; - - #[test] - fn read_directory() { - let tempdir = TempDir::new().unwrap(); - let tempdir_path = tempdir.path(); - std::fs::create_dir_all(tempdir_path.join("a/foo")).unwrap(); - let files = &["b.ts", "a/bar.py", "d.rs", "a/foo/bar.py", "a/baz.pyi"]; - for path in files { - std::fs::File::create(tempdir_path.join(path)).unwrap(); - } - - let tempdir_path = Utf8Path::from_path(tempdir_path).unwrap(); - let fs = OsSystem::new(tempdir_path); - - let mut sorted_contents: Vec = fs - .read_directory(&tempdir_path.join("a")) - .unwrap() - .map(Result::unwrap) - .collect(); - sorted_contents.sort_by(|a, b| a.path.cmp(&b.path)); - - let expected_contents = vec![ - DirectoryEntry::new(tempdir_path.join("a/bar.py"), FileType::File), - DirectoryEntry::new(tempdir_path.join("a/baz.pyi"), FileType::File), - DirectoryEntry::new(tempdir_path.join("a/foo"), FileType::Directory), - ]; - assert_eq!(sorted_contents, expected_contents); - } - - #[test] - fn read_directory_nonexistent() { - let tempdir = TempDir::new().unwrap(); - - let fs = OsSystem::new(Utf8Path::from_path(tempdir.path()).unwrap()); - let result = fs.read_directory(Utf8Path::new("doesnt_exist")); - assert!(result.is_err_and(|error| error.kind() == std::io::ErrorKind::NotFound)); - } - - #[test] - fn read_directory_on_file() { - let tempdir = TempDir::new().unwrap(); - let tempdir_path = tempdir.path(); - std::fs::File::create(tempdir_path.join("a.py")).unwrap(); - - let tempdir_path = Utf8Path::from_path(tempdir_path).unwrap(); - let fs = OsSystem::new(tempdir_path); - let result = fs.read_directory(&tempdir_path.join("a.py")); - let Err(error) = result else { - panic!("Expected the read_dir() call to fail!"); - }; - - // We can't assert the error kind here because it's apparently an unstable feature! - // https://github.com/rust-lang/rust/issues/86442 - // assert_eq!(error.kind(), std::io::ErrorKind::NotADirectory); - - // We can't even assert the error message on all platforms, as it's different on Windows, - // where the message is "The directory name is invalid" rather than "Not a directory". - if cfg!(unix) { - assert!(error.to_string().contains("Not a directory")); - } - } - - #[test] - fn walk_directory() -> std::io::Result<()> { - let tempdir = TempDir::new()?; - - let root = tempdir.path(); - std::fs::create_dir_all(root.join("a/b"))?; - std::fs::write(root.join("foo.py"), "print('foo')")?; - std::fs::write(root.join("a/bar.py"), "print('bar')")?; - std::fs::write(root.join("a/baz.py"), "print('baz')")?; - std::fs::write(root.join("a/b/c.py"), "print('c')")?; - - let root_sys = Utf8Path::from_path(root).unwrap(); - let system = OsSystem::new(root_sys); - - let writer = DirectoryEntryToString::new(root_sys.to_path_buf()); - - system.walk_directory(root_sys).run(|| { - Box::new(|entry| { - writer.write_entry(entry); - - WalkState::Continue - }) - }); - - assert_eq!( - writer.to_string(), - r#"{ - "": ( - Directory, - 0, - ), - "a": ( - Directory, - 1, - ), - "a/b": ( - Directory, - 2, - ), - "a/b/c.py": ( - File, - 3, - ), - "a/bar.py": ( - File, - 2, - ), - "a/baz.py": ( - File, - 2, - ), - "foo.py": ( - File, - 1, - ), -}"# - ); - - Ok(()) - } - - #[test] - fn walk_directory_ignore() -> std::io::Result<()> { - let tempdir = TempDir::new()?; - - let root = tempdir.path(); - std::fs::create_dir_all(root.join("a/b"))?; - std::fs::write(root.join("foo.py"), "print('foo')\n")?; - std::fs::write(root.join("a/bar.py"), "print('bar')\n")?; - std::fs::write(root.join("a/baz.py"), "print('baz')\n")?; - - // Exclude the `b` directory. - std::fs::write(root.join("a/.ignore"), "b/\n")?; - std::fs::write(root.join("a/b/c.py"), "print('c')\n")?; - - let root_sys = Utf8Path::from_path(root).unwrap(); - let system = OsSystem::new(root_sys); - - let writer = DirectoryEntryToString::new(root_sys.to_path_buf()); - - system - .walk_directory(root_sys) - .standard_filters(true) - .run(|| { - Box::new(|entry| { - writer.write_entry(entry); - WalkState::Continue - }) - }); - - assert_eq!( - writer.to_string(), - r#"{ - "": ( - Directory, - 0, - ), - "a": ( - Directory, - 1, - ), - "a/bar.py": ( - File, - 2, - ), - "a/baz.py": ( - File, - 2, - ), - "foo.py": ( - File, - 1, - ), -}"# - ); - - Ok(()) - } - - #[test] - fn walk_directory_file() -> std::io::Result<()> { - let tempdir = TempDir::new()?; - - let root = tempdir.path(); - std::fs::write(root.join("foo.py"), "print('foo')\n")?; - - let root_sys = Utf8Path::from_path(root).unwrap(); - let system = OsSystem::new(root_sys); - - let writer = DirectoryEntryToString::new(root_sys.to_path_buf()); - - system - .walk_directory(&root_sys.join("foo.py")) - .standard_filters(true) - .run(|| { - Box::new(|entry| { - writer.write_entry(entry); - WalkState::Continue - }) - }); - - assert_eq!( - writer.to_string(), - r#"{ - "foo.py": ( - File, - 0, - ), -}"# - ); - - Ok(()) - } -} diff --git a/crates/karva_project/src/system/walk_directory.rs b/crates/karva_project/src/system/walk_directory.rs deleted file mode 100644 index 2b599125..00000000 --- a/crates/karva_project/src/system/walk_directory.rs +++ /dev/null @@ -1,324 +0,0 @@ -use std::{ - fmt::{Display, Formatter}, - path::PathBuf, -}; - -use camino::{Utf8Path, Utf8PathBuf}; - -use super::FileType; - -/// A builder for constructing a directory recursive traversal. -pub struct WalkDirectoryBuilder { - /// The implementation that does the directory walking. - walker: Box, - - /// The paths that should be walked. - paths: Vec, - - ignore_hidden: bool, - - standard_filters: bool, -} - -impl WalkDirectoryBuilder { - pub fn new(path: impl AsRef, walker: W) -> Self - where - W: DirectoryWalker + 'static, - { - Self { - walker: Box::new(walker), - paths: vec![path.as_ref().to_path_buf()], - ignore_hidden: true, - standard_filters: true, - } - } - - /// Adds a path that should be traversed recursively. - /// - /// Each additional path is traversed recursively. - /// This should be preferred over building multiple - /// walkers since it enables reusing resources. - #[expect(clippy::should_implement_trait)] - pub fn add(mut self, path: impl AsRef) -> Self { - self.paths.push(path.as_ref().to_path_buf()); - self - } - - /// Whether hidden files should be ignored. - /// - /// The definition of what a hidden file depends on the [`System`](super::System) and can be platform-dependent. - /// - /// This is enabled by default. - pub fn ignore_hidden(mut self, hidden: bool) -> Self { - self.ignore_hidden = hidden; - self - } - - /// Enables all the standard ignore filters. - /// - /// This toggles, as a group, all the filters that are enabled by default: - /// * [`hidden`](Self::ignore_hidden) - /// * Any [`System`](super::System) specific filters according (e.g., respecting `.ignore`, `.gitignore`, files). - /// - /// Defaults to `true`. - pub fn standard_filters(mut self, standard_filters: bool) -> Self { - self.standard_filters = standard_filters; - self.ignore_hidden = standard_filters; - - self - } - - /// Runs the directory traversal and calls the passed `builder` to create visitors - /// that do the visiting. The walker may run multiple threads to visit the directories. - pub fn run<'s, F>(self, builder: F) - where - F: FnMut() -> FnVisitor<'s>, - { - self.visit(&mut FnBuilder { builder }); - } - - /// Runs the directory traversal and calls the passed `builder` to create visitors - /// that do the visiting. The walker may run multiple threads to visit the directories. - pub fn visit(self, builder: &mut dyn WalkDirectoryVisitorBuilder) { - let configuration = WalkDirectoryConfiguration { - paths: self.paths, - ignore_hidden: self.ignore_hidden, - standard_filters: self.standard_filters, - }; - - self.walker.walk(builder, configuration); - } -} - -/// Concrete walker that performs the directory walking. -pub trait DirectoryWalker { - fn walk( - &self, - builder: &mut dyn WalkDirectoryVisitorBuilder, - configuration: WalkDirectoryConfiguration, - ); -} - -/// Creates a visitor for each thread that does the visiting. -pub trait WalkDirectoryVisitorBuilder<'s> { - fn build(&mut self) -> Box; -} - -/// Visitor handling the individual directory entries. -pub trait WalkDirectoryVisitor: Send { - fn visit(&mut self, entry: std::result::Result) -> WalkState; -} - -struct FnBuilder { - builder: F, -} - -impl<'s, F> WalkDirectoryVisitorBuilder<'s> for FnBuilder -where - F: FnMut() -> FnVisitor<'s>, -{ - fn build(&mut self) -> Box { - let visitor = (self.builder)(); - Box::new(FnVisitorImpl(visitor)) - } -} - -type FnVisitor<'s> = - Box) -> WalkState + Send + 's>; - -struct FnVisitorImpl<'s>(FnVisitor<'s>); - -impl WalkDirectoryVisitor for FnVisitorImpl<'_> { - fn visit(&mut self, entry: std::result::Result) -> WalkState { - (self.0)(entry) - } -} - -pub struct WalkDirectoryConfiguration { - pub paths: Vec, - pub ignore_hidden: bool, - pub standard_filters: bool, -} - -/// An entry in a directory. -#[derive(Debug, Clone)] -pub struct DirectoryEntry { - pub(super) path: Utf8PathBuf, - pub(super) file_type: FileType, - pub(super) depth: usize, -} - -impl DirectoryEntry { - /// The full path that this entry represents. - pub fn path(&self) -> &Utf8Path { - &self.path - } - - /// The full path that this entry represents. - /// Analogous to [`DirectoryEntry::path`], but moves ownership of the path. - pub fn into_path(self) -> Utf8PathBuf { - self.path - } - - /// Return the file type for the file that this entry points to. - pub fn file_type(&self) -> FileType { - self.file_type - } - - /// Returns the depth at which this entry was created relative to the root. - pub fn depth(&self) -> usize { - self.depth - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum WalkState { - /// Continue walking as normal - Continue, - - /// If the entry given is a directory, don't descend into it. - /// In all other cases, this has no effect. - Skip, - - /// Quit the entire iterator as soon as possible. - /// - /// Note: This is an inherently asynchronous action. It's possible - /// for more entries to be yielded even after instructing the iterator to quit. - Quit, -} - -pub struct Error { - pub(super) depth: Option, - pub(super) kind: ErrorKind, -} - -impl Error { - pub fn depth(&self) -> Option { - self.depth - } - - pub fn kind(&self) -> &ErrorKind { - &self.kind - } -} - -impl Display for Error { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match &self.kind { - ErrorKind::Loop { ancestor, child } => { - write!( - f, - "File system loop found: {child} points to an ancestor {ancestor}", - ) - } - ErrorKind::Io { - path: Some(path), - err, - } => { - write!(f, "IO error for operation on {path}: {err}") - } - ErrorKind::Io { path: None, err } => err.fmt(f), - ErrorKind::NonUtf8Path { path } => { - write!(f, "Non-UTF8 path: {}", path.display()) - } - } - } -} - -impl std::fmt::Debug for Error { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - std::fmt::Display::fmt(self, f) - } -} - -impl std::error::Error for Error {} - -#[derive(Debug)] -pub enum ErrorKind { - /// An error that occurs when a file loop is detected when traversing - /// symbolic links. - Loop { - ancestor: Utf8PathBuf, - child: Utf8PathBuf, - }, - - /// An error that occurs when doing I/O - Io { - path: Option, - err: std::io::Error, - }, - - /// A path is not a valid UTF-8 path. - NonUtf8Path { path: PathBuf }, -} - -#[cfg(test)] -pub(super) mod tests { - use std::collections::BTreeMap; - - use crate::system::{ - FileType, Utf8PathBuf, - walk_directory::{DirectoryEntry, Error}, - }; - - /// Test helper that creates a visual representation of the visited directory entries. - pub(crate) struct DirectoryEntryToString { - root_path: Utf8PathBuf, - inner: std::sync::Mutex, - } - - impl DirectoryEntryToString { - pub(crate) fn new(root_path: Utf8PathBuf) -> Self { - Self { - root_path, - inner: std::sync::Mutex::new(DirectoryEntryToStringInner::default()), - } - } - - pub(crate) fn write_entry(&self, entry: Result) { - let mut inner = self.inner.lock().unwrap(); - let DirectoryEntryToStringInner { errors, visited } = &mut *inner; - - match entry { - Ok(entry) => { - let relative_path = entry - .path() - .strip_prefix(&self.root_path) - .unwrap_or(entry.path()); - - let unix_path = relative_path - .components() - .map(|component| component.as_str()) - .collect::>() - .join("/"); - - visited.insert(unix_path, (entry.file_type, entry.depth)); - } - Err(error) => { - errors.push_str(&error.to_string()); - errors.push('\n'); - } - } - } - } - - impl std::fmt::Display for DirectoryEntryToString { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let inner = self.inner.lock().unwrap(); - write!(f, "{paths:#?}", paths = inner.visited)?; - - if !inner.errors.is_empty() { - writeln!(f, "\n\n{errors}", errors = inner.errors).unwrap(); - } - - Ok(()) - } - } - - #[derive(Default)] - struct DirectoryEntryToStringInner { - errors: String, - /// Stores the visited path. The key is the relative path to the root, using `/` as path separator. - visited: BTreeMap, - } -} From b4a1d61300c3eeb1ab65e7dbe63add55db43b8fd Mon Sep 17 00:00:00 2001 From: Matthew Mckee Date: Wed, 3 Dec 2025 14:25:05 +0000 Subject: [PATCH 03/13] Remove unused dependencies --- Cargo.lock | 41 --------------------------------- Cargo.toml | 8 ------- crates/karva_core/Cargo.toml | 2 -- crates/karva_project/Cargo.toml | 9 -------- 4 files changed, 60 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6a8b6092..ead66613 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -715,16 +715,6 @@ dependencies = [ "windows-sys 0.61.2", ] -[[package]] -name = "etcetera" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de48cc4d1c1d97a20fd819def54b890cadde72ed3ad0c614822a0a433361be96" -dependencies = [ - "cfg-if", - "windows-sys 0.61.2", -] - [[package]] name = "fastrand" version = "2.3.0" @@ -1174,12 +1164,10 @@ dependencies = [ "crossbeam-channel", "ctor", "ignore", - "indexmap", "insta", "karva_project", "karva_test", "pyo3", - "rayon", "regex", "rstest", "ruff_db", @@ -1223,18 +1211,9 @@ name = "karva_project" version = "0.0.0" dependencies = [ "camino", - "dunce", - "etcetera", - "filetime", - "glob", - "ignore", "karva_test", "ruff_db", "ruff_python_ast", - "rustc-hash", - "tempfile", - "thiserror", - "tracing", "tracing-subscriber", ] @@ -1739,26 +1718,6 @@ dependencies = [ "getrandom 0.3.4", ] -[[package]] -name = "rayon" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - [[package]] name = "redox_syscall" version = "0.5.18" diff --git a/Cargo.toml b/Cargo.toml index cdb73643..4a9b3a00 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,13 +23,7 @@ ctor = "0.6.1" ctrlc = { version = "3.5.1" } divan = { package = "codspeed-divan-compat", version = "4.1.0" } dunce = { version = "1.0.5" } -etcetera = { version = "0.11.0" } -# globset = { version = "0.4.14" } -# globwalk = { version = "0.9.1" } -filetime = { version = "0.2.23" } -glob = { version = "0.3.1" } ignore = { version = "0.4.25" } -indexmap = { version = "2.12.1" } insta = { version = "1.44.3" } insta-cmd = { version = "0.6.0" } itertools = { version = "0.14.0" } @@ -51,11 +45,9 @@ ruff_python_parser = { git = "https://github.com/astral-sh/ruff/", branch = "mai ruff_python_trivia = { git = "https://github.com/astral-sh/ruff/", branch = "main" } ruff_source_file = { git = "https://github.com/astral-sh/ruff/", branch = "main" } ruff_text_size = { git = "https://github.com/astral-sh/ruff/", branch = "main" } -rustc-hash = { version = "2.0.0" } serde = { version = "1.0.228" } serde_json = { version = "1.0.145" } tempfile = "3.23" -thiserror = { version = "2.0.0" } tracing = { version = "0.1.41", features = ["release_max_level_debug"] } tracing-flame = { version = "0.2.0" } tracing-subscriber = { version = "0.3.20", default-features = false, features = [ diff --git a/crates/karva_core/Cargo.toml b/crates/karva_core/Cargo.toml index d4d774a0..e3875651 100644 --- a/crates/karva_core/Cargo.toml +++ b/crates/karva_core/Cargo.toml @@ -26,8 +26,6 @@ tempfile = { workspace = true } tracing = { workspace = true } ignore = { workspace = true } regex = { workspace = true } -indexmap = { workspace = true } -rayon = "1.11.0" crossbeam-channel = "0.5.15" [dev-dependencies] diff --git a/crates/karva_project/Cargo.toml b/crates/karva_project/Cargo.toml index 88ea9df7..99607dd1 100644 --- a/crates/karva_project/Cargo.toml +++ b/crates/karva_project/Cargo.toml @@ -15,18 +15,9 @@ camino = { workspace = true } ruff_python_ast = { workspace = true } tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] } ruff_db = { workspace = true } -tracing = { workspace = true } -filetime = { workspace = true } -glob = { workspace = true } -rustc-hash = { workspace = true } -dunce = { workspace = true } -etcetera = { workspace = true } -ignore = { workspace = true } -thiserror = { workspace = true } [dev-dependencies] karva_test = { workspace = true } -tempfile = { workspace = true } [lints] workspace = true From 0088442629347eaa67bca3a2d110eba692a9148e Mon Sep 17 00:00:00 2001 From: Matthew Mckee Date: Wed, 3 Dec 2025 14:26:59 +0000 Subject: [PATCH 04/13] Add debug --- crates/karva_core/src/diagnostic/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crates/karva_core/src/diagnostic/mod.rs b/crates/karva_core/src/diagnostic/mod.rs index 72faeac1..b43c3a86 100644 --- a/crates/karva_core/src/diagnostic/mod.rs +++ b/crates/karva_core/src/diagnostic/mod.rs @@ -123,6 +123,8 @@ pub fn report_invalid_path(context: &Context, error: &TestPathError) { pub fn report_failed_to_import_module(context: &Context, module_name: &str, error: &str) { let builder = context.report_discovery_diagnostic(&FAILED_TO_IMPORT_MODULE); + tracing::debug!("Failed to import python module `{module_name}`: {error}"); + builder.into_diagnostic(format!( "Failed to import python module `{module_name}`: {error}" )); From c8ab71dc30582bb2d2f9caa7dc8e6d3f3dcb5b52 Mon Sep 17 00:00:00 2001 From: Matthew Mckee Date: Wed, 3 Dec 2025 14:27:24 +0000 Subject: [PATCH 05/13] Remove pycache filter --- crates/karva_core/src/collection/collector.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crates/karva_core/src/collection/collector.rs b/crates/karva_core/src/collection/collector.rs index 281b182e..6b2d0c78 100644 --- a/crates/karva_core/src/collection/collector.rs +++ b/crates/karva_core/src/collection/collector.rs @@ -232,10 +232,6 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { types.select("python"); types.build().unwrap() }) - .filter_entry(|entry| { - let file_name = entry.file_name(); - file_name != "__pycache__" - }) .build_parallel() } } From 8330395edc40f202a5d6c2da58c0eb14fbff9fe0 Mon Sep 17 00:00:00 2001 From: Matthew Mckee Date: Wed, 3 Dec 2025 14:28:27 +0000 Subject: [PATCH 06/13] Fix cargo format --- crates/karva_core/src/collection/models.rs | 6 ++---- crates/karva_core/src/discovery/discoverer.rs | 14 ++++++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/crates/karva_core/src/collection/models.rs b/crates/karva_core/src/collection/models.rs index 02f48579..730eddc9 100644 --- a/crates/karva_core/src/collection/models.rs +++ b/crates/karva_core/src/collection/models.rs @@ -1,11 +1,9 @@ -use std::collections::HashMap; -use std::sync::Arc; +use std::{collections::HashMap, sync::Arc}; use camino::Utf8PathBuf; use ruff_python_ast::StmtFunctionDef; -use crate::discovery::ModuleType; -use crate::name::ModulePath; +use crate::{discovery::ModuleType, name::ModulePath}; /// A collected module containing raw AST function definitions. /// This is populated during the parallel collection phase. diff --git a/crates/karva_core/src/discovery/discoverer.rs b/crates/karva_core/src/discovery/discoverer.rs index 8cbcaea8..946fcffd 100644 --- a/crates/karva_core/src/discovery/discoverer.rs +++ b/crates/karva_core/src/discovery/discoverer.rs @@ -47,7 +47,10 @@ impl<'ctx, 'proj, 'rep> StandardDiscoverer<'ctx, 'proj, 'rep> { TestPath::Directory(dir_path) => { all_directory_paths.insert(dir_path); } - TestPath::Function { path, function_name } => { + TestPath::Function { + path, + function_name, + } => { function_filters.insert(path, function_name); } } @@ -75,7 +78,8 @@ impl<'ctx, 'proj, 'rep> StandardDiscoverer<'ctx, 'proj, 'rep> { tracing::info!("Discovering test functions and fixtures..."); // Phase 2: Discovery (single-threaded) - convert collected AST to test functions and fixtures - let mut session_package = self.convert_collected_to_discovered(py, &collected_package, &function_filters); + let mut session_package = + self.convert_collected_to_discovered(py, &collected_package, &function_filters); session_package.shrink(); @@ -99,8 +103,10 @@ impl<'ctx, 'proj, 'rep> StandardDiscoverer<'ctx, 'proj, 'rep> { collected_module.file_path().into(), ); - let DiscoveredFunctions { functions, fixtures } = - super::visitor::discover(self.context, py, &module); + let DiscoveredFunctions { + functions, + fixtures, + } = super::visitor::discover(self.context, py, &module); module.extend_test_functions(functions); module.extend_fixtures(fixtures); From bf68d8ef232cb453d32b9695318b67135237d8d8 Mon Sep 17 00:00:00 2001 From: Matthew Mckee Date: Wed, 3 Dec 2025 14:41:18 +0000 Subject: [PATCH 07/13] Try fix --- crates/karva_core/src/collection/collector.rs | 34 ++++++-- crates/karva_core/src/collection/models.rs | 84 +++++++++---------- crates/karva_diff/src/main.rs | 5 +- 3 files changed, 70 insertions(+), 53 deletions(-) diff --git a/crates/karva_core/src/collection/collector.rs b/crates/karva_core/src/collection/collector.rs index 6b2d0c78..67ed68ca 100644 --- a/crates/karva_core/src/collection/collector.rs +++ b/crates/karva_core/src/collection/collector.rs @@ -7,7 +7,10 @@ use ruff_python_ast::Stmt; use ruff_python_parser::{Mode, ParseOptions, parse_unchecked}; use super::models::{CollectedModule, CollectedPackage}; -use crate::{Context, diagnostic::report_invalid_path, discovery::ModuleType, name::ModulePath}; +use crate::{ + Context, diagnostic::report_invalid_path, discovery::ModuleType, + extensions::fixtures::is_fixture_function, name::ModulePath, +}; pub struct ParallelCollector<'ctx, 'proj, 'rep> { context: &'ctx Context<'proj, 'rep>, @@ -24,16 +27,17 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { path, self.context.project().cwd(), self.context.project().metadata().python_version(), + self.context.project().options().test_prefix(), ) } /// Collect from a single test file path. - pub fn collect_test_file(&self, path: &Utf8PathBuf) -> Option { + pub(crate) fn collect_test_file(&self, path: &Utf8PathBuf) -> Option { self.collect_file(path) } /// Collect from a directory in parallel using `WalkParallel`. - pub fn collect_directory(&self, path: &Utf8PathBuf) -> CollectedPackage { + pub(crate) fn collect_directory(&self, path: &Utf8PathBuf) -> CollectedPackage { let mut package = CollectedPackage::new(path.clone()); // Create channels for communication @@ -52,10 +56,12 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { let walker = self.create_parallel_walker(path); let cwd = self.context.project().cwd().clone(); let python_version = self.context.project().metadata().python_version(); + let test_prefix = self.context.project().options().test_prefix().to_string(); walker.run(|| { let tx = tx.clone(); let cwd = cwd.clone(); + let test_prefix = test_prefix.clone(); Box::new(move |entry| { let Ok(entry) = entry else { @@ -72,7 +78,9 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { }; // Collect the module - if let Some(module) = Self::collect_file_static(&file_path, &cwd, python_version) { + if let Some(module) = + Self::collect_file_static(&file_path, &cwd, python_version, &test_prefix) + { let _ = tx.send(module); } @@ -107,6 +115,7 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { path: &Utf8PathBuf, cwd: &Utf8PathBuf, python_version: ruff_python_ast::PythonVersion, + test_prefix: &str, ) -> Option { let module_path = ModulePath::new(path, cwd)?; @@ -116,8 +125,7 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { let module_type: ModuleType = path.into(); - let mut collected_module = - CollectedModule::new(module_path, path.clone(), module_type, source_text.clone()); + let mut collected_module = CollectedModule::new(module_path, path.clone(), module_type); // Parse the file to collect function definitions let parsed = parse_unchecked( @@ -126,10 +134,18 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { ) .try_into_module()?; - // Collect all top-level function definitions + // Collect and categorize top-level function definitions for stmt in &parsed.syntax().body { if let Stmt::FunctionDef(function_def) = stmt { - collected_module.add_function_def(Arc::new(function_def.clone())); + // Check if it's a fixture function + if is_fixture_function(function_def) { + collected_module.add_fixture_function_def(Arc::new(function_def.clone())); + } + // Check if it's a test function (starts with test prefix) + else if function_def.name.to_string().starts_with(test_prefix) { + collected_module.add_test_function_def(Arc::new(function_def.clone())); + } + // Otherwise, ignore the function (it's not relevant for testing) } } @@ -137,7 +153,7 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { } /// Collect from all paths and build a complete package structure. - pub fn collect_all(&self) -> CollectedPackage { + pub(crate) fn collect_all(&self) -> CollectedPackage { let mut session_package = CollectedPackage::new(self.context.project().cwd().clone()); // Process all test paths diff --git a/crates/karva_core/src/collection/models.rs b/crates/karva_core/src/collection/models.rs index 730eddc9..0b747e3c 100644 --- a/crates/karva_core/src/collection/models.rs +++ b/crates/karva_core/src/collection/models.rs @@ -12,55 +12,49 @@ pub struct CollectedModule { path: ModulePath, file_path: Utf8PathBuf, module_type: ModuleType, - source_text: String, - /// All function definitions found in this module (both tests and fixtures) - function_defs: Vec>, + /// Test function definitions (functions starting with test prefix) + test_function_defs: Vec>, + /// Fixture function definitions (functions with fixture decorators) + fixture_function_defs: Vec>, } impl CollectedModule { - pub const fn new( + pub(crate) const fn new( path: ModulePath, file_path: Utf8PathBuf, module_type: ModuleType, - source_text: String, ) -> Self { Self { path, file_path, module_type, - source_text, - function_defs: Vec::new(), + test_function_defs: Vec::new(), + fixture_function_defs: Vec::new(), } } - pub fn add_function_def(&mut self, function_def: Arc) { - self.function_defs.push(function_def); + pub(crate) fn add_test_function_def(&mut self, function_def: Arc) { + self.test_function_defs.push(function_def); } - pub const fn path(&self) -> &ModulePath { + pub(crate) fn add_fixture_function_def(&mut self, function_def: Arc) { + self.fixture_function_defs.push(function_def); + } + + pub(crate) const fn path(&self) -> &ModulePath { &self.path } - pub const fn file_path(&self) -> &Utf8PathBuf { + pub(crate) const fn file_path(&self) -> &Utf8PathBuf { &self.file_path } - pub const fn module_type(&self) -> ModuleType { + pub(crate) const fn module_type(&self) -> ModuleType { self.module_type } - #[allow(dead_code)] - pub fn source_text(&self) -> &str { - &self.source_text - } - - #[allow(dead_code)] - pub fn function_defs(&self) -> &[Arc] { - &self.function_defs - } - - pub fn is_empty(&self) -> bool { - self.function_defs.is_empty() + pub(crate) fn is_empty(&self) -> bool { + self.test_function_defs.is_empty() && self.fixture_function_defs.is_empty() } } @@ -75,7 +69,7 @@ pub struct CollectedPackage { } impl CollectedPackage { - pub fn new(path: Utf8PathBuf) -> Self { + pub(crate) fn new(path: Utf8PathBuf) -> Self { Self { path, modules: HashMap::new(), @@ -84,23 +78,18 @@ impl CollectedPackage { } } - pub const fn path(&self) -> &Utf8PathBuf { + pub(crate) const fn path(&self) -> &Utf8PathBuf { &self.path } - pub const fn modules(&self) -> &HashMap { + pub(crate) const fn modules(&self) -> &HashMap { &self.modules } - pub const fn packages(&self) -> &HashMap { + pub(crate) const fn packages(&self) -> &HashMap { &self.packages } - #[allow(dead_code)] - pub const fn configuration_module_path(&self) -> Option<&Utf8PathBuf> { - self.configuration_module_path.as_ref() - } - /// Add a module to this package. /// /// If the module path does not start with our path, do nothing. @@ -108,7 +97,7 @@ impl CollectedPackage { /// If the module path equals our path, use update method. /// /// Otherwise, strip the current path from the start and add the module to the appropriate sub-package. - pub fn add_module(&mut self, module: CollectedModule) { + pub(crate) fn add_module(&mut self, module: CollectedModule) { if !module.file_path().starts_with(self.path()) { return; } @@ -156,7 +145,7 @@ impl CollectedPackage { } } - pub fn add_configuration_module(&mut self, module: CollectedModule) { + pub(crate) fn add_configuration_module(&mut self, module: CollectedModule) { self.configuration_module_path = Some(module.file_path().clone()); self.add_module(module); } @@ -166,7 +155,7 @@ impl CollectedPackage { /// If the package path equals our path, use update method. /// /// Otherwise, strip the current path from the start and add the package to the appropriate sub-package. - pub fn add_package(&mut self, package: Self) { + pub(crate) fn add_package(&mut self, package: Self) { if !package.path().starts_with(self.path()) { return; } @@ -199,7 +188,7 @@ impl CollectedPackage { } } - pub fn update(&mut self, package: Self) { + pub(crate) fn update(&mut self, package: Self) { for (_, module) in package.modules { self.add_module(module); } @@ -212,11 +201,11 @@ impl CollectedPackage { } } - pub fn is_empty(&self) -> bool { + pub(crate) fn is_empty(&self) -> bool { self.modules.is_empty() && self.packages.is_empty() } - pub fn shrink(&mut self) { + pub(crate) fn shrink(&mut self) { self.modules.retain(|_, module| !module.is_empty()); if let Some(configuration_module) = self.configuration_module_path.as_ref() { @@ -236,15 +225,24 @@ impl CollectedPackage { impl CollectedModule { /// Update this module with another module. /// Merges function definitions from the other module into this one. - pub fn update(&mut self, module: Self) { + pub(crate) fn update(&mut self, module: Self) { if self.path == module.path { - for function_def in module.function_defs { + for function_def in module.test_function_defs { + if !self + .test_function_defs + .iter() + .any(|existing| existing.name == function_def.name) + { + self.test_function_defs.push(function_def); + } + } + for function_def in module.fixture_function_defs { if !self - .function_defs + .fixture_function_defs .iter() .any(|existing| existing.name == function_def.name) { - self.function_defs.push(function_def); + self.fixture_function_defs.push(function_def); } } } diff --git a/crates/karva_diff/src/main.rs b/crates/karva_diff/src/main.rs index 2f359d24..ef1b2ecd 100644 --- a/crates/karva_diff/src/main.rs +++ b/crates/karva_diff/src/main.rs @@ -177,5 +177,8 @@ fn run( fn extract_test_result(output: &[u8]) -> String { let output_str = String::from_utf8_lossy(output); - output_str.to_string() + // Strip `; finished in` and all text after it. + let strip_index = output_str.find("; finished in").unwrap_or(output_str.len()); + + output_str[..strip_index].to_string() } From c11362035c8301f2cd541978271f61c3ae6a7a97 Mon Sep 17 00:00:00 2001 From: Matthew Mckee Date: Wed, 3 Dec 2025 14:53:50 +0000 Subject: [PATCH 08/13] Try speed up --- crates/karva_core/src/collection/collector.rs | 26 +++++++++++++++---- crates/karva_core/src/collection/models.rs | 22 ++++++++++++++-- crates/karva_core/src/discovery/discoverer.rs | 7 ++--- .../karva_core/src/discovery/models/module.rs | 8 +++--- crates/karva_core/src/discovery/visitor.rs | 12 ++++----- crates/karva_project/src/envs.rs | 19 ++++++++++++++ crates/karva_project/src/lib.rs | 2 ++ 7 files changed, 77 insertions(+), 19 deletions(-) create mode 100644 crates/karva_project/src/envs.rs diff --git a/crates/karva_core/src/collection/collector.rs b/crates/karva_core/src/collection/collector.rs index 67ed68ca..4e535eb8 100644 --- a/crates/karva_core/src/collection/collector.rs +++ b/crates/karva_core/src/collection/collector.rs @@ -125,8 +125,6 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { let module_type: ModuleType = path.into(); - let mut collected_module = CollectedModule::new(module_path, path.clone(), module_type); - // Parse the file to collect function definitions let parsed = parse_unchecked( &source_text, @@ -134,21 +132,35 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { ) .try_into_module()?; - // Collect and categorize top-level function definitions + // Collect and categorize top-level function definitions from the parsed AST + let mut test_defs = Vec::new(); + let mut fixture_defs = Vec::new(); + for stmt in &parsed.syntax().body { if let Stmt::FunctionDef(function_def) = stmt { // Check if it's a fixture function if is_fixture_function(function_def) { - collected_module.add_fixture_function_def(Arc::new(function_def.clone())); + fixture_defs.push(Arc::new(function_def.clone())); } // Check if it's a test function (starts with test prefix) else if function_def.name.to_string().starts_with(test_prefix) { - collected_module.add_test_function_def(Arc::new(function_def.clone())); + test_defs.push(Arc::new(function_def.clone())); } // Otherwise, ignore the function (it's not relevant for testing) } } + let mut collected_module = + CollectedModule::new(module_path, path.clone(), module_type, source_text, parsed); + + // Add collected functions + for test_def in test_defs { + collected_module.add_test_function_def(test_def); + } + for fixture_def in fixture_defs { + collected_module.add_fixture_function_def(fixture_def); + } + Some(collected_module) } @@ -236,7 +248,11 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { /// Creates a configured parallel directory walker for Python file discovery. fn create_parallel_walker(&self, path: &Utf8PathBuf) -> ignore::WalkParallel { + // Configure thread pool size for optimal parallelism + let num_threads = karva_project::max_parallelism().get(); + WalkBuilder::new(path) + .threads(num_threads) .standard_filters(true) .require_git(false) .git_global(false) diff --git a/crates/karva_core/src/collection/models.rs b/crates/karva_core/src/collection/models.rs index 0b747e3c..d54a970c 100644 --- a/crates/karva_core/src/collection/models.rs +++ b/crates/karva_core/src/collection/models.rs @@ -1,7 +1,8 @@ use std::{collections::HashMap, sync::Arc}; use camino::Utf8PathBuf; -use ruff_python_ast::StmtFunctionDef; +use ruff_python_ast::{ModModule, StmtFunctionDef}; +use ruff_python_parser::Parsed; use crate::{discovery::ModuleType, name::ModulePath}; @@ -12,6 +13,11 @@ pub struct CollectedModule { path: ModulePath, file_path: Utf8PathBuf, module_type: ModuleType, + /// The source text of the file (cached to avoid re-reading) + source_text: String, + /// Parsed AST (cached to avoid re-parsing) - stored for potential future optimization + #[allow(dead_code)] + parsed_ast: Arc>, /// Test function definitions (functions starting with test prefix) test_function_defs: Vec>, /// Fixture function definitions (functions with fixture decorators) @@ -19,15 +25,19 @@ pub struct CollectedModule { } impl CollectedModule { - pub(crate) const fn new( + pub(crate) fn new( path: ModulePath, file_path: Utf8PathBuf, module_type: ModuleType, + source_text: String, + parsed_ast: Parsed, ) -> Self { Self { path, file_path, module_type, + source_text, + parsed_ast: Arc::new(parsed_ast), test_function_defs: Vec::new(), fixture_function_defs: Vec::new(), } @@ -53,6 +63,14 @@ impl CollectedModule { self.module_type } + pub(crate) fn source_text(&self) -> &str { + &self.source_text + } + + pub(crate) fn parsed_ast(&self) -> &Parsed { + &self.parsed_ast + } + pub(crate) fn is_empty(&self) -> bool { self.test_function_defs.is_empty() && self.fixture_function_defs.is_empty() } diff --git a/crates/karva_core/src/discovery/discoverer.rs b/crates/karva_core/src/discovery/discoverer.rs index 946fcffd..68ce03e6 100644 --- a/crates/karva_core/src/discovery/discoverer.rs +++ b/crates/karva_core/src/discovery/discoverer.rs @@ -98,15 +98,16 @@ impl<'ctx, 'proj, 'rep> StandardDiscoverer<'ctx, 'proj, 'rep> { // Convert all modules for collected_module in collected_package.modules().values() { - let mut module = DiscoveredModule::new( + let mut module = DiscoveredModule::new_with_source( collected_module.path().clone(), - collected_module.file_path().into(), + collected_module.module_type(), + collected_module.source_text().to_string(), ); let DiscoveredFunctions { functions, fixtures, - } = super::visitor::discover(self.context, py, &module); + } = super::visitor::discover(self.context, py, &module, collected_module.parsed_ast()); module.extend_test_functions(functions); module.extend_fixtures(fixtures); diff --git a/crates/karva_core/src/discovery/models/module.rs b/crates/karva_core/src/discovery/models/module.rs index a9b12602..8dc7c1b5 100644 --- a/crates/karva_core/src/discovery/models/module.rs +++ b/crates/karva_core/src/discovery/models/module.rs @@ -14,9 +14,11 @@ pub struct DiscoveredModule { } impl DiscoveredModule { - pub(crate) fn new(path: ModulePath, module_type: ModuleType) -> Self { - let source_text = std::fs::read_to_string(path.path()).expect("Failed to read source file"); - + pub(crate) const fn new_with_source( + path: ModulePath, + module_type: ModuleType, + source_text: String, + ) -> Self { Self { path, test_functions: Vec::new(), diff --git a/crates/karva_core/src/discovery/visitor.rs b/crates/karva_core/src/discovery/visitor.rs index 382aa2cc..6eb96dc4 100644 --- a/crates/karva_core/src/discovery/visitor.rs +++ b/crates/karva_core/src/discovery/visitor.rs @@ -264,17 +264,17 @@ pub struct DiscoveredFunctions { pub(crate) fixtures: Vec, } -pub fn discover(context: &Context, py: Python, module: &DiscoveredModule) -> DiscoveredFunctions { +pub fn discover( + context: &Context, + py: Python, + module: &DiscoveredModule, + parsed: &Parsed, +) -> DiscoveredFunctions { tracing::info!( "Discovering test functions and fixtures in module {}", module.name() ); - let parsed = parsed_module( - module.source_text(), - context.project().metadata().python_version(), - ); - let mut visitor = FunctionDefinitionVisitor::new(py, context, module); visitor.visit_body(&parsed.syntax().body); diff --git a/crates/karva_project/src/envs.rs b/crates/karva_project/src/envs.rs new file mode 100644 index 00000000..907c7e93 --- /dev/null +++ b/crates/karva_project/src/envs.rs @@ -0,0 +1,19 @@ +use std::num::NonZeroUsize; + +pub struct EnvVars; + +impl EnvVars { + // Externally defined environment variables + + /// This is a standard Rayon environment variable. + pub const RAYON_NUM_THREADS: &'static str = "RAYON_NUM_THREADS"; +} + +pub fn max_parallelism() -> NonZeroUsize { + std::env::var(EnvVars::RAYON_NUM_THREADS) + .ok() + .and_then(|s| s.parse().ok()) + .unwrap_or_else(|| { + std::thread::available_parallelism().unwrap_or_else(|_| NonZeroUsize::new(1).unwrap()) + }) +} diff --git a/crates/karva_project/src/lib.rs b/crates/karva_project/src/lib.rs index 9d0e3c5a..5c0c08e5 100644 --- a/crates/karva_project/src/lib.rs +++ b/crates/karva_project/src/lib.rs @@ -1,8 +1,10 @@ +mod envs; mod path; mod project; mod utils; mod verbosity; +pub use envs::{EnvVars, max_parallelism}; pub use path::{TestPath, TestPathError, absolute}; pub use project::{Project, ProjectMetadata, ProjectOptions, TestPrefix}; pub use utils::module_name; From e0cba9bc1362130e93fb67c08d0e93424a86ead5 Mon Sep 17 00:00:00 2001 From: Matthew Mckee Date: Wed, 3 Dec 2025 15:14:16 +0000 Subject: [PATCH 09/13] Try to speed up --- crates/karva_core/src/collection/collector.rs | 14 +-- crates/karva_core/src/collection/models.rs | 18 ++- crates/karva_core/src/discovery/discoverer.rs | 8 +- .../src/discovery/models/function.rs | 4 +- .../karva_core/src/discovery/models/module.rs | 6 +- crates/karva_core/src/discovery/visitor.rs | 107 +++++++++--------- 6 files changed, 79 insertions(+), 78 deletions(-) diff --git a/crates/karva_core/src/collection/collector.rs b/crates/karva_core/src/collection/collector.rs index 4e535eb8..9880b0ed 100644 --- a/crates/karva_core/src/collection/collector.rs +++ b/crates/karva_core/src/collection/collector.rs @@ -119,9 +119,7 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { ) -> Option { let module_path = ModulePath::new(path, cwd)?; - let Ok(source_text) = std::fs::read_to_string(path) else { - return None; - }; + let source_text = std::fs::read_to_string(path).ok()?; let module_type: ModuleType = path.into(); @@ -136,22 +134,22 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { let mut test_defs = Vec::new(); let mut fixture_defs = Vec::new(); - for stmt in &parsed.syntax().body { + for stmt in parsed.into_syntax().body { if let Stmt::FunctionDef(function_def) = stmt { // Check if it's a fixture function - if is_fixture_function(function_def) { - fixture_defs.push(Arc::new(function_def.clone())); + if is_fixture_function(&function_def) { + fixture_defs.push(Arc::new(function_def)); } // Check if it's a test function (starts with test prefix) else if function_def.name.to_string().starts_with(test_prefix) { - test_defs.push(Arc::new(function_def.clone())); + test_defs.push(Arc::new(function_def)); } // Otherwise, ignore the function (it's not relevant for testing) } } let mut collected_module = - CollectedModule::new(module_path, path.clone(), module_type, source_text, parsed); + CollectedModule::new(module_path, path.clone(), module_type, source_text); // Add collected functions for test_def in test_defs { diff --git a/crates/karva_core/src/collection/models.rs b/crates/karva_core/src/collection/models.rs index d54a970c..4b4c506a 100644 --- a/crates/karva_core/src/collection/models.rs +++ b/crates/karva_core/src/collection/models.rs @@ -1,8 +1,7 @@ use std::{collections::HashMap, sync::Arc}; use camino::Utf8PathBuf; -use ruff_python_ast::{ModModule, StmtFunctionDef}; -use ruff_python_parser::Parsed; +use ruff_python_ast::StmtFunctionDef; use crate::{discovery::ModuleType, name::ModulePath}; @@ -15,9 +14,6 @@ pub struct CollectedModule { module_type: ModuleType, /// The source text of the file (cached to avoid re-reading) source_text: String, - /// Parsed AST (cached to avoid re-parsing) - stored for potential future optimization - #[allow(dead_code)] - parsed_ast: Arc>, /// Test function definitions (functions starting with test prefix) test_function_defs: Vec>, /// Fixture function definitions (functions with fixture decorators) @@ -25,19 +21,17 @@ pub struct CollectedModule { } impl CollectedModule { - pub(crate) fn new( + pub(crate) const fn new( path: ModulePath, file_path: Utf8PathBuf, module_type: ModuleType, source_text: String, - parsed_ast: Parsed, ) -> Self { Self { path, file_path, module_type, source_text, - parsed_ast: Arc::new(parsed_ast), test_function_defs: Vec::new(), fixture_function_defs: Vec::new(), } @@ -67,8 +61,12 @@ impl CollectedModule { &self.source_text } - pub(crate) fn parsed_ast(&self) -> &Parsed { - &self.parsed_ast + pub(crate) fn test_function_defs(&self) -> &[Arc] { + &self.test_function_defs + } + + pub(crate) fn fixture_function_defs(&self) -> &[Arc] { + &self.fixture_function_defs } pub(crate) fn is_empty(&self) -> bool { diff --git a/crates/karva_core/src/discovery/discoverer.rs b/crates/karva_core/src/discovery/discoverer.rs index 68ce03e6..1d33c11a 100644 --- a/crates/karva_core/src/discovery/discoverer.rs +++ b/crates/karva_core/src/discovery/discoverer.rs @@ -107,7 +107,13 @@ impl<'ctx, 'proj, 'rep> StandardDiscoverer<'ctx, 'proj, 'rep> { let DiscoveredFunctions { functions, fixtures, - } = super::visitor::discover(self.context, py, &module, collected_module.parsed_ast()); + } = super::visitor::discover( + self.context, + py, + &module, + collected_module.test_function_defs(), + collected_module.fixture_function_defs(), + ); module.extend_test_functions(functions); module.extend_fixtures(fixtures); diff --git a/crates/karva_core/src/discovery/models/function.rs b/crates/karva_core/src/discovery/models/function.rs index 5e770017..4ecf88ca 100644 --- a/crates/karva_core/src/discovery/models/function.rs +++ b/crates/karva_core/src/discovery/models/function.rs @@ -29,7 +29,7 @@ impl TestFunction { pub(crate) fn new( py: Python<'_>, module: &DiscoveredModule, - stmt_function_def: StmtFunctionDef, + stmt_function_def: Arc, py_function: Py, ) -> Self { let name = QualifiedFunctionName::new( @@ -41,7 +41,7 @@ impl TestFunction { Self { name, - stmt_function_def: Arc::new(stmt_function_def), + stmt_function_def, py_function, tags, } diff --git a/crates/karva_core/src/discovery/models/module.rs b/crates/karva_core/src/discovery/models/module.rs index 8dc7c1b5..ad7da5a8 100644 --- a/crates/karva_core/src/discovery/models/module.rs +++ b/crates/karva_core/src/discovery/models/module.rs @@ -9,7 +9,7 @@ pub struct DiscoveredModule { path: ModulePath, test_functions: Vec, fixtures: Vec, - type_: ModuleType, + module_type: ModuleType, source_text: String, } @@ -23,7 +23,7 @@ impl DiscoveredModule { path, test_functions: Vec::new(), fixtures: Vec::new(), - type_: module_type, + module_type, source_text, } } @@ -41,7 +41,7 @@ impl DiscoveredModule { } pub(crate) const fn module_type(&self) -> ModuleType { - self.type_ + self.module_type } pub(crate) fn test_functions(&self) -> Vec<&TestFunction> { diff --git a/crates/karva_core/src/discovery/visitor.rs b/crates/karva_core/src/discovery/visitor.rs index 6eb96dc4..fc52f9f7 100644 --- a/crates/karva_core/src/discovery/visitor.rs +++ b/crates/karva_core/src/discovery/visitor.rs @@ -12,7 +12,7 @@ use crate::{ Context, diagnostic::{report_failed_to_import_module, report_invalid_fixture}, discovery::{DiscoveredModule, TestFunction}, - extensions::fixtures::{Fixture, is_fixture_function}, + extensions::fixtures::Fixture, }; /// Visitor for discovering test functions and fixture definitions in a given module. @@ -196,64 +196,54 @@ impl<'ctx, 'proj, 'rep, 'py, 'a> FunctionDefinitionVisitor<'ctx, 'proj, 'rep, 'p } } -impl SourceOrderVisitor<'_> for FunctionDefinitionVisitor<'_, '_, '_, '_, '_> { - fn visit_stmt(&mut self, stmt: &'_ Stmt) { - if let Stmt::FunctionDef(stmt_function_def) = stmt { - if is_fixture_function(stmt_function_def) { - self.try_import_module(); +impl FunctionDefinitionVisitor<'_, '_, '_, '_, '_> { + fn process_fixture_function(&mut self, stmt_function_def: &Arc) { + self.try_import_module(); - let Some(py_module) = self.py_module.as_ref() else { - return; - }; + let Some(py_module) = self.py_module.as_ref() else { + return; + }; - let mut generator_function_visitor = GeneratorFunctionVisitor::default(); + let mut generator_function_visitor = GeneratorFunctionVisitor::default(); - source_order::walk_body(&mut generator_function_visitor, &stmt_function_def.body); + source_order::walk_body(&mut generator_function_visitor, &stmt_function_def.body); - let is_generator_function = generator_function_visitor.is_generator; + let is_generator_function = generator_function_visitor.is_generator; - match Fixture::try_from_function( + match Fixture::try_from_function( + self.py, + stmt_function_def, + py_module, + self.module.module_path(), + is_generator_function, + ) { + Ok(fixture_def) => self.fixtures.push(fixture_def), + Err(e) => { + report_invalid_fixture( + self.context, self.py, - &Arc::new(stmt_function_def.clone()), - py_module, - self.module.module_path(), - is_generator_function, - ) { - Ok(fixture_def) => self.fixtures.push(fixture_def), - Err(e) => { - report_invalid_fixture( - self.context, - self.py, - self.module.source_file(), - stmt_function_def, - &e, - ); - } - } - } else if stmt_function_def - .name - .to_string() - .starts_with(self.context.project().options().test_prefix()) - { - self.try_import_module(); - - let Some(py_module) = self.py_module.as_ref() else { - return; - }; - - if let Ok(py_function) = py_module.getattr(stmt_function_def.name.to_string()) { - self.test_functions.push(TestFunction::new( - self.py, - self.module, - stmt_function_def.clone(), - py_function.unbind(), - )); - } + self.module.source_file(), + stmt_function_def, + &e, + ); } - } else { - // Only walk statement if its not a function statement - // since we don't want to find nested functions - source_order::walk_stmt(self, stmt); + } + } + + fn process_test_function(&mut self, stmt_function_def: &Arc) { + self.try_import_module(); + + let Some(py_module) = self.py_module.as_ref() else { + return; + }; + + if let Ok(py_function) = py_module.getattr(stmt_function_def.name.to_string()) { + self.test_functions.push(TestFunction::new( + self.py, + self.module, + Arc::clone(stmt_function_def), + py_function.unbind(), + )); } } } @@ -268,7 +258,8 @@ pub fn discover( context: &Context, py: Python, module: &DiscoveredModule, - parsed: &Parsed, + test_function_defs: &[Arc], + fixture_function_defs: &[Arc], ) -> DiscoveredFunctions { tracing::info!( "Discovering test functions and fixtures in module {}", @@ -277,7 +268,15 @@ pub fn discover( let mut visitor = FunctionDefinitionVisitor::new(py, context, module); - visitor.visit_body(&parsed.syntax().body); + // Process collected test functions + for test_function_def in test_function_defs { + visitor.process_test_function(test_function_def); + } + + // Process collected fixture functions + for fixture_function_def in fixture_function_defs { + visitor.process_fixture_function(fixture_function_def); + } if context.project().options().try_import_fixtures() { visitor.find_extra_fixtures(); From 04359a49477204067c119411de46624e4b6ab16f Mon Sep 17 00:00:00 2001 From: Matthew Mckee Date: Wed, 3 Dec 2025 15:17:01 +0000 Subject: [PATCH 10/13] Update codspeed action --- .github/workflows/ci.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 27ba083e..3a8204a5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -134,9 +134,7 @@ jobs: run: cargo codspeed build -p karva_benchmark - name: Run benchmarks - uses: CodSpeedHQ/action@76578c2a7ddd928664caa737f0e962e3085d4e7c # v3.8.1 - env: - CODSPEED_PERF_ENABLED: false + uses: CodSpeedHQ/action@346a2d8a8d9d38909abd0bc3d23f773110f076ad # v4.4.1 with: token: ${{ secrets.CODSPEED_TOKEN }} run: cargo codspeed run --bench karva @@ -183,7 +181,7 @@ jobs: run: cargo codspeed build -m walltime - name: Run benchmarks - uses: CodSpeedHQ/action@76578c2a7ddd928664caa737f0e962e3085d4e7c # v3.8.1 + uses: CodSpeedHQ/action@346a2d8a8d9d38909abd0bc3d23f773110f076ad # v4.4.1 with: token: ${{ secrets.CODSPEED_TOKEN }} run: cargo codspeed run --bench ${{ matrix.project }} From 4c08b1a738a866b26ffb8a131f3085555d07a196 Mon Sep 17 00:00:00 2001 From: Matthew Mckee Date: Wed, 3 Dec 2025 15:26:52 +0000 Subject: [PATCH 11/13] Small updates --- .github/workflows/ci.yml | 1 + crates/karva_core/src/collection/collector.rs | 136 +++++++----------- 2 files changed, 50 insertions(+), 87 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3a8204a5..29be1773 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -138,6 +138,7 @@ jobs: with: token: ${{ secrets.CODSPEED_TOKEN }} run: cargo codspeed run --bench karva + mode: instrumentation benchmarks-walltime: name: Run walltime benchmarks diff --git a/crates/karva_core/src/collection/collector.rs b/crates/karva_core/src/collection/collector.rs index 9880b0ed..9ccbe952 100644 --- a/crates/karva_core/src/collection/collector.rs +++ b/crates/karva_core/src/collection/collector.rs @@ -2,7 +2,7 @@ use std::{sync::Arc, thread}; use camino::{Utf8Path, Utf8PathBuf}; use crossbeam_channel::unbounded; -use ignore::WalkBuilder; +use ignore::{WalkBuilder, WalkState}; use ruff_python_ast::Stmt; use ruff_python_parser::{Mode, ParseOptions, parse_unchecked}; @@ -23,12 +23,7 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { /// Collect all function definitions from a single file. fn collect_file(&self, path: &Utf8PathBuf) -> Option { - Self::collect_file_static( - path, - self.context.project().cwd(), - self.context.project().metadata().python_version(), - self.context.project().options().test_prefix(), - ) + collect_file(path, self.context) } /// Collect from a single test file path. @@ -52,39 +47,29 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { modules }); - // Walk directory in parallel and process files let walker = self.create_parallel_walker(path); - let cwd = self.context.project().cwd().clone(); - let python_version = self.context.project().metadata().python_version(); - let test_prefix = self.context.project().options().test_prefix().to_string(); walker.run(|| { let tx = tx.clone(); - let cwd = cwd.clone(); - let test_prefix = test_prefix.clone(); Box::new(move |entry| { let Ok(entry) = entry else { - return ignore::WalkState::Continue; + return WalkState::Continue; }; - // Only process files if !entry.file_type().is_some_and(|ft| ft.is_file()) { - return ignore::WalkState::Continue; + return WalkState::Continue; } let Ok(file_path) = Utf8PathBuf::from_path_buf(entry.path().to_path_buf()) else { - return ignore::WalkState::Continue; + return WalkState::Continue; }; - // Collect the module - if let Some(module) = - Self::collect_file_static(&file_path, &cwd, python_version, &test_prefix) - { + if let Some(module) = collect_file(&file_path, self.context) { let _ = tx.send(module); } - ignore::WalkState::Continue + WalkState::Continue }) }); @@ -109,90 +94,31 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { package } - /// Static version of `collect_file` that doesn't need self reference. - /// Used for parallel processing where we can't share &self across threads. - fn collect_file_static( - path: &Utf8PathBuf, - cwd: &Utf8PathBuf, - python_version: ruff_python_ast::PythonVersion, - test_prefix: &str, - ) -> Option { - let module_path = ModulePath::new(path, cwd)?; - - let source_text = std::fs::read_to_string(path).ok()?; - - let module_type: ModuleType = path.into(); - - // Parse the file to collect function definitions - let parsed = parse_unchecked( - &source_text, - ParseOptions::from(Mode::Module).with_target_version(python_version), - ) - .try_into_module()?; - - // Collect and categorize top-level function definitions from the parsed AST - let mut test_defs = Vec::new(); - let mut fixture_defs = Vec::new(); - - for stmt in parsed.into_syntax().body { - if let Stmt::FunctionDef(function_def) = stmt { - // Check if it's a fixture function - if is_fixture_function(&function_def) { - fixture_defs.push(Arc::new(function_def)); - } - // Check if it's a test function (starts with test prefix) - else if function_def.name.to_string().starts_with(test_prefix) { - test_defs.push(Arc::new(function_def)); - } - // Otherwise, ignore the function (it's not relevant for testing) - } - } - - let mut collected_module = - CollectedModule::new(module_path, path.clone(), module_type, source_text); - - // Add collected functions - for test_def in test_defs { - collected_module.add_test_function_def(test_def); - } - for fixture_def in fixture_defs { - collected_module.add_fixture_function_def(fixture_def); - } - - Some(collected_module) - } - /// Collect from all paths and build a complete package structure. pub(crate) fn collect_all(&self) -> CollectedPackage { let mut session_package = CollectedPackage::new(self.context.project().cwd().clone()); // Process all test paths for path_result in self.context.project().test_paths() { - let Ok(path) = path_result else { - // Report invalid path errors - if let Err(error) = path_result { + let path = match path_result { + Ok(path) => path, + Err(error) => { report_invalid_path(self.context, &error); + continue; } - continue; }; - // Clone the path for parent configuration lookup let path_for_config = path.path().to_owned(); match path { - karva_project::TestPath::File(file_path) => { - if let Some(module) = self.collect_test_file(&file_path) { - session_package.add_module(module); - } - } karva_project::TestPath::Directory(dir_path) => { let package = self.collect_directory(&dir_path); session_package.add_package(package); } - karva_project::TestPath::Function { + karva_project::TestPath::File(file_path) + | karva_project::TestPath::Function { path: file_path, .. } => { - // For specific function paths, still collect the whole module if let Some(module) = self.collect_test_file(&file_path) { session_package.add_module(module); } @@ -265,3 +191,39 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { .build_parallel() } } + +fn collect_file(path: &Utf8PathBuf, context: &Context) -> Option { + let module_path = ModulePath::new(path, context.project().cwd())?; + + let source_text = std::fs::read_to_string(path).ok()?; + + let module_type: ModuleType = path.into(); + + // Parse the file to collect function definitions + let parsed = parse_unchecked( + &source_text, + ParseOptions::from(Mode::Module) + .with_target_version(context.project().metadata().python_version()), + ) + .try_into_module()?; + + // Collect and categorize top-level function definitions from the parsed AST + let mut collected_module = + CollectedModule::new(module_path, path.clone(), module_type, source_text); + + for stmt in parsed.into_syntax().body { + if let Stmt::FunctionDef(function_def) = stmt { + if is_fixture_function(&function_def) { + collected_module.add_fixture_function_def(Arc::new(function_def)); + } else if function_def + .name + .to_string() + .starts_with(context.project().options().test_prefix()) + { + collected_module.add_test_function_def(Arc::new(function_def)); + } + } + } + + Some(collected_module) +} From 5b3f6281d5435dfb11540691eb806d02f96d178f Mon Sep 17 00:00:00 2001 From: Matthew Mckee Date: Wed, 3 Dec 2025 15:28:45 +0000 Subject: [PATCH 12/13] Update mode --- .github/workflows/ci.yml | 2 +- crates/karva_core/src/collection/collector.rs | 14 ++------------ 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 29be1773..a82d3119 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -138,7 +138,7 @@ jobs: with: token: ${{ secrets.CODSPEED_TOKEN }} run: cargo codspeed run --bench karva - mode: instrumentation + mode: simulation benchmarks-walltime: name: Run walltime benchmarks diff --git a/crates/karva_core/src/collection/collector.rs b/crates/karva_core/src/collection/collector.rs index 9ccbe952..63fb5f61 100644 --- a/crates/karva_core/src/collection/collector.rs +++ b/crates/karva_core/src/collection/collector.rs @@ -21,16 +21,6 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { Self { context } } - /// Collect all function definitions from a single file. - fn collect_file(&self, path: &Utf8PathBuf) -> Option { - collect_file(path, self.context) - } - - /// Collect from a single test file path. - pub(crate) fn collect_test_file(&self, path: &Utf8PathBuf) -> Option { - self.collect_file(path) - } - /// Collect from a directory in parallel using `WalkParallel`. pub(crate) fn collect_directory(&self, path: &Utf8PathBuf) -> CollectedPackage { let mut package = CollectedPackage::new(path.clone()); @@ -119,7 +109,7 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { | karva_project::TestPath::Function { path: file_path, .. } => { - if let Some(module) = self.collect_test_file(&file_path) { + if let Some(module) = { collect_file(&file_path, self.context) } { session_package.add_module(module); } } @@ -153,7 +143,7 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { if conftest_path.exists() { let mut package = CollectedPackage::new(current_path.to_path_buf()); - if let Some(module) = self.collect_test_file(&conftest_path) { + if let Some(module) = { collect_file(&conftest_path, self.context) } { package.add_configuration_module(module); session_package.add_package(package); } From ee3cebb2ceeb43d564755f1cc525bd342cdcd95a Mon Sep 17 00:00:00 2001 From: Matthew Mckee Date: Wed, 3 Dec 2025 17:07:21 +0000 Subject: [PATCH 13/13] Small improvements --- crates/karva_core/src/collection/collector.rs | 87 +++++++------ crates/karva_core/src/collection/mod.rs | 2 +- crates/karva_core/src/collection/models.rs | 74 +++++------ crates/karva_core/src/discovery/discoverer.rs | 123 ++++++------------ crates/karva_core/src/discovery/mod.rs | 6 +- .../src/discovery/models/function.rs | 4 - .../karva_core/src/discovery/models/module.rs | 69 +--------- .../src/discovery/models/package.rs | 111 ++-------------- crates/karva_core/src/discovery/visitor.rs | 53 +++----- .../karva_core/src/extensions/fixtures/mod.rs | 12 +- 10 files changed, 161 insertions(+), 380 deletions(-) diff --git a/crates/karva_core/src/collection/collector.rs b/crates/karva_core/src/collection/collector.rs index 63fb5f61..722b7878 100644 --- a/crates/karva_core/src/collection/collector.rs +++ b/crates/karva_core/src/collection/collector.rs @@ -3,12 +3,13 @@ use std::{sync::Arc, thread}; use camino::{Utf8Path, Utf8PathBuf}; use crossbeam_channel::unbounded; use ignore::{WalkBuilder, WalkState}; +use karva_project::TestPath; use ruff_python_ast::Stmt; use ruff_python_parser::{Mode, ParseOptions, parse_unchecked}; use super::models::{CollectedModule, CollectedPackage}; use crate::{ - Context, diagnostic::report_invalid_path, discovery::ModuleType, + Context, collection::ModuleType, diagnostic::report_invalid_path, extensions::fixtures::is_fixture_function, name::ModulePath, }; @@ -23,21 +24,30 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { /// Collect from a directory in parallel using `WalkParallel`. pub(crate) fn collect_directory(&self, path: &Utf8PathBuf) -> CollectedPackage { - let mut package = CollectedPackage::new(path.clone()); - // Create channels for communication - let (tx, rx) = unbounded(); + let (tx, rx) = unbounded::(); + + let cloned_path = path.clone(); // Spawn receiver thread to collect results let receiver_handle = thread::spawn(move || { - let mut modules = Vec::new(); - for module in rx { - modules.push(module); + let mut package = CollectedPackage::new(cloned_path); + + for collected_module in rx { + match collected_module.module_type() { + ModuleType::Test => { + package.add_module(collected_module); + } + ModuleType::Configuration => { + package.add_configuration_module(collected_module); + } + } } - modules + + package }); - let walker = self.create_parallel_walker(path); + let walker = self.create_parallel_walker(&path.clone()); walker.run(|| { let tx = tx.clone(); @@ -55,7 +65,7 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { return WalkState::Continue; }; - if let Some(module) = collect_file(&file_path, self.context) { + if let Some(module) = collect_file(&file_path, self.context, None) { let _ = tx.send(module); } @@ -63,32 +73,16 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { }) }); - // Drop the sender so receiver knows we're done + // Drop the original sender to close the channel drop(tx); - // Wait for receiver to finish - let modules = receiver_handle.join().unwrap(); - - // Add all collected modules to the package - for collected_module in modules { - match collected_module.module_type() { - ModuleType::Test => { - package.add_module(collected_module); - } - ModuleType::Configuration => { - package.add_configuration_module(collected_module); - } - } - } - - package + receiver_handle.join().unwrap() } /// Collect from all paths and build a complete package structure. pub(crate) fn collect_all(&self) -> CollectedPackage { let mut session_package = CollectedPackage::new(self.context.project().cwd().clone()); - // Process all test paths for path_result in self.context.project().test_paths() { let path = match path_result { Ok(path) => path, @@ -101,21 +95,27 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { let path_for_config = path.path().to_owned(); match path { - karva_project::TestPath::Directory(dir_path) => { + TestPath::Directory(dir_path) => { let package = self.collect_directory(&dir_path); session_package.add_package(package); } - karva_project::TestPath::File(file_path) - | karva_project::TestPath::Function { - path: file_path, .. + TestPath::Function { + path: file_path, + function_name, } => { - if let Some(module) = { collect_file(&file_path, self.context) } { + if let Some(module) = + collect_file(&file_path, self.context, Some(&function_name)) + { + session_package.add_module(module); + } + } + TestPath::File(file_path) => { + if let Some(module) = collect_file(&file_path, self.context, None) { session_package.add_module(module); } } } - // Collect parent configuration files self.collect_parent_configuration(&path_for_config, &mut session_package); } @@ -143,7 +143,7 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { if conftest_path.exists() { let mut package = CollectedPackage::new(current_path.to_path_buf()); - if let Some(module) = { collect_file(&conftest_path, self.context) } { + if let Some(module) = collect_file(&conftest_path, self.context, None) { package.add_configuration_module(module); session_package.add_package(package); } @@ -162,7 +162,6 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { /// Creates a configured parallel directory walker for Python file discovery. fn create_parallel_walker(&self, path: &Utf8PathBuf) -> ignore::WalkParallel { - // Configure thread pool size for optimal parallelism let num_threads = karva_project::max_parallelism().get(); WalkBuilder::new(path) @@ -182,14 +181,17 @@ impl<'ctx, 'proj, 'rep> ParallelCollector<'ctx, 'proj, 'rep> { } } -fn collect_file(path: &Utf8PathBuf, context: &Context) -> Option { +fn collect_file( + path: &Utf8PathBuf, + context: &Context, + only_function_name: Option<&str>, +) -> Option { let module_path = ModulePath::new(path, context.project().cwd())?; let source_text = std::fs::read_to_string(path).ok()?; let module_type: ModuleType = path.into(); - // Parse the file to collect function definitions let parsed = parse_unchecked( &source_text, ParseOptions::from(Mode::Module) @@ -197,12 +199,15 @@ fn collect_file(path: &Utf8PathBuf, context: &Context) -> Option>, + pub(crate) test_function_defs: Vec>, /// Fixture function definitions (functions with fixture decorators) - fixture_function_defs: Vec>, + pub(crate) fixture_function_defs: Vec>, } impl CollectedModule { pub(crate) const fn new( path: ModulePath, - file_path: Utf8PathBuf, + module_type: ModuleType, source_text: String, ) -> Self { Self { path, - file_path, + module_type, source_text, test_function_defs: Vec::new(), @@ -45,30 +46,14 @@ impl CollectedModule { self.fixture_function_defs.push(function_def); } - pub(crate) const fn path(&self) -> &ModulePath { - &self.path - } - pub(crate) const fn file_path(&self) -> &Utf8PathBuf { - &self.file_path + self.path.path() } pub(crate) const fn module_type(&self) -> ModuleType { self.module_type } - pub(crate) fn source_text(&self) -> &str { - &self.source_text - } - - pub(crate) fn test_function_defs(&self) -> &[Arc] { - &self.test_function_defs - } - - pub(crate) fn fixture_function_defs(&self) -> &[Arc] { - &self.fixture_function_defs - } - pub(crate) fn is_empty(&self) -> bool { self.test_function_defs.is_empty() && self.fixture_function_defs.is_empty() } @@ -78,10 +63,10 @@ impl CollectedModule { /// This is populated during the parallel collection phase. #[derive(Debug, Clone)] pub struct CollectedPackage { - path: Utf8PathBuf, - modules: HashMap, - packages: HashMap, - configuration_module_path: Option, + pub(crate) path: Utf8PathBuf, + pub(crate) modules: HashMap, + pub(crate) packages: HashMap, + pub(crate) configuration_module_path: Option, } impl CollectedPackage { @@ -98,14 +83,6 @@ impl CollectedPackage { &self.path } - pub(crate) const fn modules(&self) -> &HashMap { - &self.modules - } - - pub(crate) const fn packages(&self) -> &HashMap { - &self.packages - } - /// Add a module to this package. /// /// If the module path does not start with our path, do nothing. @@ -264,3 +241,24 @@ impl CollectedModule { } } } + +/// The type of module. +/// This is used to differentiation between files that contain only test functions and files that contain only configuration functions. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ModuleType { + Test, + Configuration, +} + +impl From<&Utf8PathBuf> for ModuleType { + fn from(path: &Utf8PathBuf) -> Self { + if path + .file_name() + .is_some_and(|file_name| file_name == "conftest.py") + { + Self::Configuration + } else { + Self::Test + } + } +} diff --git a/crates/karva_core/src/discovery/discoverer.rs b/crates/karva_core/src/discovery/discoverer.rs index 1d33c11a..a73cd4a0 100644 --- a/crates/karva_core/src/discovery/discoverer.rs +++ b/crates/karva_core/src/discovery/discoverer.rs @@ -1,13 +1,11 @@ -use camino::Utf8PathBuf; -use karva_project::TestPath; use pyo3::prelude::*; #[cfg(test)] use crate::utils::attach_with_project; use crate::{ Context, - collection::{CollectedPackage, ParallelCollector}, - discovery::{DiscoveredModule, DiscoveredPackage, ModuleType, visitor::DiscoveredFunctions}, + collection::{CollectedModule, CollectedPackage, ModuleType, ParallelCollector}, + discovery::{DiscoveredModule, DiscoveredPackage, visitor::discover}, utils::add_to_sys_path, }; @@ -34,52 +32,12 @@ impl<'ctx, 'proj, 'rep> StandardDiscoverer<'ctx, 'proj, 'rep> { tracing::info!("Collecting test files in parallel..."); - // Collect function-specific paths for filtering and check for shadowing - let mut function_filters = std::collections::HashMap::new(); - let mut all_file_paths = std::collections::HashSet::new(); - let mut all_directory_paths = std::collections::HashSet::new(); - - for path in self.context.project().test_paths().into_iter().flatten() { - match path { - TestPath::File(file_path) => { - all_file_paths.insert(file_path); - } - TestPath::Directory(dir_path) => { - all_directory_paths.insert(dir_path); - } - TestPath::Function { - path, - function_name, - } => { - function_filters.insert(path, function_name); - } - } - } - - // Remove function filters if the file is explicitly listed or is in a listed directory - function_filters.retain(|file_path, _| { - // If the file itself is in the test paths, don't filter - if all_file_paths.contains(file_path) { - return false; - } - // If any parent directory is in the test paths, don't filter - for dir_path in &all_directory_paths { - if file_path.starts_with(dir_path) { - return false; - } - } - true - }); - - // Phase 1: Collection (parallel) - collect all AST function definitions let collector = ParallelCollector::new(self.context); let collected_package = collector.collect_all(); tracing::info!("Discovering test functions and fixtures..."); - // Phase 2: Discovery (single-threaded) - convert collected AST to test functions and fixtures - let mut session_package = - self.convert_collected_to_discovered(py, &collected_package, &function_filters); + let mut session_package = self.convert_collected_to_discovered(py, collected_package); session_package.shrink(); @@ -91,50 +49,49 @@ impl<'ctx, 'proj, 'rep> StandardDiscoverer<'ctx, 'proj, 'rep> { fn convert_collected_to_discovered( &self, py: Python<'_>, - collected_package: &CollectedPackage, - function_filters: &std::collections::HashMap, + collected_package: CollectedPackage, ) -> DiscoveredPackage { - let mut discovered_package = DiscoveredPackage::new(collected_package.path().clone()); + let CollectedPackage { + path, + modules, + packages, + configuration_module_path: _, + } = collected_package; - // Convert all modules - for collected_module in collected_package.modules().values() { - let mut module = DiscoveredModule::new_with_source( - collected_module.path().clone(), - collected_module.module_type(), - collected_module.source_text().to_string(), - ); + let mut discovered_package = DiscoveredPackage::new(path); - let DiscoveredFunctions { - functions, - fixtures, - } = super::visitor::discover( + // Convert all modules + for collected_module in modules.into_values() { + let CollectedModule { + path, + module_type, + source_text, + test_function_defs, + fixture_function_defs, + } = collected_module; + + let mut module = DiscoveredModule::new_with_source(path.clone(), source_text); + + discover( self.context, py, - &module, - collected_module.test_function_defs(), - collected_module.fixture_function_defs(), + &mut module, + test_function_defs, + fixture_function_defs, ); - module.extend_test_functions(functions); - module.extend_fixtures(fixtures); - - // Apply function filtering if this module has a function filter - if let Some(function_name) = function_filters.get(collected_module.file_path()) { - module.filter_test_functions(function_name); - } - - if collected_module.module_type() == ModuleType::Configuration { + if module_type == ModuleType::Configuration { discovered_package.add_configuration_module(module); } else { - discovered_package.add_module(module); + discovered_package.add_direct_module(module); } } // Convert all subpackages recursively - for collected_subpackage in collected_package.packages().values() { + for collected_subpackage in packages.into_values() { let discovered_subpackage = - self.convert_collected_to_discovered(py, collected_subpackage, function_filters); - discovered_package.add_package(discovered_subpackage); + self.convert_collected_to_discovered(py, collected_subpackage); + discovered_package.add_direct_subpackage(discovered_subpackage); } discovered_package @@ -144,6 +101,7 @@ impl<'ctx, 'proj, 'rep> StandardDiscoverer<'ctx, 'proj, 'rep> { #[cfg(test)] mod tests { + use camino::Utf8PathBuf; use insta::{allow_duplicates, assert_snapshot}; use karva_project::{Project, ProjectOptions}; use karva_test::TestContext; @@ -447,16 +405,19 @@ def test_function(): pass fn test_discover_function_inside_function() { let env = TestContext::with_files([( "/test_file.py", - "def test_function(): def test_function2(): pass", + " +def test_function(): + def test_function2(): pass", )]); - let mapped_dir = env.mapped_path("").unwrap(); - let path = mapped_dir.join("test_file.py"); - - let project = Project::new(env.cwd(), vec![path]); + let project = Project::new(env.cwd(), vec![env.cwd()]); let session = session(&project); - assert_snapshot!(session.display(), @""); + assert_snapshot!(session.display(), @r" + └── // + └── .test_file + └── test_cases [test_function] + "); } #[test] diff --git a/crates/karva_core/src/discovery/mod.rs b/crates/karva_core/src/discovery/mod.rs index 70823e18..81e46fe2 100644 --- a/crates/karva_core/src/discovery/mod.rs +++ b/crates/karva_core/src/discovery/mod.rs @@ -3,8 +3,4 @@ pub mod models; pub mod visitor; pub use discoverer::StandardDiscoverer; -pub use models::{ - function::TestFunction, - module::{DiscoveredModule, ModuleType}, - package::DiscoveredPackage, -}; +pub use models::{function::TestFunction, module::DiscoveredModule, package::DiscoveredPackage}; diff --git a/crates/karva_core/src/discovery/models/function.rs b/crates/karva_core/src/discovery/models/function.rs index 4ecf88ca..a69cd59b 100644 --- a/crates/karva_core/src/discovery/models/function.rs +++ b/crates/karva_core/src/discovery/models/function.rs @@ -46,10 +46,6 @@ impl TestFunction { tags, } } - - pub(crate) fn function_name(&self) -> &str { - &self.stmt_function_def.name - } } impl RequiresFixtures for TestFunction { diff --git a/crates/karva_core/src/discovery/models/module.rs b/crates/karva_core/src/discovery/models/module.rs index ad7da5a8..894cc499 100644 --- a/crates/karva_core/src/discovery/models/module.rs +++ b/crates/karva_core/src/discovery/models/module.rs @@ -9,21 +9,15 @@ pub struct DiscoveredModule { path: ModulePath, test_functions: Vec, fixtures: Vec, - module_type: ModuleType, source_text: String, } impl DiscoveredModule { - pub(crate) const fn new_with_source( - path: ModulePath, - module_type: ModuleType, - source_text: String, - ) -> Self { + pub(crate) const fn new_with_source(path: ModulePath, source_text: String) -> Self { Self { path, test_functions: Vec::new(), fixtures: Vec::new(), - module_type, source_text, } } @@ -40,28 +34,20 @@ impl DiscoveredModule { self.path.module_name() } - pub(crate) const fn module_type(&self) -> ModuleType { - self.module_type - } - pub(crate) fn test_functions(&self) -> Vec<&TestFunction> { self.test_functions.iter().collect() } - pub(crate) fn extend_test_functions(&mut self, test_functions: Vec) { - self.test_functions.extend(test_functions); - } - - pub(crate) fn filter_test_functions(&mut self, name: &str) { - self.test_functions.retain(|tc| tc.function_name() == name); + pub(crate) fn add_test_function(&mut self, test_function: TestFunction) { + self.test_functions.push(test_function); } pub(crate) const fn fixtures(&self) -> &Vec { &self.fixtures } - pub(crate) fn extend_fixtures(&mut self, fixtures: Vec) { - self.fixtures.extend(fixtures); + pub(crate) fn add_fixture(&mut self, fixture: Fixture) { + self.fixtures.push(fixture); } #[cfg(test)] @@ -77,30 +63,6 @@ impl DiscoveredModule { SourceFileBuilder::new(self.path().as_str(), self.source_text()).finish() } - pub(crate) fn update(&mut self, module: Self) { - if self.path == module.path { - for test_case in module.test_functions { - if !self - .test_functions - .iter() - .any(|existing| existing.name == test_case.name) - { - self.test_functions.push(test_case); - } - } - - for fixture in module.fixtures { - if !self - .fixtures - .iter() - .any(|existing| existing.name() == fixture.name()) - { - self.fixtures.push(fixture); - } - } - } - } - pub(crate) fn is_empty(&self) -> bool { self.test_functions.is_empty() && self.fixtures.is_empty() } @@ -111,27 +73,6 @@ impl DiscoveredModule { } } -/// The type of module. -/// This is used to differentiation between files that contain only test functions and files that contain only configuration functions. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ModuleType { - Test, - Configuration, -} - -impl From<&Utf8PathBuf> for ModuleType { - fn from(path: &Utf8PathBuf) -> Self { - if path - .file_name() - .is_some_and(|file_name| file_name == "conftest.py") - { - Self::Configuration - } else { - Self::Test - } - } -} - #[cfg(test)] pub struct DisplayDiscoveredModule<'proj> { module: &'proj DiscoveredModule, diff --git a/crates/karva_core/src/discovery/models/package.rs b/crates/karva_core/src/discovery/models/package.rs index bad9a76a..44cb66a0 100644 --- a/crates/karva_core/src/discovery/models/package.rs +++ b/crates/karva_core/src/discovery/models/package.rs @@ -4,10 +4,7 @@ use camino::Utf8PathBuf; #[cfg(test)] use crate::discovery::TestFunction; -use crate::{ - discovery::{DiscoveredModule, ModuleType}, - name::ModulePath, -}; +use crate::{discovery::DiscoveredModule, name::ModulePath}; /// A package represents a single python directory. #[derive(Debug)] @@ -68,98 +65,19 @@ impl DiscoveredPackage { } } - /// Add a module to this package. - /// - /// If the module path does not start with our path, do nothing. - /// - /// If the module path equals our path, use update method. - /// - /// Otherwise, strip the current path from the start and add the module to the appropriate sub-package. - pub(crate) fn add_module(&mut self, module: DiscoveredModule) { - if !module.path().starts_with(self.path()) { - return; - } - - if module.is_empty() { - return; - } - - if module.path().parent().is_some_and(|p| p == self.path()) { - if let Some(existing_module) = self.modules.get_mut(module.path()) { - existing_module.update(module); - } else { - if module.module_type() == ModuleType::Configuration { - self.configuration_module_path = Some(module.module_path().clone()); - } - self.modules.insert(module.path().clone(), module); - } - return; - } - - let Ok(relative_path) = module.path().strip_prefix(self.path()) else { - return; - }; - - let components: Vec<_> = relative_path.components().collect(); - - if components.is_empty() { - return; - } - - let first_component = components[0]; - let intermediate_path = self.path().join(first_component); - - if let Some(existing_package) = self.packages.get_mut(&intermediate_path) { - existing_package.add_module(module); - } else { - let mut new_package = Self::new(intermediate_path); - new_package.add_module(module); - self.packages - .insert(new_package.path().clone(), new_package); - } + /// Add a module directly to this package. + pub(crate) fn add_direct_module(&mut self, module: DiscoveredModule) { + self.modules.insert(module.path().clone(), module); } pub(crate) fn add_configuration_module(&mut self, module: DiscoveredModule) { self.configuration_module_path = Some(module.module_path().clone()); - self.add_module(module); + self.add_direct_module(module); } - /// Add a package to this package. - /// - /// If the package path equals our path, use update method. - /// - /// Otherwise, strip the current path from the start and add the package to the appropriate sub-package. - pub(crate) fn add_package(&mut self, package: Self) { - if !package.path().starts_with(self.path()) { - return; - } - - if package.path() == self.path() { - self.update(package); - return; - } - - let Ok(relative_path) = package.path().strip_prefix(self.path()) else { - return; - }; - - let components: Vec<_> = relative_path.components().collect(); - - if components.is_empty() { - return; - } - - let first_component = components[0]; - let intermediate_path = self.path().join(first_component); - - if let Some(existing_package) = self.packages.get_mut(&intermediate_path) { - existing_package.add_package(package); - } else { - let mut new_package = Self::new(intermediate_path); - new_package.add_package(package); - self.packages - .insert(new_package.path().clone(), new_package); - } + /// Adds a package directly as a subpackage. + pub(crate) fn add_direct_subpackage(&mut self, other: Self) { + self.packages.insert(other.path().clone(), other); } #[cfg(test)] @@ -174,19 +92,6 @@ impl DiscoveredPackage { total } - pub(crate) fn update(&mut self, package: Self) { - for (_, module) in package.modules { - self.add_module(module); - } - for (_, package) in package.packages { - self.add_package(package); - } - - if let Some(module) = package.configuration_module_path { - self.configuration_module_path = Some(module); - } - } - #[cfg(test)] pub(crate) fn test_functions(&self) -> Vec<&TestFunction> { let mut functions = Vec::new(); diff --git a/crates/karva_core/src/discovery/visitor.rs b/crates/karva_core/src/discovery/visitor.rs index fc52f9f7..c936ade0 100644 --- a/crates/karva_core/src/discovery/visitor.rs +++ b/crates/karva_core/src/discovery/visitor.rs @@ -21,13 +21,7 @@ struct FunctionDefinitionVisitor<'ctx, 'proj, 'rep, 'py, 'a> { context: &'ctx Context<'proj, 'rep>, /// The current module. - module: &'a DiscoveredModule, - - /// `TestFunction` objects that have been discovered. - test_functions: Vec, - - /// `Fixture` objects that have been discovered. - fixtures: Vec, + module: &'a mut DiscoveredModule, /// We only import the module once we actually need it, this ensures we don't import random files. /// Which has a side effect of running them. @@ -43,13 +37,11 @@ impl<'ctx, 'proj, 'rep, 'py, 'a> FunctionDefinitionVisitor<'ctx, 'proj, 'rep, 'p const fn new( py: Python<'py>, context: &'ctx Context<'proj, 'rep>, - module: &'a DiscoveredModule, + module: &'a mut DiscoveredModule, ) -> Self { Self { context, module, - test_functions: Vec::new(), - fixtures: Vec::new(), py_module: None, py, tried_to_import_module: false, @@ -98,13 +90,13 @@ impl<'ctx, 'proj, 'rep, 'py, 'a> FunctionDefinitionVisitor<'ctx, 'proj, 'rep, 'p if value.is_callable() { let name_str = name.extract::().unwrap_or_default(); - for fixture in &self.fixtures { + for fixture in self.module.fixtures() { if fixture.original_function_name() == name_str { continue 'outer; } } - for function in &self.test_functions { + for function in self.module.test_functions() { if function.name.function_name() == name_str { continue 'outer; } @@ -184,12 +176,12 @@ impl<'ctx, 'proj, 'rep, 'py, 'a> FunctionDefinitionVisitor<'ctx, 'proj, 'rep, 'p if let Ok(fixture_def) = Fixture::try_from_function( self.py, - &Arc::new(stmt_function_def.clone()), + Arc::new(stmt_function_def.clone()), &py_module, self.module.module_path(), is_generator_function, ) { - self.fixtures.push(fixture_def); + self.module.add_fixture(fixture_def); } } } @@ -212,12 +204,12 @@ impl FunctionDefinitionVisitor<'_, '_, '_, '_, '_> { match Fixture::try_from_function( self.py, - stmt_function_def, + stmt_function_def.clone(), py_module, self.module.module_path(), is_generator_function, ) { - Ok(fixture_def) => self.fixtures.push(fixture_def), + Ok(fixture_def) => self.module.add_fixture(fixture_def), Err(e) => { report_invalid_fixture( self.context, @@ -230,7 +222,7 @@ impl FunctionDefinitionVisitor<'_, '_, '_, '_, '_> { } } - fn process_test_function(&mut self, stmt_function_def: &Arc) { + fn process_test_function(&mut self, stmt_function_def: Arc) { self.try_import_module(); let Some(py_module) = self.py_module.as_ref() else { @@ -238,29 +230,23 @@ impl FunctionDefinitionVisitor<'_, '_, '_, '_, '_> { }; if let Ok(py_function) = py_module.getattr(stmt_function_def.name.to_string()) { - self.test_functions.push(TestFunction::new( + self.module.add_test_function(TestFunction::new( self.py, self.module, - Arc::clone(stmt_function_def), + stmt_function_def, py_function.unbind(), )); } } } -#[derive(Debug, Default)] -pub struct DiscoveredFunctions { - pub(crate) functions: Vec, - pub(crate) fixtures: Vec, -} - pub fn discover( context: &Context, py: Python, - module: &DiscoveredModule, - test_function_defs: &[Arc], - fixture_function_defs: &[Arc], -) -> DiscoveredFunctions { + module: &mut DiscoveredModule, + test_function_defs: Vec>, + fixture_function_defs: Vec>, +) { tracing::info!( "Discovering test functions and fixtures in module {}", module.name() @@ -268,24 +254,17 @@ pub fn discover( let mut visitor = FunctionDefinitionVisitor::new(py, context, module); - // Process collected test functions for test_function_def in test_function_defs { visitor.process_test_function(test_function_def); } - // Process collected fixture functions for fixture_function_def in fixture_function_defs { - visitor.process_fixture_function(fixture_function_def); + visitor.process_fixture_function(&fixture_function_def); } if context.project().options().try_import_fixtures() { visitor.find_extra_fixtures(); } - - DiscoveredFunctions { - functions: visitor.test_functions, - fixtures: visitor.fixtures, - } } fn parsed_module(source: &str, python_version: PythonVersion) -> Parsed { diff --git a/crates/karva_core/src/extensions/fixtures/mod.rs b/crates/karva_core/src/extensions/fixtures/mod.rs index ac0e7b0b..0bde2d21 100644 --- a/crates/karva_core/src/extensions/fixtures/mod.rs +++ b/crates/karva_core/src/extensions/fixtures/mod.rs @@ -98,7 +98,7 @@ impl Fixture { pub(crate) fn try_from_function( py: Python<'_>, - stmt_function_def: &Arc, + stmt_function_def: Arc, py_module: &Bound<'_, PyModule>, module_path: &ModulePath, is_generator_function: bool, @@ -109,7 +109,7 @@ impl Fixture { let try_karva = Self::try_from_karva_function( py, - stmt_function_def, + stmt_function_def.clone(), &function, module_path.clone(), is_generator_function, @@ -142,7 +142,7 @@ impl Fixture { pub(crate) fn try_from_pytest_function( py: Python<'_>, - stmt_function_def: &Arc, + stmt_function_def: Arc, function: &Bound<'_, PyAny>, module_name: ModulePath, is_generator_function: bool, @@ -180,7 +180,7 @@ impl Fixture { Ok(Self::new( py, QualifiedFunctionName::new(name, module_name), - stmt_function_def.clone(), + stmt_function_def, fixture_scope, auto_use.extract::().unwrap_or(false), fixture_function.into(), @@ -191,7 +191,7 @@ impl Fixture { pub(crate) fn try_from_karva_function( py: Python<'_>, - stmt_function_def: &Arc, + stmt_function_def: Arc, function: &Bound<'_, PyAny>, module_path: ModulePath, is_generator_function: bool, @@ -213,7 +213,7 @@ impl Fixture { Ok(Self::new( py, QualifiedFunctionName::new(name, module_path), - stmt_function_def.clone(), + stmt_function_def, fixture_scope, auto_use, py_function.into(),