From cb1c87b877cd058a9290402f0de6c8972b32db59 Mon Sep 17 00:00:00 2001 From: Shaan Majid <70789625+shaanmajid@users.noreply.github.com> Date: Sun, 8 Feb 2026 18:47:22 -0600 Subject: [PATCH 1/5] feat: add `repo: self` support for consuming own hooks Allow projects that publish hooks via `.pre-commit-hooks.yaml` to consume their own hooks with `repo: self`, avoiding redundant `repo: local` definitions. Add a new `SelfRepo` config variant using `"self"` as the sentinel value. Hooks use `RemoteHook` shape (only `id` required, rest are optional overrides). At init time, reads `.pre-commit-hooks.yaml` from the project root and resolves hooks via the existing manifest-lookup + override machinery. Each project's self-repo hooks include the canonicalized project path as an env key dependency, so two projects sharing a `PREK_HOME` get isolated hook environments. Track `project_root` alongside the config path in `config-tracking.json` so cache GC can reconstruct the correct dependency token; migrate the legacy `Vec` format transparently. Handles `repo: self` across all code paths: config parsing, workspace init, hook resolution, JSON schema, auto-update (skip), cache GC, tab completion, meta hooks, and language dispatch. --- crates/prek/src/cli/auto_update.rs | 2 +- crates/prek/src/cli/cache_gc.rs | 59 +++++++++++--- crates/prek/src/cli/completion.rs | 5 ++ crates/prek/src/cli/run/run.rs | 7 +- crates/prek/src/config.rs | 115 +++++++++++++++++++++++++++- crates/prek/src/hook.rs | 62 ++++++++++++--- crates/prek/src/hooks/meta_hooks.rs | 1 + crates/prek/src/languages/mod.rs | 2 +- crates/prek/src/schema.rs | 18 ++++- crates/prek/src/store.rs | 73 +++++++++++++++--- crates/prek/src/workspace.rs | 44 ++++++++++- prek.schema.json | 32 +++++++- 12 files changed, 374 insertions(+), 46 deletions(-) diff --git a/crates/prek/src/cli/auto_update.rs b/crates/prek/src/cli/auto_update.rs index 9c31509c..1cfca728 100644 --- a/crates/prek/src/cli/auto_update.rs +++ b/crates/prek/src/cli/auto_update.rs @@ -517,7 +517,7 @@ fn render_updated_toml_config( .and_then(|value| value.as_str()) .unwrap_or_default(); - if matches!(repo_value, "local" | "meta" | "builtin") { + if matches!(repo_value, "local" | "meta" | "builtin" | "self") { continue; } diff --git a/crates/prek/src/cli/cache_gc.rs b/crates/prek/src/cli/cache_gc.rs index c885f69f..ed570cad 100644 --- a/crates/prek/src/cli/cache_gc.rs +++ b/crates/prek/src/cli/cache_gc.rs @@ -5,6 +5,7 @@ use std::path::Path; use anyhow::Result; use owo_colors::OwoColorize; +use prek_consts::PRE_COMMIT_HOOKS_YAML; use rustc_hash::FxHashMap; use rustc_hash::FxHashSet; use strum::IntoEnumIterator; @@ -12,10 +13,12 @@ use tracing::{debug, trace, warn}; use crate::cli::ExitStatus; use crate::cli::cache_size::{dir_size_bytes, human_readable_bytes}; -use crate::config::{self, Error as ConfigError, Repo as ConfigRepo, load_config}; -use crate::hook::{HOOK_MARKER, HookEnvKey, HookSpec, InstallInfo, Repo as HookRepo}; +use crate::config::{self, Error as ConfigError, Repo as ConfigRepo, load_config, read_manifest}; +use crate::hook::{ + HOOK_MARKER, HookEnvKey, HookSpec, InstallInfo, Repo as HookRepo, self_repo_dependency, +}; use crate::printer::Printer; -use crate::store::{CacheBucket, REPO_MARKER, Store, ToolBucket}; +use crate::store::{CacheBucket, REPO_MARKER, Store, ToolBucket, TrackedConfig}; #[derive(Debug, Copy, Clone, Eq, PartialEq)] enum RemovalKind { @@ -147,7 +150,7 @@ pub(crate) async fn cache_gc( return Ok(ExitStatus::Success); } - let mut kept_configs: FxHashSet<&Path> = FxHashSet::default(); + let mut kept_configs: FxHashSet = FxHashSet::default(); let mut used_repo_keys: FxHashSet = FxHashSet::default(); let mut used_hook_env_dirs: FxHashSet = FxHashSet::default(); let mut used_tools: FxHashSet = FxHashSet::default(); @@ -160,7 +163,8 @@ pub(crate) async fn cache_gc( let installed = store.installed_hooks().await; - for config_path in &tracked_configs { + for tracked in &tracked_configs { + let config_path = &tracked.path; let config = match load_config(config_path) { Ok(config) => { trace!(path = %config_path.display(), "Found tracked config"); @@ -173,14 +177,18 @@ pub(crate) async fn cache_gc( } err => { warn!(path = %config_path.display(), %err, "Failed to parse config, skipping for GC"); - kept_configs.insert(config_path); + kept_configs.insert(tracked.clone()); continue; } }, }; - kept_configs.insert(config_path); + kept_configs.insert(tracked.clone()); - used_env_keys.extend(hook_env_keys_from_config(store, &config)); + used_env_keys.extend(hook_env_keys_from_config( + store, + &tracked.project_root, + &config, + )); // Mark repos referenced by this config (if present in store). // We do this via config parsing (no clone), so GC won't keep repos for missing configs. @@ -218,7 +226,6 @@ pub(crate) async fn cache_gc( // Update tracking file to drop configs that no longer exist. if !dry_run && kept_configs.len() != tracked_configs.len() { - let kept_configs = kept_configs.into_iter().map(Path::to_path_buf).collect(); store.update_tracked_configs(&kept_configs)?; } @@ -338,7 +345,11 @@ fn print_removed_details(printer: Printer, verb: &str, removal: &Removal) -> Res Ok(()) } -fn hook_env_keys_from_config(store: &Store, config: &config::Config) -> Vec { +fn hook_env_keys_from_config( + store: &Store, + project_root: &Path, + config: &config::Config, +) -> Vec { let mut keys = Vec::new(); for repo_config in &config.repos { @@ -392,7 +403,33 @@ fn hook_env_keys_from_config(store: &Store, config: &config::Config) -> Vec {} // Meta repos and builtin repos do not have hook envs. + ConfigRepo::SelfRepo(repo_config) => { + let manifest_path = project_root.join(PRE_COMMIT_HOOKS_YAML); + let Ok(manifest) = read_manifest(&manifest_path) else { + continue; + }; + let manifest_hooks: FxHashMap<&str, &config::ManifestHook> = manifest + .hooks + .iter() + .map(|hook| (hook.id.as_str(), hook)) + .collect(); + let project_root_dep = self_repo_dependency(project_root); + for hook_config in &repo_config.hooks { + let Some(manifest_hook) = manifest_hooks.get(hook_config.id.as_str()) else { + continue; + }; + let mut hook_spec: HookSpec = (*manifest_hook).clone().into(); + hook_spec.apply_remote_hook_overrides(hook_config); + match HookEnvKey::from_hook_spec(config, hook_spec, Some(&project_root_dep)) { + Ok(Some(key)) => keys.push(key), + Ok(None) => {} + Err(err) => { + warn!(hook = %hook_config.id, %err, "Failed to compute hook env key, skipping"); + } + } + } + } + _ => {} // Meta and builtin repos do not have cached hook envs. } } diff --git a/crates/prek/src/cli/completion.rs b/crates/prek/src/cli/completion.rs index 58aaea87..e37e467e 100644 --- a/crates/prek/src/cli/completion.rs +++ b/crates/prek/src/cli/completion.rs @@ -178,6 +178,11 @@ fn all_hooks(proj: &Project) -> Vec<(String, Option)> { out.push((h.id.clone(), Some(h.name.clone()))); } } + config::Repo::SelfRepo(cfg) => { + for h in &cfg.hooks { + out.push((h.id.clone(), h.name.as_ref().map(ToString::to_string))); + } + } } } out diff --git a/crates/prek/src/cli/run/run.rs b/crates/prek/src/cli/run/run.rs index d4619090..2759e947 100644 --- a/crates/prek/src/cli/run/run.rs +++ b/crates/prek/src/cli/run/run.rs @@ -87,7 +87,12 @@ pub(crate) async fn run( let reporter = HookInitReporter::new(printer); let lock = store.lock_async().await?; - store.track_configs(workspace.projects().iter().map(|p| p.config_file()))?; + store.track_configs( + workspace + .projects() + .iter() + .map(|p| (p.config_file(), p.path())), + )?; let hooks = workspace .init_hooks(store, Some(&reporter)) diff --git a/crates/prek/src/config.rs b/crates/prek/src/config.rs index eccca9bf..722f18f6 100644 --- a/crates/prek/src/config.rs +++ b/crates/prek/src/config.rs @@ -16,7 +16,9 @@ use crate::fs::Simplified; use crate::identify; use crate::install_source::InstallSource; #[cfg(feature = "schemars")] -use crate::schema::{schema_repo_builtin, schema_repo_local, schema_repo_meta, schema_repo_remote}; +use crate::schema::{ + schema_repo_builtin, schema_repo_local, schema_repo_meta, schema_repo_remote, schema_repo_self, +}; use crate::version; use crate::warn_user; use crate::warn_user_once; @@ -672,12 +674,31 @@ pub(crate) struct BuiltinRepo { _unused_keys: BTreeMap, } +#[derive(Debug, Clone, Deserialize)] +#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] +pub(crate) struct SelfRepo { + #[cfg_attr(feature = "schemars", schemars(schema_with = "schema_repo_self"))] + pub repo: String, + #[serde(skip_serializing)] + pub hooks: Vec, + + #[serde(skip_serializing, flatten)] + _unused_keys: BTreeMap, +} + +impl Display for SelfRepo { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("self") + } +} + #[derive(Debug, Clone)] pub(crate) enum Repo { Remote(RemoteRepo), Local(LocalRepo), Meta(MetaRepo), Builtin(BuiltinRepo), + SelfRepo(SelfRepo), } impl<'de> Deserialize<'de> for Repo { @@ -797,6 +818,22 @@ impl<'de> Deserialize<'de> for Repo { _unused_keys: unused, })) } + "self" => { + if rev.is_some() { + return Err(M::Error::custom("`rev` is not allowed for self repos")); + } + let hooks = match hooks.ok_or_else(|| M::Error::missing_field("hooks"))? { + HooksValue::Remote(hooks) => hooks, + HooksValue::Local(_) | HooksValue::Meta(_) | HooksValue::Builtin(_) => { + return Err(M::Error::custom("invalid hooks for self repo")); + } + }; + Ok(Repo::SelfRepo(SelfRepo { + repo: "self".to_string(), + hooks, + _unused_keys: unused, + })) + } _ => { let rev = rev.ok_or_else(|| M::Error::missing_field("rev"))?; let hooks = match hooks.ok_or_else(|| M::Error::missing_field("hooks"))? { @@ -949,6 +986,10 @@ fn collect_unused_paths(config: &Config) -> Vec { &builtin._unused_keys, Box::new(builtin.hooks.iter().map(|h| &h.options)), ), + Repo::SelfRepo(self_repo) => ( + &self_repo._unused_keys, + Box::new(self_repo.hooks.iter().map(|h| &h.options)), + ), }; push_unused_paths( @@ -1967,4 +2008,76 @@ mod tests { let config = serde_saphyr::from_str::(yaml).unwrap(); insta::assert_debug_snapshot!(config); } + + #[test] + fn parse_self_repo() { + let yaml = indoc::indoc! {r" + repos: + - repo: self + hooks: + - id: my-hook + "}; + let result = serde_saphyr::from_str::(yaml).unwrap(); + insta::assert_debug_snapshot!(result); + } + + #[test] + fn parse_self_repo_with_rev() { + let yaml = indoc::indoc! {r" + repos: + - repo: self + rev: v1.0.0 + hooks: + - id: my-hook + "}; + let err = serde_saphyr::from_str::(yaml).unwrap_err(); + insta::assert_snapshot!(err, @r" + error: line 2 column 5: `rev` is not allowed for self repos at line 2, column 5 + --> :2:5 + | + 1 | repos: + 2 | - repo: self + | ^ `rev` is not allowed for self repos at line 2, column 5 + 3 | rev: v1.0.0 + 4 | hooks: + | + "); + } + + #[test] + fn parse_self_repo_with_overrides() { + let yaml = indoc::indoc! {r" + repos: + - repo: self + hooks: + - id: my-hook + args: [--flag] + files: ^src/ + "}; + let result = serde_saphyr::from_str::(yaml).unwrap(); + insta::assert_debug_snapshot!(result); + } + + #[test] + fn parse_self_repo_rev_before_repo() { + let yaml = indoc::indoc! {r" + repos: + - rev: v1.0.0 + repo: self + hooks: + - id: my-hook + "}; + let err = serde_saphyr::from_str::(yaml).unwrap_err(); + insta::assert_snapshot!(err, @r" + error: line 2 column 5: `rev` is not allowed for self repos at line 2, column 5 + --> :2:5 + | + 1 | repos: + 2 | - rev: v1.0.0 + | ^ `rev` is not allowed for self repos at line 2, column 5 + 3 | repo: self + 4 | hooks: + | + "); + } } diff --git a/crates/prek/src/hook.rs b/crates/prek/src/hook.rs index 82e8cd72..c428373d 100644 --- a/crates/prek/src/hook.rs +++ b/crates/prek/src/hook.rs @@ -161,6 +161,12 @@ pub(crate) enum Repo { Builtin { hooks: Vec, }, + #[expect(clippy::enum_variant_names)] + SelfRepo { + /// Path to the project root containing the manifest. + path: PathBuf, + hooks: Vec, + }, } impl Repo { @@ -202,10 +208,25 @@ impl Repo { } } - /// Get the path to the cloned repo if it is a remote repo. + /// Construct a self repo from the project root, reading its manifest. + pub(crate) fn self_repo(project_root: PathBuf) -> Result { + let manifest_path = project_root.join(PRE_COMMIT_HOOKS_YAML); + let manifest = read_manifest(&manifest_path).map_err(|e| Error::Manifest { + repo: "self".to_string(), + error: e, + })?; + let hooks = manifest.hooks.into_iter().map(Into::into).collect(); + + Ok(Self::SelfRepo { + path: project_root, + hooks, + }) + } + + /// Get the path to the repo if it is a remote or self repo. pub(crate) fn path(&self) -> Option<&Path> { match self { - Repo::Remote { path, .. } => Some(path), + Repo::Remote { path, .. } | Repo::SelfRepo { path, .. } => Some(path), _ => None, } } @@ -213,10 +234,11 @@ impl Repo { /// Get a hook by id. pub(crate) fn get_hook(&self, id: &str) -> Option<&HookSpec> { let hooks = match self { - Repo::Remote { hooks, .. } => hooks, - Repo::Local { hooks } => hooks, - Repo::Meta { hooks } => hooks, - Repo::Builtin { hooks } => hooks, + Repo::Remote { hooks, .. } + | Repo::SelfRepo { hooks, .. } + | Repo::Local { hooks } + | Repo::Meta { hooks } + | Repo::Builtin { hooks } => hooks, }; hooks.iter().find(|hook| hook.id == id) } @@ -229,6 +251,7 @@ impl Display for Repo { Repo::Local { .. } => write!(f, "local"), Repo::Meta { .. } => write!(f, "meta"), Repo::Builtin { .. } => write!(f, "builtin"), + Repo::SelfRepo { .. } => write!(f, "self"), } } } @@ -557,14 +580,21 @@ impl Hook { /// Dependencies used to identify whether an existing hook environment can be reused. /// /// For remote hooks, the repo URL is included to avoid reusing an environment created - /// from a different remote repository. + /// from a different remote repository. For self-repo hooks, the project path is included + /// to avoid reusing an environment created from a different project. pub(crate) fn env_key_dependencies(&self) -> &FxHashSet { - if !self.is_remote() { - return &self.additional_dependencies; + match &*self.repo { + Repo::Remote { .. } => self.dependencies.get_or_init(|| { + env_key_dependencies(&self.additional_dependencies, Some(&self.repo.to_string())) + }), + Repo::SelfRepo { path, .. } => self.dependencies.get_or_init(|| { + env_key_dependencies( + &self.additional_dependencies, + Some(&self_repo_dependency(path)), + ) + }), + _ => &self.additional_dependencies, } - self.dependencies.get_or_init(|| { - env_key_dependencies(&self.additional_dependencies, Some(&self.repo.to_string())) - }) } /// Returns a lightweight view of the hook environment identity used for reusing installs. @@ -632,6 +662,14 @@ fn env_key_dependencies( deps } +/// Build the dependency token used to isolate self-repo hook environments per project. +pub(crate) fn self_repo_dependency(project_root: &Path) -> String { + fs_err::canonicalize(project_root) + .unwrap_or_else(|_| project_root.to_path_buf()) + .to_string_lossy() + .to_string() +} + /// Shared matching logic between a computed hook env key (owned or borrowed) and an installed /// environment described by [`InstallInfo`]. fn matches_install_info( diff --git a/crates/prek/src/hooks/meta_hooks.rs b/crates/prek/src/hooks/meta_hooks.rs index 16cbca32..955bded7 100644 --- a/crates/prek/src/hooks/meta_hooks.rs +++ b/crates/prek/src/hooks/meta_hooks.rs @@ -203,6 +203,7 @@ pub(crate) async fn check_useless_excludes( config::Repo::Local(r) => Box::new(r.hooks.iter().map(|h| (&h.id, &h.options))), config::Repo::Meta(r) => Box::new(r.hooks.iter().map(|h| (&h.id, &h.options))), config::Repo::Builtin(r) => Box::new(r.hooks.iter().map(|h| (&h.id, &h.options))), + config::Repo::SelfRepo(r) => Box::new(r.hooks.iter().map(|h| (&h.id, &h.options))), }; for (hook_id, opts) in hooks_iter { diff --git a/crates/prek/src/languages/mod.rs b/crates/prek/src/languages/mod.rs index 9043e195..e69fc9a8 100644 --- a/crates/prek/src/languages/mod.rs +++ b/crates/prek/src/languages/mod.rs @@ -282,7 +282,7 @@ impl Language { return hooks::run_fast_path(store, hook, filenames, reporter).await; } } - Repo::Local { .. } => {} + Repo::Local { .. } | Repo::SelfRepo { .. } => {} } match self { diff --git a/crates/prek/src/schema.rs b/crates/prek/src/schema.rs index 6aaaf337..91122f19 100644 --- a/crates/prek/src/schema.rs +++ b/crates/prek/src/schema.rs @@ -2,7 +2,7 @@ use std::borrow::Cow; use crate::config::{ BuiltinHook, BuiltinRepo, FilePattern, LocalRepo, MetaHook, MetaRepo, RemoteHook, RemoteRepo, - Repo, + Repo, SelfRepo, }; impl schemars::JsonSchema for FilePattern { @@ -141,15 +141,23 @@ pub(crate) fn schema_repo_builtin( }) } +pub(crate) fn schema_repo_self(_gen: &mut schemars::generate::SchemaGenerator) -> schemars::Schema { + schemars::json_schema!({ + "type": "string", + "const": "self", + "description": "Must be `self`. References the project's own `.pre-commit-hooks.yaml`.", + }) +} + pub(crate) fn schema_repo_remote( _gen: &mut schemars::generate::SchemaGenerator, ) -> schemars::Schema { schemars::json_schema!({ "type": "string", "not": { - "enum": ["local", "meta", "builtin"], + "enum": ["local", "meta", "builtin", "self"], }, - "description": "Remote repository location. Must not be `local`, `meta`, or `builtin`.", + "description": "Remote repository location. Must not be `local`, `meta`, `builtin`, or `self`.", }) } @@ -163,15 +171,17 @@ impl schemars::JsonSchema for Repo { let local_schema = r#gen.subschema_for::(); let meta_schema = r#gen.subschema_for::(); let builtin_schema = r#gen.subschema_for::(); + let self_schema = r#gen.subschema_for::(); schemars::json_schema!({ "type": "object", - "description": "A repository of hooks, which can be remote, local, meta, or builtin.", + "description": "A repository of hooks, which can be remote, local, meta, builtin, or self.", "oneOf": [ remote_schema, local_schema, meta_schema, builtin_schema, + self_schema, ], }) } diff --git a/crates/prek/src/store.rs b/crates/prek/src/store.rs index 4c9989ed..a2f62bc1 100644 --- a/crates/prek/src/store.rs +++ b/crates/prek/src/store.rs @@ -7,13 +7,14 @@ use anyhow::Result; use etcetera::BaseStrategy; use futures::StreamExt; use rustc_hash::FxHashSet; +use serde::{Deserialize, Serialize}; use thiserror::Error; use tracing::{debug, warn}; use prek_consts::env_vars::EnvVars; use crate::config::RemoteRepo; -use crate::fs::LockedFile; +use crate::fs::{CWD, LockedFile}; use crate::git::clone_repo; use crate::hook::InstallInfo; use crate::run::CONCURRENCY; @@ -43,6 +44,19 @@ fn expand_tilde(path: PathBuf) -> PathBuf { pub(crate) const REPO_MARKER: &str = ".prek-repo.json"; +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub(crate) struct TrackedConfig { + pub(crate) path: PathBuf, + pub(crate) project_root: PathBuf, +} + +#[derive(Debug, Deserialize)] +#[serde(untagged)] +enum TrackedConfigsFile { + Entries(Vec), + LegacyPaths(Vec), +} + /// A store for managing repos. #[derive(Debug)] pub struct Store { @@ -231,16 +245,29 @@ impl Store { /// /// Seed `config-tracking.json` from the workspace discovery cache if it doesn't exist. /// This is a one-time upgrade helper: it only does work when tracking is empty. - pub(crate) fn tracked_configs(&self) -> Result, Error> { + pub(crate) fn tracked_configs(&self) -> Result, Error> { let tracking_file = self.config_tracking_file(); match fs_err::read_to_string(&tracking_file) { Err(e) if e.kind() == std::io::ErrorKind::NotFound => {} Err(e) => return Err(e.into()), Ok(content) => { - let tracked = serde_json::from_str(&content).unwrap_or_else(|e| { - warn!("Failed to parse config tracking file: {e}, resetting"); - FxHashSet::default() - }); + let tracked = match serde_json::from_str::(&content) { + Ok(TrackedConfigsFile::Entries(entries)) => entries + .into_iter() + .map(|entry| TrackedConfig { + path: normalize_path(entry.path), + project_root: normalize_path(entry.project_root), + }) + .collect(), + Ok(TrackedConfigsFile::LegacyPaths(paths)) => paths + .into_iter() + .map(tracked_config_from_config_path) + .collect(), + Err(e) => { + warn!("Failed to parse config tracking file: {e}, resetting"); + FxHashSet::default() + } + }; return Ok(tracked); } } @@ -249,6 +276,10 @@ impl Store { if cached.is_empty() { return Ok(FxHashSet::default()); } + let cached = cached + .into_iter() + .map(tracked_config_from_config_path) + .collect::>(); debug!( count = cached.len(), @@ -262,11 +293,14 @@ impl Store { /// Track new config files for GC. pub(crate) fn track_configs<'a>( &self, - config_paths: impl Iterator, + configs: impl Iterator, ) -> Result<(), Error> { let mut tracked = self.tracked_configs()?; - for config_path in config_paths { - tracked.insert(config_path.to_path_buf()); + for (config_path, project_root) in configs { + tracked.insert(TrackedConfig { + path: normalize_path(config_path.to_path_buf()), + project_root: normalize_path(project_root.to_path_buf()), + }); } let tracking_file = self.config_tracking_file(); @@ -277,7 +311,10 @@ impl Store { } /// Update the tracked configs file. - pub(crate) fn update_tracked_configs(&self, configs: &FxHashSet) -> Result<(), Error> { + pub(crate) fn update_tracked_configs( + &self, + configs: &FxHashSet, + ) -> Result<(), Error> { let tracking_file = self.config_tracking_file(); let content = serde_json::to_string_pretty(configs)?; fs_err::write(&tracking_file, content)?; @@ -312,3 +349,19 @@ pub(crate) enum CacheBucket { fn to_hex(num: u64) -> String { hex::encode(num.to_le_bytes()) } + +fn normalize_path(mut path: PathBuf) -> PathBuf { + if path.is_relative() { + path = CWD.join(path); + } + fs_err::canonicalize(&path).unwrap_or(path) +} + +fn tracked_config_from_config_path(path: PathBuf) -> TrackedConfig { + let path = normalize_path(path); + let project_root = path + .parent() + .map(Path::to_path_buf) + .unwrap_or_else(|| path.clone()); + TrackedConfig { path, project_root } +} diff --git a/crates/prek/src/workspace.rs b/crates/prek/src/workspace.rs index e1a51e7e..bd53ff8f 100644 --- a/crates/prek/src/workspace.rs +++ b/crates/prek/src/workspace.rs @@ -18,7 +18,7 @@ use tracing::{debug, error, instrument, trace}; use crate::cli::run::Selectors; use crate::config::{self, Config, read_config}; -use crate::fs::Simplified; +use crate::fs::{CWD, Simplified}; use crate::git::GIT_ROOT; use crate::hook::HookSpec; use crate::hook::{self, Hook, HookBuilder, Repo}; @@ -116,6 +116,14 @@ impl Project { config_path: Cow<'_, Path>, root: Option, ) -> Result { + let mut config_path = config_path.into_owned(); + if config_path.is_relative() { + config_path = CWD.join(config_path); + } + if let Ok(canonical) = fs_err::canonicalize(&config_path) { + config_path = canonical; + } + debug!( path = %config_path.user_display(), "Loading project configuration" @@ -147,11 +155,12 @@ impl Project { } let root = root.unwrap_or_else(|| config_dir.to_path_buf()); + let root = fs_err::canonicalize(&root).unwrap_or(root); Ok(Self { root, config, - config_path: config_path.into_owned(), + config_path, idx: 0, relative_path: PathBuf::new(), repos: Vec::with_capacity(size), @@ -344,6 +353,10 @@ impl Project { let repo = Repo::builtin(repo.hooks.clone()); repos.push(Arc::new(repo)); } + config::Repo::SelfRepo(_) => { + let repo = Repo::self_repo(self.root.clone())?; + repos.push(Arc::new(repo)); + } } } @@ -424,6 +437,29 @@ impl Project { ); let hook = builder.build().await?; + hooks.push(hook); + } + } + config::Repo::SelfRepo(repo_config) => { + for hook_config in &repo_config.hooks { + let Some(manifest_hook) = repo.get_hook(&hook_config.id) else { + return Err(Error::HookNotFound { + hook: hook_config.id.clone(), + repo: repo.to_string(), + }); + }; + + let mut hook_spec = manifest_hook.clone(); + hook_spec.apply_remote_hook_overrides(hook_config); + + let builder = HookBuilder::new( + self.clone(), + Arc::clone(repo), + hook_spec, + hooks.len(), + ); + let hook = builder.build().await?; + hooks.push(hook); } } @@ -992,6 +1028,10 @@ impl Workspace { let repo = Repo::builtin(repo.hooks.clone()); repos.push(Arc::new(repo)); } + config::Repo::SelfRepo(_) => { + let repo = Repo::self_repo(project.root.clone())?; + repos.push(Arc::new(repo)); + } } } diff --git a/prek.schema.json b/prek.schema.json index 8b0427ca..5c3935e6 100644 --- a/prek.schema.json +++ b/prek.schema.json @@ -157,7 +157,7 @@ "additionalProperties": true, "definitions": { "Repo": { - "description": "A repository of hooks, which can be remote, local, meta, or builtin.", + "description": "A repository of hooks, which can be remote, local, meta, builtin, or self.", "type": "object", "oneOf": [ { @@ -171,6 +171,9 @@ }, { "$ref": "#/definitions/BuiltinRepo" + }, + { + "$ref": "#/definitions/SelfRepo" } ] }, @@ -178,13 +181,14 @@ "type": "object", "properties": { "repo": { - "description": "Remote repository location. Must not be `local`, `meta`, or `builtin`.", + "description": "Remote repository location. Must not be `local`, `meta`, `builtin`, or `self`.", "type": "string", "not": { "enum": [ "local", "meta", - "builtin" + "builtin", + "self" ] } }, @@ -1221,6 +1225,28 @@ "trailing-whitespace" ] }, + "SelfRepo": { + "type": "object", + "properties": { + "repo": { + "description": "Must be `self`. References the project's own `.pre-commit-hooks.yaml`.", + "type": "string", + "const": "self" + }, + "hooks": { + "type": "array", + "items": { + "$ref": "#/definitions/RemoteHook" + }, + "writeOnly": true + } + }, + "required": [ + "repo", + "hooks" + ], + "additionalProperties": true + }, "HookType": { "type": "string", "enum": [ From 6a74cca56864735d6ffd14cde8566ab8c07f0205 Mon Sep 17 00:00:00 2001 From: Shaan Majid <70789625+shaanmajid@users.noreply.github.com> Date: Sun, 8 Feb 2026 18:47:39 -0600 Subject: [PATCH 2/5] test: add integration tests for `repo: self` Cover system hook execution with file filtering, config overrides (name, args), missing manifest error, and unknown hook ID error. Verify cache GC retains self-repo hook environments across default, explicit relative, and subdirectory config paths. Verify GC prunes stale root-level envs when only a subdir config remains tracked. Add a cross-project isolation test: two projects sharing the same PREK_HOME must get separate hook environments. Fix the INSTA_FILTERS entry for Windows os-error normalization to use a targeted substitution instead of the broken greedy pattern. --- .../prek__config__tests__parse_self_repo.snap | 54 +++ ...tests__parse_self_repo_with_overrides.snap | 62 +++ crates/prek/tests/cache.rs | 429 +++++++++++++++++- crates/prek/tests/common/mod.rs | 4 +- crates/prek/tests/self_repo.rs | 260 +++++++++++ 5 files changed, 805 insertions(+), 4 deletions(-) create mode 100644 crates/prek/src/snapshots/prek__config__tests__parse_self_repo.snap create mode 100644 crates/prek/src/snapshots/prek__config__tests__parse_self_repo_with_overrides.snap create mode 100644 crates/prek/tests/self_repo.rs diff --git a/crates/prek/src/snapshots/prek__config__tests__parse_self_repo.snap b/crates/prek/src/snapshots/prek__config__tests__parse_self_repo.snap new file mode 100644 index 00000000..301abfd0 --- /dev/null +++ b/crates/prek/src/snapshots/prek__config__tests__parse_self_repo.snap @@ -0,0 +1,54 @@ +--- +source: crates/prek/src/config.rs +expression: result +--- +Config { + repos: [ + SelfRepo( + SelfRepo { + repo: "self", + hooks: [ + RemoteHook { + id: "my-hook", + name: None, + entry: None, + language: None, + priority: None, + options: HookOptions { + alias: None, + files: None, + exclude: None, + types: None, + types_or: None, + exclude_types: None, + additional_dependencies: None, + args: None, + env: None, + always_run: None, + fail_fast: None, + pass_filenames: None, + description: None, + language_version: None, + log_file: None, + require_serial: None, + stages: None, + verbose: None, + minimum_prek_version: None, + _unused_keys: {}, + }, + }, + ], + _unused_keys: {}, + }, + ), + ], + default_install_hook_types: None, + default_language_version: None, + default_stages: None, + files: None, + exclude: None, + fail_fast: None, + minimum_prek_version: None, + orphan: None, + _unused_keys: {}, +} diff --git a/crates/prek/src/snapshots/prek__config__tests__parse_self_repo_with_overrides.snap b/crates/prek/src/snapshots/prek__config__tests__parse_self_repo_with_overrides.snap new file mode 100644 index 00000000..7f62d352 --- /dev/null +++ b/crates/prek/src/snapshots/prek__config__tests__parse_self_repo_with_overrides.snap @@ -0,0 +1,62 @@ +--- +source: crates/prek/src/config.rs +expression: result +--- +Config { + repos: [ + SelfRepo( + SelfRepo { + repo: "self", + hooks: [ + RemoteHook { + id: "my-hook", + name: None, + entry: None, + language: None, + priority: None, + options: HookOptions { + alias: None, + files: Some( + Regex( + ^src/, + ), + ), + exclude: None, + types: None, + types_or: None, + exclude_types: None, + additional_dependencies: None, + args: Some( + [ + "--flag", + ], + ), + env: None, + always_run: None, + fail_fast: None, + pass_filenames: None, + description: None, + language_version: None, + log_file: None, + require_serial: None, + stages: None, + verbose: None, + minimum_prek_version: None, + _unused_keys: {}, + }, + }, + ], + _unused_keys: {}, + }, + ), + ], + default_install_hook_types: None, + default_language_version: None, + default_stages: None, + files: None, + exclude: None, + fail_fast: None, + minimum_prek_version: None, + orphan: None, + _unused_keys: {}, +} diff --git a/crates/prek/tests/cache.rs b/crates/prek/tests/cache.rs index 315657f8..5b77cfed 100644 --- a/crates/prek/tests/cache.rs +++ b/crates/prek/tests/cache.rs @@ -556,6 +556,431 @@ fn cache_gc_keeps_local_hook_env() -> anyhow::Result<()> { Ok(()) } +#[test] +fn cache_gc_keeps_self_repo_hook_env() -> anyhow::Result<()> { + let context = TestContext::new(); + context.init_project(); + + let cwd = context.work_dir(); + + // Create a manifest with a Python hook (non-system language). + cwd.child(".pre-commit-hooks.yaml") + .write_str(indoc::indoc! {r#" + - id: self-python + name: Self Python Hook + entry: python -c "print('hello from self')" + language: python + "#})?; + + // Self-repo Python hooks install the project root as a package, so provide + // a minimal setup.py to keep the installer happy. + cwd.child("setup.py") + .write_str("from setuptools import setup; setup(name='dummy', version='0.0.1')")?; + + context.write_pre_commit_config(indoc::indoc! {r" + repos: + - repo: self + hooks: + - id: self-python + "}); + + cwd.child("file.txt").write_str("Hello\n")?; + context.git_add("."); + + // Install + run the self-repo hook so it creates a hook env under PREK_HOME/hooks. + cmd_snapshot!(context.filters(), context.run(), @r" + success: true + exit_code: 0 + ----- stdout ----- + Self Python Hook.........................................................Passed + + ----- stderr ----- + "); + + let home = context.home_dir(); + let hooks_dir = home.child("hooks"); + + let mut self_envs = Vec::new(); + for entry in fs_err::read_dir(hooks_dir.path())? { + let entry = entry?; + if !entry.file_type()?.is_dir() { + continue; + } + + let name = entry.file_name().to_string_lossy().to_string(); + if name.starts_with("python-") { + self_envs.push(name); + } + } + + assert!( + !self_envs.is_empty(), + "expected at least one self-repo hook env" + ); + + // Add an obviously-unused entry to ensure GC does work. + home.child("hooks/unused-hook-env").create_dir_all()?; + + cmd_snapshot!(context.filters(), context.command().args(["cache", "gc"]), @r" + success: true + exit_code: 0 + ----- stdout ----- + Removed 1 hook env ([SIZE]) + + ----- stderr ----- + "); + + // The self-repo hook env(s) should remain. + for env in self_envs { + home.child(format!("hooks/{env}")) + .assert(predicates::path::is_dir()); + } + // Unused should be swept. + home.child("hooks/unused-hook-env") + .assert(predicates::path::missing()); + + Ok(()) +} + +#[test] +fn cache_gc_keeps_self_repo_hook_env_with_explicit_relative_config_path() -> anyhow::Result<()> { + let context = TestContext::new(); + context.init_project(); + + let cwd = context.work_dir(); + + // Create a manifest with a Python hook (non-system language). + cwd.child(".pre-commit-hooks.yaml") + .write_str(indoc::indoc! {r#" + - id: self-python + name: Self Python Hook + entry: python -c "print('hello from self')" + language: python + "#})?; + + // Self-repo Python hooks install the project root as a package, so provide + // a minimal setup.py to keep the installer happy. + cwd.child("setup.py") + .write_str("from setuptools import setup; setup(name='dummy', version='0.0.1')")?; + + context.write_pre_commit_config(indoc::indoc! {r" + repos: + - repo: self + hooks: + - id: self-python + "}); + + cwd.child("file.txt").write_str("Hello\n")?; + context.git_add("."); + + // Run with an explicit relative --config path to exercise path normalization. + cmd_snapshot!( + context.filters(), + context + .command() + .args(["run", "--config", ".pre-commit-config.yaml"]), + @r" + success: true + exit_code: 0 + ----- stdout ----- + Self Python Hook.........................................................Passed + + ----- stderr ----- + " + ); + + let home = context.home_dir(); + let hooks_dir = home.child("hooks"); + + let mut self_envs = Vec::new(); + for entry in fs_err::read_dir(hooks_dir.path())? { + let entry = entry?; + if !entry.file_type()?.is_dir() { + continue; + } + + let name = entry.file_name().to_string_lossy().to_string(); + if name.starts_with("python-") { + self_envs.push(name); + } + } + + assert!( + !self_envs.is_empty(), + "expected at least one self-repo hook env" + ); + + // Add an obviously-unused entry to ensure GC does work. + home.child("hooks/unused-hook-env").create_dir_all()?; + + cmd_snapshot!(context.filters(), context.command().args(["cache", "gc"]), @r" + success: true + exit_code: 0 + ----- stdout ----- + Removed 1 hook env ([SIZE]) + + ----- stderr ----- + "); + + // The self-repo hook env(s) should remain. + for env in self_envs { + home.child(format!("hooks/{env}")) + .assert(predicates::path::is_dir()); + } + // Unused should be swept. + home.child("hooks/unused-hook-env") + .assert(predicates::path::missing()); + + Ok(()) +} + +#[test] +fn cache_gc_keeps_self_repo_hook_env_with_subdir_config_path() -> anyhow::Result<()> { + let context = TestContext::new(); + context.init_project(); + + let cwd = context.work_dir(); + let subdir = cwd.child("sub"); + subdir.create_dir_all()?; + + // Manifest stays at git root. + cwd.child(".pre-commit-hooks.yaml") + .write_str(indoc::indoc! {r#" + - id: self-python + name: Self Python Hook + entry: python -c "print('hello from self')" + language: python + "#})?; + + cwd.child("setup.py") + .write_str("from setuptools import setup; setup(name='dummy', version='0.0.1')")?; + + // Config lives in a subdirectory and references repo:self. + subdir + .child(".pre-commit-config.yaml") + .write_str(indoc::indoc! {r" + repos: + - repo: self + hooks: + - id: self-python + "})?; + + cwd.child("file.txt").write_str("Hello\n")?; + context.git_add("."); + + cmd_snapshot!( + context.filters(), + context + .command() + .args(["run", "--config", "sub/.pre-commit-config.yaml"]), + @r" + success: true + exit_code: 0 + ----- stdout ----- + Self Python Hook.........................................................Passed + + ----- stderr ----- + " + ); + + let home = context.home_dir(); + let hooks_dir = home.child("hooks"); + + let mut self_envs = Vec::new(); + for entry in fs_err::read_dir(hooks_dir.path())? { + let entry = entry?; + if !entry.file_type()?.is_dir() { + continue; + } + + let name = entry.file_name().to_string_lossy().to_string(); + if name.starts_with("python-") { + self_envs.push(name); + } + } + + assert!( + !self_envs.is_empty(), + "expected at least one self-repo hook env" + ); + + home.child("hooks/unused-hook-env").create_dir_all()?; + + cmd_snapshot!(context.filters(), context.command().args(["cache", "gc"]), @r" + success: true + exit_code: 0 + ----- stdout ----- + Removed 1 hook env ([SIZE]) + + ----- stderr ----- + "); + + for env in self_envs { + home.child(format!("hooks/{env}")) + .assert(predicates::path::is_dir()); + } + home.child("hooks/unused-hook-env") + .assert(predicates::path::missing()); + + Ok(()) +} + +#[test] +fn cache_gc_prunes_stale_root_self_repo_env_when_only_subdir_config_tracked() -> anyhow::Result<()> +{ + let context = TestContext::new(); + context.init_project(); + + let cwd = context.work_dir(); + let subdir = cwd.child("sub"); + subdir.create_dir_all()?; + + // Root manifest + config. + cwd.child(".pre-commit-hooks.yaml") + .write_str(indoc::indoc! {r#" + - id: shared + name: Root Hook + entry: python -c "print('root')" + language: python + "#})?; + cwd.child("setup.py") + .write_str("from setuptools import setup; setup(name='rootpkg', version='0.0.1')")?; + context.write_pre_commit_config(indoc::indoc! {r" + repos: + - repo: self + hooks: + - id: shared + "}); + + // Subdir manifest + config with same hook id. + subdir + .child(".pre-commit-hooks.yaml") + .write_str(indoc::indoc! {r#" + - id: shared + name: Sub Hook + entry: python -c "print('sub')" + language: python + "#})?; + subdir + .child("setup.py") + .write_str("from setuptools import setup; setup(name='subpkg', version='0.0.1')")?; + subdir + .child(".pre-commit-config.yaml") + .write_str(indoc::indoc! {r" + repos: + - repo: self + hooks: + - id: shared + "})?; + + cwd.child("file.txt").write_str("Hello\n")?; + context.git_add("."); + + // Install root env. + cmd_snapshot!( + context.filters(), + context.command().args(["run", "--config", ".pre-commit-config.yaml"]), + @r" + success: true + exit_code: 0 + ----- stdout ----- + Root Hook................................................................Passed + + ----- stderr ----- + " + ); + + // Drop root config; only subdir config should remain tracked after GC. + fs_err::remove_file(cwd.child(".pre-commit-config.yaml").path())?; + context.git_add("."); + + // Install subdir env and ensure subdir config is tracked. + cmd_snapshot!( + context.filters(), + context.command().arg("run").current_dir(subdir.path()), + @r" + success: true + exit_code: 0 + ----- stdout ----- + Sub Hook.................................................................Passed + + ----- stderr ----- + " + ); + + let home = context.home_dir(); + let hooks_dir = home.child("hooks"); + let root_dep = fs_err::canonicalize(cwd.path()) + .unwrap_or_else(|_| cwd.path().to_path_buf()) + .to_string_lossy() + .to_string(); + let sub_dep = fs_err::canonicalize(subdir.path()) + .unwrap_or_else(|_| subdir.path().to_path_buf()) + .to_string_lossy() + .to_string(); + + let mut root_envs = Vec::new(); + let mut sub_envs = Vec::new(); + for entry in fs_err::read_dir(hooks_dir.path())? { + let entry = entry?; + if !entry.file_type()?.is_dir() { + continue; + } + let env_dir = entry.path(); + let marker_path = env_dir.join(".prek-hook.json"); + if !marker_path.is_file() { + continue; + } + let marker = fs_err::read_to_string(&marker_path)?; + let marker_json: serde_json::Value = serde_json::from_str(&marker)?; + let deps = marker_json["dependencies"] + .as_array() + .cloned() + .unwrap_or_default(); + let dep_strings: Vec = deps + .into_iter() + .filter_map(|v| v.as_str().map(ToString::to_string)) + .collect(); + if dep_strings.iter().any(|d| d == &root_dep) { + root_envs.push(env_dir.clone()); + } + if dep_strings.iter().any(|d| d == &sub_dep) { + sub_envs.push(env_dir); + } + } + assert!(!root_envs.is_empty(), "expected at least one root self env"); + assert!( + !sub_envs.is_empty(), + "expected at least one subdir self env" + ); + + cmd_snapshot!(context.filters(), context.command().args(["cache", "gc"]), @r" + success: true + exit_code: 0 + ----- stdout ----- + Removed 1 hook env ([SIZE]) + + ----- stderr ----- + "); + + for env in root_envs { + assert!( + !env.exists(), + "expected stale root self env to be pruned: {}", + env.display() + ); + } + for env in sub_envs { + assert!( + env.exists(), + "expected active subdir self env to remain: {}", + env.display() + ); + } + + Ok(()) +} + fn write_config_tracking_file( home: &ChildPath, configs: &[&std::path::Path], @@ -673,7 +1098,7 @@ fn cache_gc_drops_missing_tracked_config() -> anyhow::Result<()> { // Tracking file should be updated to drop the missing config. let content = fs_err::read_to_string(home.child("config-tracking.json").path())?; - let tracked: Vec = serde_json::from_str(&content)?; + let tracked: Vec = serde_json::from_str(&content)?; assert!(tracked.is_empty()); // Scratch and patches are always cleared when GC runs. @@ -714,7 +1139,7 @@ fn cache_gc_keeps_tracked_config_on_parse_error() -> anyhow::Result<()> { // Parse errors should not drop the config from tracking. let content = fs_err::read_to_string(home.child("config-tracking.json").path())?; - let tracked: Vec = serde_json::from_str(&content)?; + let tracked: Vec = serde_json::from_str(&content)?; assert_eq!(tracked.len(), 1); Ok(()) diff --git a/crates/prek/tests/common/mod.rs b/crates/prek/tests/common/mod.rs index e8d07785..029e4fdd 100644 --- a/crates/prek/tests/common/mod.rs +++ b/crates/prek/tests/common/mod.rs @@ -400,8 +400,8 @@ pub const INSTA_FILTERS: &[(&str, &str)] = &[ (r"\\([\w\d]|\.\.|\.)", "/$1"), // The exact message is host language dependent ( - r"Caused by: .* \(os error 2\)", - "Caused by: No such file or directory (os error 2)", + r"The system cannot find the file specified\. \(os error 2\)", + "No such file or directory (os error 2)", ), // Time seconds (r"\b(\d+\.)?\d+(ms|s)\b", "[TIME]"), diff --git a/crates/prek/tests/self_repo.rs b/crates/prek/tests/self_repo.rs new file mode 100644 index 00000000..14e8c291 --- /dev/null +++ b/crates/prek/tests/self_repo.rs @@ -0,0 +1,260 @@ +mod common; + +use crate::common::{TestContext, cmd_snapshot, git_cmd}; + +use assert_cmd::assert::OutputAssertExt; +use assert_fs::fixture::{FileWriteStr, PathChild, PathCreateDir}; +use prek_consts::env_vars::EnvVars; + +#[test] +fn self_repo_system_hook() -> anyhow::Result<()> { + let context = TestContext::new(); + context.init_project(); + + let cwd = context.work_dir(); + + cwd.child(".pre-commit-hooks.yaml") + .write_str("- id: echo-hook\n name: echo hook\n entry: echo\n language: system\n files: \"\\\\.(txt|md)$\"\n")?; + + cwd.child("hello.txt").write_str("Hello\n")?; + cwd.child("ignored.rs").write_str("fn main() {}\n")?; + + context.write_pre_commit_config(indoc::indoc! {r" + repos: + - repo: self + hooks: + - id: echo-hook + "}); + context.git_add("."); + + cmd_snapshot!(context.filters(), context.run(), @r" + success: true + exit_code: 0 + ----- stdout ----- + echo hook................................................................Passed + + ----- stderr ----- + "); + + Ok(()) +} + +#[test] +fn self_repo_with_overrides() -> anyhow::Result<()> { + let context = TestContext::new(); + context.init_project(); + + let cwd = context.work_dir(); + + cwd.child(".pre-commit-hooks.yaml") + .write_str(indoc::indoc! {r" + - id: echo-hook + name: echo hook + entry: echo + language: system + "})?; + + cwd.child("hello.txt").write_str("Hello\n")?; + + context.write_pre_commit_config(indoc::indoc! {r" + repos: + - repo: self + hooks: + - id: echo-hook + name: overridden name + args: [--verbose] + "}); + context.git_add("."); + + cmd_snapshot!(context.filters(), context.run(), @r" + success: true + exit_code: 0 + ----- stdout ----- + overridden name..........................................................Passed + + ----- stderr ----- + "); + + Ok(()) +} + +#[test] +fn self_repo_missing_manifest() { + let context = TestContext::new(); + context.init_project(); + + context + .work_dir() + .child("hello.txt") + .write_str("Hello\n") + .unwrap(); + + context.write_pre_commit_config(indoc::indoc! {r" + repos: + - repo: self + hooks: + - id: my-hook + "}); + context.git_add("."); + + cmd_snapshot!(context.filters(), context.run(), @r" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: Failed to init hooks + caused by: Failed to read manifest of `self` + caused by: failed to open file `[TEMP_DIR]/.pre-commit-hooks.yaml`: No such file or directory (os error 2) + "); +} + +#[test] +fn self_repo_unknown_hook_id() -> anyhow::Result<()> { + let context = TestContext::new(); + context.init_project(); + + let cwd = context.work_dir(); + + cwd.child(".pre-commit-hooks.yaml") + .write_str(indoc::indoc! {r" + - id: real-hook + name: real hook + entry: echo + language: system + "})?; + + cwd.child("hello.txt").write_str("Hello\n")?; + + context.write_pre_commit_config(indoc::indoc! {r" + repos: + - repo: self + hooks: + - id: nonexistent-hook + "}); + context.git_add("."); + + cmd_snapshot!(context.filters(), context.run(), @r" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: Failed to init hooks + caused by: Hook `nonexistent-hook` not present in repo `self` + "); + + Ok(()) +} + +/// Two projects sharing the same `PREK_HOME` must not reuse each other's +/// self-repo hook environments. Each project installs its own package +/// into the env, so sharing would cause incorrect behavior. +#[test] +fn self_repo_cross_project_env_isolation() -> anyhow::Result<()> { + // Use one TestContext for the shared PREK_HOME. + let context = TestContext::new(); + let home = context.home_dir(); + + // Set up two independent projects under the context root. + let project_a = context.work_dir().child("project_a"); + let project_b = context.work_dir().child("project_b"); + project_a.create_dir_all()?; + project_b.create_dir_all()?; + + for (project, label) in [(&project_a, "a"), (&project_b, "b")] { + git_cmd(project) + .arg("-c") + .arg("init.defaultBranch=master") + .arg("init") + .assert() + .success(); + + project.child(".pre-commit-hooks.yaml").write_str(&format!( + indoc::indoc! {r#" + - id: greet + name: greet + entry: python -c "print('from-{label}')" + language: python + "#}, + label = label, + ))?; + + project.child("setup.py").write_str(&format!( + "from setuptools import setup; setup(name='project-{label}', version='0.0.1')", + ))?; + + project + .child(".pre-commit-config.yaml") + .write_str(indoc::indoc! {r" + repos: + - repo: self + hooks: + - id: greet + "})?; + + project.child("file.txt").write_str("Hello\n")?; + + git_cmd(project).args(["add", "."]).assert().success(); + } + + let prek_bin = EnvVars::var_os("NEXTEST_BIN_EXE_prek") + .map(std::path::PathBuf::from) + .unwrap_or_else(|| std::path::PathBuf::from(assert_cmd::cargo::cargo_bin!("prek"))); + + // Run project A — installs its env. + let output_a = std::process::Command::new(&prek_bin) + .arg("run") + .current_dir(&*project_a) + .env(EnvVars::PREK_HOME, &**home) + .env(EnvVars::PREK_INTERNAL__SORT_FILENAMES, "1") + .env("GIT_CONFIG_COUNT", "1") + .env("GIT_CONFIG_KEY_0", "core.autocrlf") + .env("GIT_CONFIG_VALUE_0", "false") + .output()?; + + let stdout_a = String::from_utf8_lossy(&output_a.stdout); + assert!( + output_a.status.success(), + "project A failed:\nstdout: {stdout_a}\nstderr: {}", + String::from_utf8_lossy(&output_a.stderr), + ); + assert!(stdout_a.contains("Passed"), "project A hook did not pass"); + + // Run project B — must create a separate env, not reuse A's. + let output_b = std::process::Command::new(&prek_bin) + .arg("run") + .current_dir(&*project_b) + .env(EnvVars::PREK_HOME, &**home) + .env(EnvVars::PREK_INTERNAL__SORT_FILENAMES, "1") + .env("GIT_CONFIG_COUNT", "1") + .env("GIT_CONFIG_KEY_0", "core.autocrlf") + .env("GIT_CONFIG_VALUE_0", "false") + .output()?; + + let stdout_b = String::from_utf8_lossy(&output_b.stdout); + assert!( + output_b.status.success(), + "project B failed:\nstdout: {stdout_b}\nstderr: {}", + String::from_utf8_lossy(&output_b.stderr), + ); + assert!(stdout_b.contains("Passed"), "project B hook did not pass"); + + // Verify the hook envs are distinct (at least 2 python-* dirs under hooks/). + let hooks_dir = home.child("hooks"); + let python_envs: Vec<_> = fs_err::read_dir(hooks_dir.path())? + .filter_map(Result::ok) + .filter(|e| { + e.file_type().map(|t| t.is_dir()).unwrap_or(false) + && e.file_name().to_string_lossy().starts_with("python-") + }) + .collect(); + + assert!( + python_envs.len() >= 2, + "expected at least 2 separate python hook envs, found {}", + python_envs.len(), + ); + + Ok(()) +} From 05ebbe2b130c7a14b43e3bd2adda44b1562c3b69 Mon Sep 17 00:00:00 2001 From: Shaan Majid <70789625+shaanmajid@users.noreply.github.com> Date: Sun, 8 Feb 2026 18:47:48 -0600 Subject: [PATCH 3/5] docs: document `repo: self` feature Add `repo: self` section to configuration.md with TOML and YAML examples. Update the prek-specific extensions list and diff.md. --- docs/configuration.md | 50 +++++++++++++++++++++++++++++++++++++++++-- docs/diff.md | 1 + 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index f9b9c899..d0fde389 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -41,6 +41,7 @@ Notable differences (when using YAML): - **Workspace mode** is a `prek` feature that can discover multiple projects; upstream `pre-commit` is single-project. - `files` / `exclude` can be written as **glob mappings** in `prek` (in addition to regex), which is not supported by upstream `pre-commit`. - `repo: builtin` adds fast built-in hooks in `prek`. +- `repo: self` lets a project consume its own `.pre-commit-hooks.yaml` hooks. - Upstream `pre-commit` uses `minimum_pre_commit_version`, while `prek` uses `minimum_prek_version` and intentionally ignores `minimum_pre_commit_version`. ### Prek-only extensions @@ -53,6 +54,7 @@ They work in both YAML and TOML, but they only matter for compatibility if you s - [`orphan`](#prek-only-orphan) - Repo type: - [`repo: builtin`](#prek-only-repo-builtin) + - [`repo: self`](#prek-only-repo-self) - Hook-level: - [`env`](#prek-only-env) - [`priority`](#prek-only-priority) @@ -279,6 +281,7 @@ Each entry is one of: - `repo: local` for hooks defined directly in your repository - `repo: meta` for built-in meta hooks - `repo: builtin` for `prek`'s built-in fast hooks +- `repo: self` for consuming hooks from the project's own `.pre-commit-hooks.yaml` See [Repo entries](#repo-entries). @@ -749,7 +752,50 @@ Example: - id: check-yaml ``` -For the list of available built-in hooks and the “automatic fast path” behavior, see [Built-in Fast Hooks](builtin.md). +For the list of available built-in hooks and the "automatic fast path" behavior, see [Built-in Fast Hooks](builtin.md). + +#### `repo: self` + + + +!!! note "prek-only" + + `repo: self` is specific to `prek` and is not compatible with upstream `pre-commit`. + +For projects that publish hooks via a `.pre-commit-hooks.yaml` manifest, `repo: self` +lets you consume those same hooks without duplicating their definitions as `repo: local`. + +`prek` reads `.pre-commit-hooks.yaml` from the project root and resolves hooks by `id`. +Only `id` is required — all other fields are optional overrides, the same as remote hooks. + +`rev` is not allowed — the manifest is always read from the current project directory. + +If `.pre-commit-hooks.yaml` does not exist in the project root, `prek` reports an error +at hook initialization time. + +Example: + +=== "prek.toml" + + ```toml + [[repos]] + repo = "self" + hooks = [ + { id = "format-json" }, + { id = "lint-shell", args = ["--severity=error"] }, + ] + ``` + +=== ".pre-commit-config.yaml" + + ```yaml + repos: + - repo: self + hooks: + - id: format-json + - id: lint-shell + args: [--severity=error] + ``` ### Hook entries @@ -784,7 +830,7 @@ You can optionally provide `name` and normal hook options (filters, stages, etc) ### Common hook options -These keys can appear on hooks (remote/local/builtin/meta), subject to the restrictions above. +These keys can appear on hooks (remote/local/builtin/meta/self), subject to the restrictions above. #### `id` diff --git a/docs/diff.md b/docs/diff.md index fc4ce95d..f634fb41 100644 --- a/docs/diff.md +++ b/docs/diff.md @@ -5,6 +5,7 @@ - `prek` supports both `.pre-commit-config.yaml` and `.pre-commit-config.yml` configuration files. - `prek` implements some common hooks from `pre-commit-hooks` in Rust for better performance. - `prek` supports `repo: builtin` for offline, zero-setup hooks. +- `prek` supports `repo: self` for projects that publish hooks via `.pre-commit-hooks.yaml` to consume their own hooks. - `prek` uses `~/.cache/prek` as the default cache directory for repos, environments and toolchains. - `prek` decoupled hook environment from their repositories, allowing shared toolchains and environments across hooks. - `prek` supports `language_version` as a semver specifier and automatically installs the required toolchains. From df0176a5d320683f10594eecedc87860540188d5 Mon Sep 17 00:00:00 2001 From: Shaan Majid <70789625+shaanmajid@users.noreply.github.com> Date: Mon, 9 Feb 2026 18:32:24 -0600 Subject: [PATCH 4/5] refactor: remove caching for self hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Self-repo hook environments are ephemeral — installed fresh each run and cleaned up after. Remove the persistence and GC machinery that treated them like cached remote-repo environments. Introduce SelfRepoEnvCleanupGuard with Drop-based cleanup, deduplicate env paths, and add integration tests for no-persistence and cross-project isolation semantics. --- crates/prek/src/cli/cache_clean.rs | 4 +- crates/prek/src/cli/cache_gc.rs | 60 +--- crates/prek/src/cli/run/run.rs | 222 +++++++++++---- crates/prek/src/hook.rs | 4 +- crates/prek/src/store.rs | 56 ++-- crates/prek/tests/cache.rs | 425 ----------------------------- crates/prek/tests/self_repo.rs | 157 +++++++++-- docs/configuration.md | 4 + docs/diff.md | 2 +- 9 files changed, 341 insertions(+), 593 deletions(-) diff --git a/crates/prek/src/cli/cache_clean.rs b/crates/prek/src/cli/cache_clean.rs index 488ce601..d7b67f44 100644 --- a/crates/prek/src/cli/cache_clean.rs +++ b/crates/prek/src/cli/cache_clean.rs @@ -16,7 +16,9 @@ pub(crate) fn cache_clean(store: &Store, printer: Printer) -> Result return Ok(ExitStatus::Success); } - if let Err(e) = fix_permissions(store.cache_path(CacheBucket::Go)) { + if let Err(e) = fix_permissions(store.cache_path(CacheBucket::Go)) + && e.kind() != io::ErrorKind::NotFound + { error!("Failed to fix permissions: {}", e); } diff --git a/crates/prek/src/cli/cache_gc.rs b/crates/prek/src/cli/cache_gc.rs index ed570cad..31ed1a54 100644 --- a/crates/prek/src/cli/cache_gc.rs +++ b/crates/prek/src/cli/cache_gc.rs @@ -5,7 +5,6 @@ use std::path::Path; use anyhow::Result; use owo_colors::OwoColorize; -use prek_consts::PRE_COMMIT_HOOKS_YAML; use rustc_hash::FxHashMap; use rustc_hash::FxHashSet; use strum::IntoEnumIterator; @@ -13,12 +12,10 @@ use tracing::{debug, trace, warn}; use crate::cli::ExitStatus; use crate::cli::cache_size::{dir_size_bytes, human_readable_bytes}; -use crate::config::{self, Error as ConfigError, Repo as ConfigRepo, load_config, read_manifest}; -use crate::hook::{ - HOOK_MARKER, HookEnvKey, HookSpec, InstallInfo, Repo as HookRepo, self_repo_dependency, -}; +use crate::config::{self, Error as ConfigError, Repo as ConfigRepo, load_config}; +use crate::hook::{HOOK_MARKER, HookEnvKey, HookSpec, InstallInfo, Repo as HookRepo}; use crate::printer::Printer; -use crate::store::{CacheBucket, REPO_MARKER, Store, ToolBucket, TrackedConfig}; +use crate::store::{CacheBucket, REPO_MARKER, Store, ToolBucket}; #[derive(Debug, Copy, Clone, Eq, PartialEq)] enum RemovalKind { @@ -150,7 +147,7 @@ pub(crate) async fn cache_gc( return Ok(ExitStatus::Success); } - let mut kept_configs: FxHashSet = FxHashSet::default(); + let mut kept_configs: FxHashSet = FxHashSet::default(); let mut used_repo_keys: FxHashSet = FxHashSet::default(); let mut used_hook_env_dirs: FxHashSet = FxHashSet::default(); let mut used_tools: FxHashSet = FxHashSet::default(); @@ -163,8 +160,7 @@ pub(crate) async fn cache_gc( let installed = store.installed_hooks().await; - for tracked in &tracked_configs { - let config_path = &tracked.path; + for config_path in &tracked_configs { let config = match load_config(config_path) { Ok(config) => { trace!(path = %config_path.display(), "Found tracked config"); @@ -176,19 +172,15 @@ pub(crate) async fn cache_gc( continue; } err => { - warn!(path = %config_path.display(), %err, "Failed to parse config, skipping for GC"); - kept_configs.insert(tracked.clone()); + debug!(path = %config_path.display(), %err, "Failed to parse config, skipping for GC"); + kept_configs.insert(config_path.clone()); continue; } }, }; - kept_configs.insert(tracked.clone()); + kept_configs.insert(config_path.clone()); - used_env_keys.extend(hook_env_keys_from_config( - store, - &tracked.project_root, - &config, - )); + used_env_keys.extend(hook_env_keys_from_config(store, &config)); // Mark repos referenced by this config (if present in store). // We do this via config parsing (no clone), so GC won't keep repos for missing configs. @@ -345,11 +337,7 @@ fn print_removed_details(printer: Printer, verb: &str, removal: &Removal) -> Res Ok(()) } -fn hook_env_keys_from_config( - store: &Store, - project_root: &Path, - config: &config::Config, -) -> Vec { +fn hook_env_keys_from_config(store: &Store, config: &config::Config) -> Vec { let mut keys = Vec::new(); for repo_config in &config.repos { @@ -403,33 +391,7 @@ fn hook_env_keys_from_config( } } } - ConfigRepo::SelfRepo(repo_config) => { - let manifest_path = project_root.join(PRE_COMMIT_HOOKS_YAML); - let Ok(manifest) = read_manifest(&manifest_path) else { - continue; - }; - let manifest_hooks: FxHashMap<&str, &config::ManifestHook> = manifest - .hooks - .iter() - .map(|hook| (hook.id.as_str(), hook)) - .collect(); - let project_root_dep = self_repo_dependency(project_root); - for hook_config in &repo_config.hooks { - let Some(manifest_hook) = manifest_hooks.get(hook_config.id.as_str()) else { - continue; - }; - let mut hook_spec: HookSpec = (*manifest_hook).clone().into(); - hook_spec.apply_remote_hook_overrides(hook_config); - match HookEnvKey::from_hook_spec(config, hook_spec, Some(&project_root_dep)) { - Ok(Some(key)) => keys.push(key), - Ok(None) => {} - Err(err) => { - warn!(hook = %hook_config.id, %err, "Failed to compute hook env key, skipping"); - } - } - } - } - _ => {} // Meta and builtin repos do not have cached hook envs. + _ => {} // Self, meta, and builtin repos do not have cached hook envs. } } diff --git a/crates/prek/src/cli/run/run.rs b/crates/prek/src/cli/run/run.rs index 2759e947..7c422e49 100644 --- a/crates/prek/src/cli/run/run.rs +++ b/crates/prek/src/cli/run/run.rs @@ -87,12 +87,7 @@ pub(crate) async fn run( let reporter = HookInitReporter::new(printer); let lock = store.lock_async().await?; - store.track_configs( - workspace - .projects() - .iter() - .map(|p| (p.config_file(), p.path())), - )?; + store.track_configs(workspace.projects().iter().map(|p| p.config_file()))?; let hooks = workspace .init_hooks(store, Some(&reporter)) @@ -166,57 +161,111 @@ pub(crate) async fn run( ); let reporter = HookInstallReporter::new(printer); let installed_hooks = install_hooks(filtered_hooks, store, &reporter).await?; + let self_repo_env_paths = collect_self_repo_env_paths(&installed_hooks); + let _self_repo_env_cleanup_guard = SelfRepoEnvCleanupGuard::new(self_repo_env_paths); + + async { + // Release the store lock. + drop(lock); + + // Clear any unstaged changes from the git working directory. + let mut _guard = None; + if should_stash { + _guard = Some( + WorkTreeKeeper::clean(store, workspace.root()) + .await + .context("Failed to clean work tree")?, + ); + } - // Release the store lock. - drop(lock); + set_env_vars(from_ref.as_ref(), to_ref.as_ref(), &extra_args); + + let filenames = collect_files( + workspace.root(), + CollectOptions { + hook_stage, + from_ref, + to_ref, + all_files, + files, + directories, + commit_msg_filename: extra_args.commit_msg_filename, + }, + ) + .await + .context("Failed to collect files")?; + + // Change to the workspace root directory. + std::env::set_current_dir(workspace.root()).with_context(|| { + format!( + "Failed to change directory to `{}`", + workspace.root().display() + ) + })?; + + run_hooks( + &workspace, + &installed_hooks, + filenames, + store, + show_diff_on_failure, + fail_fast, + dry_run, + verbose, + printer, + ) + .await + } + .await +} - // Clear any unstaged changes from the git working directory. - let mut _guard = None; - if should_stash { - _guard = Some( - WorkTreeKeeper::clean(store, workspace.root()) - .await - .context("Failed to clean work tree")?, - ); +fn collect_self_repo_env_paths(installed_hooks: &[InstalledHook]) -> Vec { + dedupe_paths(installed_hooks.iter().filter_map(|hook| { + matches!(hook.repo(), Repo::SelfRepo { .. }) + .then(|| hook.env_path().map(std::path::Path::to_path_buf)) + .flatten() + })) +} + +fn dedupe_paths(paths: impl IntoIterator) -> Vec { + let mut seen = FxHashSet::default(); + paths + .into_iter() + .filter(|path| seen.insert(path.clone())) + .collect() +} + +fn cleanup_self_repo_env_paths(paths: &[PathBuf]) { + for path in paths { + match fs_err::remove_dir_all(path) { + Ok(()) => { + debug!("Removed ephemeral self-repo hook env `{}`", path.display()); + } + Err(err) if err.kind() == std::io::ErrorKind::NotFound => {} + Err(err) => { + warn!( + "Failed to remove ephemeral self-repo hook env `{}`: {err}", + path.display() + ); + } + } } +} - set_env_vars(from_ref.as_ref(), to_ref.as_ref(), &extra_args); - - let filenames = collect_files( - workspace.root(), - CollectOptions { - hook_stage, - from_ref, - to_ref, - all_files, - files, - directories, - commit_msg_filename: extra_args.commit_msg_filename, - }, - ) - .await - .context("Failed to collect files")?; +struct SelfRepoEnvCleanupGuard { + paths: Vec, +} - // Change to the workspace root directory. - std::env::set_current_dir(workspace.root()).with_context(|| { - format!( - "Failed to change directory to `{}`", - workspace.root().display() - ) - })?; - - run_hooks( - &workspace, - &installed_hooks, - filenames, - store, - show_diff_on_failure, - fail_fast, - dry_run, - verbose, - printer, - ) - .await +impl SelfRepoEnvCleanupGuard { + fn new(paths: Vec) -> Self { + Self { paths } + } +} + +impl Drop for SelfRepoEnvCleanupGuard { + fn drop(&mut self) { + cleanup_self_repo_env_paths(&self.paths); + } } // `pre-commit` sets these environment variables for other git hooks. @@ -376,7 +425,7 @@ pub async fn install_hooks( } } - if matched_info.is_none() { + if matched_info.is_none() && !matches!(hook.repo(), Repo::SelfRepo { .. }) { for env in store_hooks.iter() { if env.matches(&hook) { if env.ensure_healthy().await { @@ -404,10 +453,14 @@ pub async fn install_hooks( .await .with_context(|| format!("Failed to install hook `{hook}`"))?; - installed_hook - .mark_as_installed(store) - .await - .with_context(|| format!("Failed to mark hook `{hook}` as installed"))?; + if !matches!(hook.repo(), Repo::SelfRepo { .. }) { + installed_hook + .mark_as_installed(store) + .await + .with_context(|| { + format!("Failed to mark hook `{hook}` as installed") + })?; + } match &installed_hook { InstalledHook::Installed { info, .. } => { @@ -1069,3 +1122,58 @@ async fn run_hook( output: hook_output, }) } + +#[cfg(test)] +mod tests { + use super::{SelfRepoEnvCleanupGuard, dedupe_paths}; + use assert_fs::fixture::TempDir; + use std::path::PathBuf; + + #[test] + fn dedupe_paths_keeps_first_seen_order() { + let a = PathBuf::from("/tmp/a"); + let b = PathBuf::from("/tmp/b"); + let c = PathBuf::from("/tmp/c"); + + let deduped = dedupe_paths(vec![ + a.clone(), + b.clone(), + a.clone(), + c.clone(), + b.clone(), + c.clone(), + ]); + + assert_eq!(deduped, vec![a, b, c]); + } + + #[test] + fn cleanup_guard_removes_paths_on_drop() { + let temp = TempDir::new().expect("create temp dir"); + let env_path = temp.path().join("hooks/self-repo-env"); + fs_err::create_dir_all(&env_path).expect("create self-repo env dir"); + fs_err::write(env_path.join("marker.txt"), b"hello").expect("create marker file"); + + { + let _guard = SelfRepoEnvCleanupGuard::new(vec![env_path.clone()]); + } + + assert!(!env_path.exists()); + } + + #[test] + fn cleanup_guard_runs_on_panic_unwind() { + let temp = TempDir::new().expect("create temp dir"); + let env_path = temp.path().join("hooks/self-repo-env"); + fs_err::create_dir_all(&env_path).expect("create self-repo env dir"); + fs_err::write(env_path.join("marker.txt"), b"hello").expect("create marker file"); + + let panic_result = std::panic::catch_unwind(|| { + let _guard = SelfRepoEnvCleanupGuard::new(vec![env_path.clone()]); + panic!("trigger unwind"); + }); + + assert!(panic_result.is_err()); + assert!(!env_path.exists()); + } +} diff --git a/crates/prek/src/hook.rs b/crates/prek/src/hook.rs index c428373d..a2e819fe 100644 --- a/crates/prek/src/hook.rs +++ b/crates/prek/src/hook.rs @@ -580,8 +580,8 @@ impl Hook { /// Dependencies used to identify whether an existing hook environment can be reused. /// /// For remote hooks, the repo URL is included to avoid reusing an environment created - /// from a different remote repository. For self-repo hooks, the project path is included - /// to avoid reusing an environment created from a different project. + /// from a different remote repository. For self-repo hooks, the project path is included for + /// within-run environment identity and partitioning. pub(crate) fn env_key_dependencies(&self) -> &FxHashSet { match &*self.repo { Repo::Remote { .. } => self.dependencies.get_or_init(|| { diff --git a/crates/prek/src/store.rs b/crates/prek/src/store.rs index a2f62bc1..05cefa79 100644 --- a/crates/prek/src/store.rs +++ b/crates/prek/src/store.rs @@ -7,7 +7,7 @@ use anyhow::Result; use etcetera::BaseStrategy; use futures::StreamExt; use rustc_hash::FxHashSet; -use serde::{Deserialize, Serialize}; +use serde::Deserialize; use thiserror::Error; use tracing::{debug, warn}; @@ -44,19 +44,18 @@ fn expand_tilde(path: PathBuf) -> PathBuf { pub(crate) const REPO_MARKER: &str = ".prek-repo.json"; -#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] -pub(crate) struct TrackedConfig { - pub(crate) path: PathBuf, - pub(crate) project_root: PathBuf, -} - #[derive(Debug, Deserialize)] #[serde(untagged)] enum TrackedConfigsFile { - Entries(Vec), + Entries(Vec), LegacyPaths(Vec), } +#[derive(Debug, Deserialize)] +struct TrackedConfigEntry { + path: PathBuf, +} + /// A store for managing repos. #[derive(Debug)] pub struct Store { @@ -165,7 +164,7 @@ impl Store { let info = match InstallInfo::from_env_path(&path).await { Ok(info) => info, Err(err) => { - warn!(%err, path = %path.display(), "Skipping invalid installed hook"); + debug!(%err, path = %path.display(), "Skipping invalid installed hook"); return None; } }; @@ -245,7 +244,7 @@ impl Store { /// /// Seed `config-tracking.json` from the workspace discovery cache if it doesn't exist. /// This is a one-time upgrade helper: it only does work when tracking is empty. - pub(crate) fn tracked_configs(&self) -> Result, Error> { + pub(crate) fn tracked_configs(&self) -> Result, Error> { let tracking_file = self.config_tracking_file(); match fs_err::read_to_string(&tracking_file) { Err(e) if e.kind() == std::io::ErrorKind::NotFound => {} @@ -254,15 +253,11 @@ impl Store { let tracked = match serde_json::from_str::(&content) { Ok(TrackedConfigsFile::Entries(entries)) => entries .into_iter() - .map(|entry| TrackedConfig { - path: normalize_path(entry.path), - project_root: normalize_path(entry.project_root), - }) - .collect(), - Ok(TrackedConfigsFile::LegacyPaths(paths)) => paths - .into_iter() - .map(tracked_config_from_config_path) + .map(|entry| normalize_path(entry.path)) .collect(), + Ok(TrackedConfigsFile::LegacyPaths(paths)) => { + paths.into_iter().map(normalize_path).collect() + } Err(e) => { warn!("Failed to parse config tracking file: {e}, resetting"); FxHashSet::default() @@ -278,7 +273,7 @@ impl Store { } let cached = cached .into_iter() - .map(tracked_config_from_config_path) + .map(normalize_path) .collect::>(); debug!( @@ -293,14 +288,11 @@ impl Store { /// Track new config files for GC. pub(crate) fn track_configs<'a>( &self, - configs: impl Iterator, + config_paths: impl Iterator, ) -> Result<(), Error> { let mut tracked = self.tracked_configs()?; - for (config_path, project_root) in configs { - tracked.insert(TrackedConfig { - path: normalize_path(config_path.to_path_buf()), - project_root: normalize_path(project_root.to_path_buf()), - }); + for config_path in config_paths { + tracked.insert(normalize_path(config_path.to_path_buf())); } let tracking_file = self.config_tracking_file(); @@ -311,10 +303,7 @@ impl Store { } /// Update the tracked configs file. - pub(crate) fn update_tracked_configs( - &self, - configs: &FxHashSet, - ) -> Result<(), Error> { + pub(crate) fn update_tracked_configs(&self, configs: &FxHashSet) -> Result<(), Error> { let tracking_file = self.config_tracking_file(); let content = serde_json::to_string_pretty(configs)?; fs_err::write(&tracking_file, content)?; @@ -356,12 +345,3 @@ fn normalize_path(mut path: PathBuf) -> PathBuf { } fs_err::canonicalize(&path).unwrap_or(path) } - -fn tracked_config_from_config_path(path: PathBuf) -> TrackedConfig { - let path = normalize_path(path); - let project_root = path - .parent() - .map(Path::to_path_buf) - .unwrap_or_else(|| path.clone()); - TrackedConfig { path, project_root } -} diff --git a/crates/prek/tests/cache.rs b/crates/prek/tests/cache.rs index 5b77cfed..df471e12 100644 --- a/crates/prek/tests/cache.rs +++ b/crates/prek/tests/cache.rs @@ -556,431 +556,6 @@ fn cache_gc_keeps_local_hook_env() -> anyhow::Result<()> { Ok(()) } -#[test] -fn cache_gc_keeps_self_repo_hook_env() -> anyhow::Result<()> { - let context = TestContext::new(); - context.init_project(); - - let cwd = context.work_dir(); - - // Create a manifest with a Python hook (non-system language). - cwd.child(".pre-commit-hooks.yaml") - .write_str(indoc::indoc! {r#" - - id: self-python - name: Self Python Hook - entry: python -c "print('hello from self')" - language: python - "#})?; - - // Self-repo Python hooks install the project root as a package, so provide - // a minimal setup.py to keep the installer happy. - cwd.child("setup.py") - .write_str("from setuptools import setup; setup(name='dummy', version='0.0.1')")?; - - context.write_pre_commit_config(indoc::indoc! {r" - repos: - - repo: self - hooks: - - id: self-python - "}); - - cwd.child("file.txt").write_str("Hello\n")?; - context.git_add("."); - - // Install + run the self-repo hook so it creates a hook env under PREK_HOME/hooks. - cmd_snapshot!(context.filters(), context.run(), @r" - success: true - exit_code: 0 - ----- stdout ----- - Self Python Hook.........................................................Passed - - ----- stderr ----- - "); - - let home = context.home_dir(); - let hooks_dir = home.child("hooks"); - - let mut self_envs = Vec::new(); - for entry in fs_err::read_dir(hooks_dir.path())? { - let entry = entry?; - if !entry.file_type()?.is_dir() { - continue; - } - - let name = entry.file_name().to_string_lossy().to_string(); - if name.starts_with("python-") { - self_envs.push(name); - } - } - - assert!( - !self_envs.is_empty(), - "expected at least one self-repo hook env" - ); - - // Add an obviously-unused entry to ensure GC does work. - home.child("hooks/unused-hook-env").create_dir_all()?; - - cmd_snapshot!(context.filters(), context.command().args(["cache", "gc"]), @r" - success: true - exit_code: 0 - ----- stdout ----- - Removed 1 hook env ([SIZE]) - - ----- stderr ----- - "); - - // The self-repo hook env(s) should remain. - for env in self_envs { - home.child(format!("hooks/{env}")) - .assert(predicates::path::is_dir()); - } - // Unused should be swept. - home.child("hooks/unused-hook-env") - .assert(predicates::path::missing()); - - Ok(()) -} - -#[test] -fn cache_gc_keeps_self_repo_hook_env_with_explicit_relative_config_path() -> anyhow::Result<()> { - let context = TestContext::new(); - context.init_project(); - - let cwd = context.work_dir(); - - // Create a manifest with a Python hook (non-system language). - cwd.child(".pre-commit-hooks.yaml") - .write_str(indoc::indoc! {r#" - - id: self-python - name: Self Python Hook - entry: python -c "print('hello from self')" - language: python - "#})?; - - // Self-repo Python hooks install the project root as a package, so provide - // a minimal setup.py to keep the installer happy. - cwd.child("setup.py") - .write_str("from setuptools import setup; setup(name='dummy', version='0.0.1')")?; - - context.write_pre_commit_config(indoc::indoc! {r" - repos: - - repo: self - hooks: - - id: self-python - "}); - - cwd.child("file.txt").write_str("Hello\n")?; - context.git_add("."); - - // Run with an explicit relative --config path to exercise path normalization. - cmd_snapshot!( - context.filters(), - context - .command() - .args(["run", "--config", ".pre-commit-config.yaml"]), - @r" - success: true - exit_code: 0 - ----- stdout ----- - Self Python Hook.........................................................Passed - - ----- stderr ----- - " - ); - - let home = context.home_dir(); - let hooks_dir = home.child("hooks"); - - let mut self_envs = Vec::new(); - for entry in fs_err::read_dir(hooks_dir.path())? { - let entry = entry?; - if !entry.file_type()?.is_dir() { - continue; - } - - let name = entry.file_name().to_string_lossy().to_string(); - if name.starts_with("python-") { - self_envs.push(name); - } - } - - assert!( - !self_envs.is_empty(), - "expected at least one self-repo hook env" - ); - - // Add an obviously-unused entry to ensure GC does work. - home.child("hooks/unused-hook-env").create_dir_all()?; - - cmd_snapshot!(context.filters(), context.command().args(["cache", "gc"]), @r" - success: true - exit_code: 0 - ----- stdout ----- - Removed 1 hook env ([SIZE]) - - ----- stderr ----- - "); - - // The self-repo hook env(s) should remain. - for env in self_envs { - home.child(format!("hooks/{env}")) - .assert(predicates::path::is_dir()); - } - // Unused should be swept. - home.child("hooks/unused-hook-env") - .assert(predicates::path::missing()); - - Ok(()) -} - -#[test] -fn cache_gc_keeps_self_repo_hook_env_with_subdir_config_path() -> anyhow::Result<()> { - let context = TestContext::new(); - context.init_project(); - - let cwd = context.work_dir(); - let subdir = cwd.child("sub"); - subdir.create_dir_all()?; - - // Manifest stays at git root. - cwd.child(".pre-commit-hooks.yaml") - .write_str(indoc::indoc! {r#" - - id: self-python - name: Self Python Hook - entry: python -c "print('hello from self')" - language: python - "#})?; - - cwd.child("setup.py") - .write_str("from setuptools import setup; setup(name='dummy', version='0.0.1')")?; - - // Config lives in a subdirectory and references repo:self. - subdir - .child(".pre-commit-config.yaml") - .write_str(indoc::indoc! {r" - repos: - - repo: self - hooks: - - id: self-python - "})?; - - cwd.child("file.txt").write_str("Hello\n")?; - context.git_add("."); - - cmd_snapshot!( - context.filters(), - context - .command() - .args(["run", "--config", "sub/.pre-commit-config.yaml"]), - @r" - success: true - exit_code: 0 - ----- stdout ----- - Self Python Hook.........................................................Passed - - ----- stderr ----- - " - ); - - let home = context.home_dir(); - let hooks_dir = home.child("hooks"); - - let mut self_envs = Vec::new(); - for entry in fs_err::read_dir(hooks_dir.path())? { - let entry = entry?; - if !entry.file_type()?.is_dir() { - continue; - } - - let name = entry.file_name().to_string_lossy().to_string(); - if name.starts_with("python-") { - self_envs.push(name); - } - } - - assert!( - !self_envs.is_empty(), - "expected at least one self-repo hook env" - ); - - home.child("hooks/unused-hook-env").create_dir_all()?; - - cmd_snapshot!(context.filters(), context.command().args(["cache", "gc"]), @r" - success: true - exit_code: 0 - ----- stdout ----- - Removed 1 hook env ([SIZE]) - - ----- stderr ----- - "); - - for env in self_envs { - home.child(format!("hooks/{env}")) - .assert(predicates::path::is_dir()); - } - home.child("hooks/unused-hook-env") - .assert(predicates::path::missing()); - - Ok(()) -} - -#[test] -fn cache_gc_prunes_stale_root_self_repo_env_when_only_subdir_config_tracked() -> anyhow::Result<()> -{ - let context = TestContext::new(); - context.init_project(); - - let cwd = context.work_dir(); - let subdir = cwd.child("sub"); - subdir.create_dir_all()?; - - // Root manifest + config. - cwd.child(".pre-commit-hooks.yaml") - .write_str(indoc::indoc! {r#" - - id: shared - name: Root Hook - entry: python -c "print('root')" - language: python - "#})?; - cwd.child("setup.py") - .write_str("from setuptools import setup; setup(name='rootpkg', version='0.0.1')")?; - context.write_pre_commit_config(indoc::indoc! {r" - repos: - - repo: self - hooks: - - id: shared - "}); - - // Subdir manifest + config with same hook id. - subdir - .child(".pre-commit-hooks.yaml") - .write_str(indoc::indoc! {r#" - - id: shared - name: Sub Hook - entry: python -c "print('sub')" - language: python - "#})?; - subdir - .child("setup.py") - .write_str("from setuptools import setup; setup(name='subpkg', version='0.0.1')")?; - subdir - .child(".pre-commit-config.yaml") - .write_str(indoc::indoc! {r" - repos: - - repo: self - hooks: - - id: shared - "})?; - - cwd.child("file.txt").write_str("Hello\n")?; - context.git_add("."); - - // Install root env. - cmd_snapshot!( - context.filters(), - context.command().args(["run", "--config", ".pre-commit-config.yaml"]), - @r" - success: true - exit_code: 0 - ----- stdout ----- - Root Hook................................................................Passed - - ----- stderr ----- - " - ); - - // Drop root config; only subdir config should remain tracked after GC. - fs_err::remove_file(cwd.child(".pre-commit-config.yaml").path())?; - context.git_add("."); - - // Install subdir env and ensure subdir config is tracked. - cmd_snapshot!( - context.filters(), - context.command().arg("run").current_dir(subdir.path()), - @r" - success: true - exit_code: 0 - ----- stdout ----- - Sub Hook.................................................................Passed - - ----- stderr ----- - " - ); - - let home = context.home_dir(); - let hooks_dir = home.child("hooks"); - let root_dep = fs_err::canonicalize(cwd.path()) - .unwrap_or_else(|_| cwd.path().to_path_buf()) - .to_string_lossy() - .to_string(); - let sub_dep = fs_err::canonicalize(subdir.path()) - .unwrap_or_else(|_| subdir.path().to_path_buf()) - .to_string_lossy() - .to_string(); - - let mut root_envs = Vec::new(); - let mut sub_envs = Vec::new(); - for entry in fs_err::read_dir(hooks_dir.path())? { - let entry = entry?; - if !entry.file_type()?.is_dir() { - continue; - } - let env_dir = entry.path(); - let marker_path = env_dir.join(".prek-hook.json"); - if !marker_path.is_file() { - continue; - } - let marker = fs_err::read_to_string(&marker_path)?; - let marker_json: serde_json::Value = serde_json::from_str(&marker)?; - let deps = marker_json["dependencies"] - .as_array() - .cloned() - .unwrap_or_default(); - let dep_strings: Vec = deps - .into_iter() - .filter_map(|v| v.as_str().map(ToString::to_string)) - .collect(); - if dep_strings.iter().any(|d| d == &root_dep) { - root_envs.push(env_dir.clone()); - } - if dep_strings.iter().any(|d| d == &sub_dep) { - sub_envs.push(env_dir); - } - } - assert!(!root_envs.is_empty(), "expected at least one root self env"); - assert!( - !sub_envs.is_empty(), - "expected at least one subdir self env" - ); - - cmd_snapshot!(context.filters(), context.command().args(["cache", "gc"]), @r" - success: true - exit_code: 0 - ----- stdout ----- - Removed 1 hook env ([SIZE]) - - ----- stderr ----- - "); - - for env in root_envs { - assert!( - !env.exists(), - "expected stale root self env to be pruned: {}", - env.display() - ); - } - for env in sub_envs { - assert!( - env.exists(), - "expected active subdir self env to remain: {}", - env.display() - ); - } - - Ok(()) -} - fn write_config_tracking_file( home: &ChildPath, configs: &[&std::path::Path], diff --git a/crates/prek/tests/self_repo.rs b/crates/prek/tests/self_repo.rs index 14e8c291..0862c9f1 100644 --- a/crates/prek/tests/self_repo.rs +++ b/crates/prek/tests/self_repo.rs @@ -1,5 +1,7 @@ mod common; +use std::path::Path; + use crate::common::{TestContext, cmd_snapshot, git_cmd}; use assert_cmd::assert::OutputAssertExt; @@ -147,9 +149,125 @@ fn self_repo_unknown_hook_id() -> anyhow::Result<()> { Ok(()) } -/// Two projects sharing the same `PREK_HOME` must not reuse each other's -/// self-repo hook environments. Each project installs its own package -/// into the env, so sharing would cause incorrect behavior. +#[test] +fn self_repo_refreshes_after_source_change_between_runs() -> anyhow::Result<()> { + let context = TestContext::new(); + context.init_project(); + + let cwd = context.work_dir(); + cwd.child(".pre-commit-hooks.yaml") + .write_str(indoc::indoc! {r" + - id: showver + name: show version + entry: showver + language: python + pass_filenames: false + "})?; + cwd.child("setup.py").write_str(indoc::indoc! {r" + from setuptools import setup + + setup( + name='showver', + version='0.0.1', + packages=['hookpkg'], + entry_points={'console_scripts': ['showver=hookpkg.cli:main']}, + ) + "})?; + cwd.child("hookpkg").create_dir_all()?; + cwd.child("hookpkg/__init__.py").write_str("")?; + cwd.child("hookpkg/cli.py") + .write_str("def main():\n print('HOOK_VERSION=v1')\n raise SystemExit(1)\n")?; + + context.write_pre_commit_config(indoc::indoc! {r" + repos: + - repo: self + hooks: + - id: showver + "}); + cwd.child("file.txt").write_str("hello\n")?; + context.git_add("."); + + let output_v1 = context + .command() + .args(["run", "--all-files", "--verbose"]) + .output()?; + assert!(!output_v1.status.success(), "expected first run to fail"); + let stdout_v1 = String::from_utf8_lossy(&output_v1.stdout); + assert!(stdout_v1.contains("HOOK_VERSION=v1")); + + cwd.child("hookpkg/cli.py") + .write_str("def main():\n print('HOOK_VERSION=v2')\n raise SystemExit(1)\n")?; + + let output_v2 = context + .command() + .args(["run", "--all-files", "--verbose"]) + .output()?; + assert!(!output_v2.status.success(), "expected second run to fail"); + let stdout_v2 = String::from_utf8_lossy(&output_v2.stdout); + assert!(stdout_v2.contains("HOOK_VERSION=v2")); + assert!(!stdout_v2.contains("HOOK_VERSION=v1")); + + assert_eq!( + python_env_count(context.home_dir().child("hooks").path())?, + 0 + ); + + Ok(()) +} + +#[test] +fn self_repo_does_not_persist_env_across_runs() -> anyhow::Result<()> { + let context = TestContext::new(); + context.init_project(); + + let cwd = context.work_dir(); + + cwd.child(".pre-commit-hooks.yaml") + .write_str(indoc::indoc! {r#" + - id: self-python + name: Self Python Hook + entry: python -c "print('ok')" + language: python + pass_filenames: false + "#})?; + cwd.child("setup.py") + .write_str("from setuptools import setup; setup(name='dummy', version='0.0.1')")?; + + context.write_pre_commit_config(indoc::indoc! {r" + repos: + - repo: self + hooks: + - id: self-python + "}); + + cwd.child("file.txt").write_str("Hello\n")?; + context.git_add("."); + + context + .command() + .args(["run", "--all-files"]) + .assert() + .success(); + assert_eq!( + python_env_count(context.home_dir().child("hooks").path())?, + 0 + ); + + context + .command() + .args(["run", "--all-files"]) + .assert() + .success(); + assert_eq!( + python_env_count(context.home_dir().child("hooks").path())?, + 0 + ); + + Ok(()) +} + +/// Two projects sharing the same `PREK_HOME` should both run successfully +/// without relying on persisted self-repo environments. #[test] fn self_repo_cross_project_env_isolation() -> anyhow::Result<()> { // Use one TestContext for the shared PREK_HOME. @@ -202,7 +320,6 @@ fn self_repo_cross_project_env_isolation() -> anyhow::Result<()> { .map(std::path::PathBuf::from) .unwrap_or_else(|| std::path::PathBuf::from(assert_cmd::cargo::cargo_bin!("prek"))); - // Run project A — installs its env. let output_a = std::process::Command::new(&prek_bin) .arg("run") .current_dir(&*project_a) @@ -221,7 +338,6 @@ fn self_repo_cross_project_env_isolation() -> anyhow::Result<()> { ); assert!(stdout_a.contains("Passed"), "project A hook did not pass"); - // Run project B — must create a separate env, not reuse A's. let output_b = std::process::Command::new(&prek_bin) .arg("run") .current_dir(&*project_b) @@ -240,21 +356,22 @@ fn self_repo_cross_project_env_isolation() -> anyhow::Result<()> { ); assert!(stdout_b.contains("Passed"), "project B hook did not pass"); - // Verify the hook envs are distinct (at least 2 python-* dirs under hooks/). - let hooks_dir = home.child("hooks"); - let python_envs: Vec<_> = fs_err::read_dir(hooks_dir.path())? - .filter_map(Result::ok) - .filter(|e| { - e.file_type().map(|t| t.is_dir()).unwrap_or(false) - && e.file_name().to_string_lossy().starts_with("python-") - }) - .collect(); - - assert!( - python_envs.len() >= 2, - "expected at least 2 separate python hook envs, found {}", - python_envs.len(), - ); + assert_eq!(python_env_count(home.child("hooks").path())?, 0); Ok(()) } + +fn python_env_count(hooks_dir: &Path) -> anyhow::Result { + let mut count = 0; + for entry in fs_err::read_dir(hooks_dir)? { + let entry = entry?; + if !entry.file_type()?.is_dir() { + continue; + } + + if entry.file_name().to_string_lossy().starts_with("python-") { + count += 1; + } + } + Ok(count) +} diff --git a/docs/configuration.md b/docs/configuration.md index d0fde389..8bd51409 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -773,6 +773,10 @@ Only `id` is required — all other fields are optional overrides, the same as r If `.pre-commit-hooks.yaml` does not exist in the project root, `prek` reports an error at hook initialization time. +Hook environments for `repo: self` are ephemeral per run. `prek` may reuse them within the +same invocation, but it does not reuse them across separate runs. This favors correctness and +ensures source changes in the current project are picked up on the next run. + Example: === "prek.toml" diff --git a/docs/diff.md b/docs/diff.md index f634fb41..ffec84bf 100644 --- a/docs/diff.md +++ b/docs/diff.md @@ -5,7 +5,7 @@ - `prek` supports both `.pre-commit-config.yaml` and `.pre-commit-config.yml` configuration files. - `prek` implements some common hooks from `pre-commit-hooks` in Rust for better performance. - `prek` supports `repo: builtin` for offline, zero-setup hooks. -- `prek` supports `repo: self` for projects that publish hooks via `.pre-commit-hooks.yaml` to consume their own hooks. +- `prek` supports `repo: self` for projects that publish hooks via `.pre-commit-hooks.yaml` to consume their own hooks (using ephemeral per-run hook environments). - `prek` uses `~/.cache/prek` as the default cache directory for repos, environments and toolchains. - `prek` decoupled hook environment from their repositories, allowing shared toolchains and environments across hooks. - `prek` supports `language_version` as a semver specifier and automatically installs the required toolchains. From 12b52c6783fe983af8f6768e61ae7965d6734d3e Mon Sep 17 00:00:00 2001 From: Shaan Majid <70789625+shaanmajid@users.noreply.github.com> Date: Mon, 9 Feb 2026 18:32:30 -0600 Subject: [PATCH 5/5] fix: clean up self-repo envs on partial install failure Thread a SelfRepoEnvTracker through install_hooks so the cleanup guard is created before installation begins. This ensures ephemeral self-repo env directories are removed even when a later hook in the batch fails to install. --- crates/prek/src/cli/run/run.rs | 98 ++++++++++++++++++++++++++-------- crates/prek/tests/self_repo.rs | 48 +++++++++++++++++ 2 files changed, 125 insertions(+), 21 deletions(-) diff --git a/crates/prek/src/cli/run/run.rs b/crates/prek/src/cli/run/run.rs index 7c422e49..69919e06 100644 --- a/crates/prek/src/cli/run/run.rs +++ b/crates/prek/src/cli/run/run.rs @@ -1,8 +1,8 @@ use std::fmt::Write as _; use std::io::Write as _; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::rc::Rc; -use std::sync::{Arc, LazyLock}; +use std::sync::{Arc, LazyLock, Mutex}; use anyhow::{Context, Result}; use futures::stream::{FuturesUnordered, StreamExt}; @@ -160,9 +160,15 @@ pub(crate) async fn run( filtered_hooks.iter().map(|h| &h.id).collect::>() ); let reporter = HookInstallReporter::new(printer); - let installed_hooks = install_hooks(filtered_hooks, store, &reporter).await?; - let self_repo_env_paths = collect_self_repo_env_paths(&installed_hooks); - let _self_repo_env_cleanup_guard = SelfRepoEnvCleanupGuard::new(self_repo_env_paths); + let self_repo_env_tracker = SelfRepoEnvTracker::default(); + let _self_repo_env_cleanup_guard = SelfRepoEnvCleanupGuard::new(self_repo_env_tracker.clone()); + let installed_hooks = install_hooks_with_tracker( + filtered_hooks, + store, + &reporter, + Some(&self_repo_env_tracker), + ) + .await?; async { // Release the store lock. @@ -219,14 +225,6 @@ pub(crate) async fn run( .await } -fn collect_self_repo_env_paths(installed_hooks: &[InstalledHook]) -> Vec { - dedupe_paths(installed_hooks.iter().filter_map(|hook| { - matches!(hook.repo(), Repo::SelfRepo { .. }) - .then(|| hook.env_path().map(std::path::Path::to_path_buf)) - .flatten() - })) -} - fn dedupe_paths(paths: impl IntoIterator) -> Vec { let mut seen = FxHashSet::default(); paths @@ -253,18 +251,56 @@ fn cleanup_self_repo_env_paths(paths: &[PathBuf]) { } struct SelfRepoEnvCleanupGuard { - paths: Vec, + tracker: SelfRepoEnvTracker, } impl SelfRepoEnvCleanupGuard { - fn new(paths: Vec) -> Self { - Self { paths } + fn new(tracker: SelfRepoEnvTracker) -> Self { + Self { tracker } } } impl Drop for SelfRepoEnvCleanupGuard { fn drop(&mut self) { - cleanup_self_repo_env_paths(&self.paths); + let paths = self.tracker.deduped_paths(); + cleanup_self_repo_env_paths(&paths); + } +} + +#[derive(Clone, Default)] +struct SelfRepoEnvTracker { + paths: Arc>>, +} + +impl SelfRepoEnvTracker { + fn record_path(&self, path: &Path) { + let mut paths = match self.paths.lock() { + Ok(paths) => paths, + Err(poisoned) => { + warn!("Self-repo env tracker lock poisoned while recording path"); + poisoned.into_inner() + } + }; + paths.push(path.to_path_buf()); + } + + fn record_from_hook(&self, hook: &InstalledHook) { + if matches!(hook.repo(), Repo::SelfRepo { .. }) + && let Some(path) = hook.env_path() + { + self.record_path(path); + } + } + + fn deduped_paths(&self) -> Vec { + let paths = match self.paths.lock() { + Ok(paths) => paths.clone(), + Err(poisoned) => { + warn!("Self-repo env tracker lock poisoned while collecting paths"); + poisoned.into_inner().clone() + } + }; + dedupe_paths(paths) } } @@ -362,6 +398,15 @@ pub async fn install_hooks( hooks: Vec>, store: &Store, reporter: &HookInstallReporter, +) -> Result> { + install_hooks_with_tracker(hooks, store, reporter, None).await +} + +async fn install_hooks_with_tracker( + hooks: Vec>, + store: &Store, + reporter: &HookInstallReporter, + self_repo_env_tracker: Option<&SelfRepoEnvTracker>, ) -> Result> { let num_hooks = hooks.len(); let mut result = Vec::with_capacity(hooks.len()); @@ -441,7 +486,11 @@ pub async fn install_hooks( "Found installed environment for hook `{hook}` at `{}`", info.env_path.display() ); - hook_envs.push(InstalledHook::Installed { hook, info }); + let installed_hook = InstalledHook::Installed { hook, info }; + if let Some(tracker) = self_repo_env_tracker { + tracker.record_from_hook(&installed_hook); + } + hook_envs.push(installed_hook); continue; } @@ -452,6 +501,9 @@ pub async fn install_hooks( .install(hook.clone(), store, reporter) .await .with_context(|| format!("Failed to install hook `{hook}`"))?; + if let Some(tracker) = self_repo_env_tracker { + tracker.record_from_hook(&installed_hook); + } if !matches!(hook.repo(), Repo::SelfRepo { .. }) { installed_hook @@ -1125,7 +1177,7 @@ async fn run_hook( #[cfg(test)] mod tests { - use super::{SelfRepoEnvCleanupGuard, dedupe_paths}; + use super::{SelfRepoEnvCleanupGuard, SelfRepoEnvTracker, dedupe_paths}; use assert_fs::fixture::TempDir; use std::path::PathBuf; @@ -1153,9 +1205,11 @@ mod tests { let env_path = temp.path().join("hooks/self-repo-env"); fs_err::create_dir_all(&env_path).expect("create self-repo env dir"); fs_err::write(env_path.join("marker.txt"), b"hello").expect("create marker file"); + let tracker = SelfRepoEnvTracker::default(); + tracker.record_path(&env_path); { - let _guard = SelfRepoEnvCleanupGuard::new(vec![env_path.clone()]); + let _guard = SelfRepoEnvCleanupGuard::new(tracker); } assert!(!env_path.exists()); @@ -1167,9 +1221,11 @@ mod tests { let env_path = temp.path().join("hooks/self-repo-env"); fs_err::create_dir_all(&env_path).expect("create self-repo env dir"); fs_err::write(env_path.join("marker.txt"), b"hello").expect("create marker file"); + let tracker = SelfRepoEnvTracker::default(); + tracker.record_path(&env_path); let panic_result = std::panic::catch_unwind(|| { - let _guard = SelfRepoEnvCleanupGuard::new(vec![env_path.clone()]); + let _guard = SelfRepoEnvCleanupGuard::new(tracker); panic!("trigger unwind"); }); diff --git a/crates/prek/tests/self_repo.rs b/crates/prek/tests/self_repo.rs index 0862c9f1..65a08601 100644 --- a/crates/prek/tests/self_repo.rs +++ b/crates/prek/tests/self_repo.rs @@ -266,6 +266,54 @@ fn self_repo_does_not_persist_env_across_runs() -> anyhow::Result<()> { Ok(()) } +#[test] +fn self_repo_partial_install_failure_does_not_leak_envs() -> anyhow::Result<()> { + let context = TestContext::new(); + context.init_project(); + + let cwd = context.work_dir(); + cwd.child(".pre-commit-hooks.yaml") + .write_str(indoc::indoc! {r#" + - id: ok + name: ok + entry: python -c "print('ok')" + language: python + pass_filenames: false + - id: badver + name: badver + entry: python -c "print('badver')" + language: python + pass_filenames: false + "#})?; + cwd.child("setup.py") + .write_str("from setuptools import setup; setup(name='selfhooks', version='0.0.1')")?; + + context.write_pre_commit_config(indoc::indoc! {r" + repos: + - repo: self + hooks: + - id: ok + - id: badver + language_version: python9.9 + "}); + cwd.child("file.txt").write_str("Hello\n")?; + context.git_add("."); + + let output = context.command().args(["run", "--all-files"]).output()?; + assert!( + !output.status.success(), + "run should fail due to invalid language_version" + ); + + assert_eq!( + python_env_count(context.home_dir().child("hooks").path())?, + 0, + "ephemeral self-repo envs should be removed on install failure", + ); + + Ok(()) +} + /// Two projects sharing the same `PREK_HOME` should both run successfully /// without relying on persisted self-repo environments. #[test]