diff --git a/dvc/config.py b/dvc/config.py index ce7f73289c..1636c48121 100644 --- a/dvc/config.py +++ b/dvc/config.py @@ -129,6 +129,36 @@ def get_dir(cls, level): return system_config_dir() return None + @property + def _should_prefer_local_dvc_dir(self) -> bool: + """Check if we should prefer local_dvc_dir over dvc_dir for reading config. + + Returns True when we should use local_dvc_dir instead of dvc_dir: + - This happens in brancher/gc scenarios where fs is switched to GitFileSystem + to traverse commits, but we still need to read complete config from workspace + (e.g., config.local with passwords) + - Detected by: local_dvc_dir exists, differs from dvc_dir, and has config file + + Returns False when we should use dvc_dir: + - Repo(rev="...") scenario: explicitly reading config from a specific revision + (dvc_dir is a git root path, workspace has no config) + - Normal workspace: local_dvc_dir equals dvc_dir, no preference needed + """ + if self.local_dvc_dir is None or self.local_dvc_dir == self.dvc_dir: + return False + + # Check if dvc_dir is a git root path (e.g., "/.dvc") + # Git paths use posix separators, so check if parent dir is posix root + if self.dvc_dir and posixpath.dirname(self.dvc_dir) == posixpath.sep: + # When dvc_dir is a git root path, check if workspace has config + # If workspace has config, prefer it (brancher scenario) + # If not, use dvc_dir from git history (Repo(rev="...") scenario) + workspace_config = self.wfs.join(self.local_dvc_dir, self.CONFIG) + return self.wfs.exists(workspace_config) + + # Otherwise, this is brancher scenario with non-root path + return True + @cached_property def files(self) -> dict[str, str]: files = { @@ -136,9 +166,23 @@ def files(self) -> dict[str, str]: for level in ("system", "global") } - if self.dvc_dir is not None: - files["repo"] = self.fs.join(self.dvc_dir, self.CONFIG) + # Determine which dvc_dir and filesystem to use for repo config + repo_fs: FileSystem + if self._should_prefer_local_dvc_dir: + # Scenario: brancher switched fs, but need to read workspace config + # Use workspace path and filesystem + repo_dvc_dir = self.local_dvc_dir + repo_fs = self.wfs + else: + # Scenario: Repo(rev="...") or normal workspace + # Use self.fs (may be GitFileSystem for reading from git history) + repo_dvc_dir = self.dvc_dir + repo_fs = self.fs + + if repo_dvc_dir is not None: + files["repo"] = repo_fs.join(repo_dvc_dir, self.CONFIG) + # Local config always from workspace if it exists if self.local_dvc_dir is not None: files["local"] = self.wfs.join(self.local_dvc_dir, self.CONFIG_LOCAL) @@ -196,9 +240,38 @@ def load( self.update(conf) def _get_fs(self, level): - # NOTE: this might be a Gitfs, which doesn't see things outside of - # the repo. - return self.fs if level == "repo" else self.wfs + """Get filesystem for reading config at the specified level. + + Args: + level: Config level ("system", "global", "repo", "local") + + Returns: + FileSystem to use for reading config + + Raises: + ValueError: If level is not one of the valid config levels + """ + # system/global are always outside repo, must use wfs + if level in ("system", "global"): + return self.wfs + + # local config always from workspace + if level == "local": + return self.wfs + + # repo config depends on whether we have separate workspace + if level == "repo": + if self._should_prefer_local_dvc_dir: + # Prefer local_dvc_dir: use workspace config (brancher scenario) + return self.wfs + # Use dvc_dir: may be from git history or normal workspace + return self.fs + + # Invalid level - this should never happen in practice as all callers + # access self.files[level] first, which will raise KeyError for invalid levels + raise ValueError( + f"Invalid config level: '{level}'. Must be one of {self.LEVELS}" + ) @staticmethod def load_file(path, fs=None) -> dict: diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 92ba7810d3..3304359447 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -60,6 +60,81 @@ def test_get_fs(tmp_dir, scm): assert config._get_fs("system") == config.wfs +def test_should_prefer_local_dvc_dir(tmp_dir, scm): + """Test _should_prefer_local_dvc_dir property logic.""" + tmp_dir.scm_gen("foo", "foo", commit="add foo") + + # Scenario 1: Normal workspace (local_dvc_dir == dvc_dir) + config = Config(dvc_dir=str(tmp_dir / ".dvc"), local_dvc_dir=str(tmp_dir / ".dvc")) + assert not config._should_prefer_local_dvc_dir + + # Scenario 2: Repo(rev="...") (local_dvc_dir is None) + fs = scm.get_fs("master") + config = Config(dvc_dir="/.dvc", local_dvc_dir=None, fs=fs) + assert not config._should_prefer_local_dvc_dir + + # Scenario 3: Brancher (local_dvc_dir != dvc_dir, workspace has config) + dvc_dir = tmp_dir / ".dvc" + dvc_dir.mkdir() + (dvc_dir / "config").write_text( + """ + [core] + analytics = false + """ + ) + + fs = scm.get_fs("master") + config = Config(dvc_dir="/.dvc", local_dvc_dir=str(dvc_dir), fs=fs) + assert config._should_prefer_local_dvc_dir + + # Scenario 4: Repo(rev="...") with workspace dir but no config + dvc_dir2 = tmp_dir / ".dvc2" + dvc_dir2.mkdir() + + config = Config(dvc_dir="/.dvc", local_dvc_dir=str(dvc_dir2), fs=fs) + assert not config._should_prefer_local_dvc_dir + + +def test_get_fs_brancher_scenario(tmp_dir, scm): + """Test _get_fs returns wfs when in brancher scenario.""" + tmp_dir.scm_gen("foo", "foo", commit="add foo") + + # Create workspace config + dvc_dir = tmp_dir / ".dvc" + dvc_dir.mkdir() + (dvc_dir / "config").write_text( + """ + [core] + analytics = false + """ + ) + + # Simulate brancher scenario: fs is GitFileSystem but + # local_dvc_dir points to workspace + fs = scm.get_fs("master") + config = Config( + dvc_dir="/.dvc", # git path + local_dvc_dir=str(dvc_dir), # workspace path + fs=fs, + ) + + # Should prefer local_dvc_dir + assert config._should_prefer_local_dvc_dir + assert config._get_fs("repo") == config.wfs + assert config._get_fs("local") == config.wfs + assert config._get_fs("global") == config.wfs + assert config._get_fs("system") == config.wfs + + +def test_get_fs_invalid_level(tmp_dir, dvc): + """Test _get_fs raises ValueError for invalid config level.""" + config = Config.from_cwd(validate=False) + + # Test that passing an invalid level raises ValueError + with pytest.raises(ValueError, match="Invalid config level: 'invalid'"): + config._get_fs("invalid") + + def test_s3_ssl_verify(tmp_dir, dvc): config = Config.from_cwd(validate=False) with config.edit() as conf: diff --git a/tests/unit/test_config_gitfs_fix.py b/tests/unit/test_config_gitfs_fix.py new file mode 100644 index 0000000000..6d92d599c6 --- /dev/null +++ b/tests/unit/test_config_gitfs_fix.py @@ -0,0 +1,38 @@ +from dvc.config import Config +from dvc.fs import GitFileSystem + + +def test_config_loads_from_workspace_with_gitfs(tmp_dir, scm): + """Config should load from workspace, not git history when using GitFileSystem.""" + dvc_dir = tmp_dir / ".dvc" + dvc_dir.mkdir() + + # Create complete config in workspace + (dvc_dir / "config").write_text( + """\ + [core] + remote = test-webdav + [remote "test-webdav"] + url = webdav://localhost:9000/ + """ + ) + + (dvc_dir / "config.local").write_text( + """\ + [remote "test-webdav"] + password = 12345678 + """ + ) + + # Need at least one commit for HEAD to exist + tmp_dir.scm_gen("foo", "foo", commit="init") + + # Create Config with GitFileSystem + git_fs = GitFileSystem(scm=scm, rev="HEAD") + config = Config( + dvc_dir="/.dvc", local_dvc_dir=str(dvc_dir), fs=git_fs, validate=True + ) + + # Should load from workspace + assert config["remote"]["test-webdav"]["url"] == "webdav://localhost:9000/" + assert config["remote"]["test-webdav"]["password"] == "12345678"