Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions cmd/entire/cli/bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
package cli

import (
"io"
"testing"

"github.com/entireio/cli/cmd/entire/cli/benchutil"
"github.com/entireio/cli/cmd/entire/cli/paths"
"github.com/entireio/cli/cmd/entire/cli/session"
)

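/* To capture a CPU profile for the status benchmark, run:

mise exec -- go test -run='^$' -bench=BenchmarkStatusCommand -cpuprofile=/tmp/status_cpu.prof ./cmd/entire/cli/

To explore that profile as an interactive flame graph, run:

mise exec -- go tool pprof -http=:8089 /tmp/status_cpu.prof &>/dev/null & echo "pprof server started on http://localhost:8089"

and then go to http://localhost:8089/ui/flamegraph

*/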

// BenchmarkStatusCommand benchmarks the `entire status` command end-to-end.
// This is the top-level entry point for understanding status command latency.
//
// Key I/O operations measured:
// - git rev-parse --show-toplevel (RepoRoot, cached after first call)
// - git rev-parse --git-common-dir (NewStateStore, per invocation)
// - git rev-parse --abbrev-ref HEAD (resolveWorktreeBranch, per unique worktree)
// - os.ReadFile for settings.json, each session state file
// - JSON unmarshaling for settings and each session state
//
// The primary scaling dimension is active session count.
func BenchmarkStatusCommand(b *testing.B) {
	// Each row becomes one sub-benchmark; rows run in the order listed.
	scenarios := []struct {
		name     string
		sessions int
		detailed bool
	}{
		{name: "Short/NoSessions", sessions: 0, detailed: false},
		{name: "Short/1Session", sessions: 1, detailed: false},
		{name: "Short/5Sessions", sessions: 5, detailed: false},
		{name: "Short/10Sessions", sessions: 10, detailed: false},
		{name: "Short/20Sessions", sessions: 20, detailed: false},
		{name: "Detailed/NoSessions", sessions: 0, detailed: true},
		{name: "Detailed/5Sessions", sessions: 5, detailed: true},
	}

	for _, sc := range scenarios {
		b.Run(sc.name, benchStatus(sc.sessions, sc.detailed))
	}
}

// benchStatus returns a benchmark body that times runStatus against a freshly
// created bench repo containing sessionCount session state files.
//
// sessionCount is the number of session states created before timing starts.
// detailed is forwarded unchanged to runStatus.
//
// The returned function changes the working directory to the bench repo via
// b.Chdir, and fails the benchmark with b.Fatalf if runStatus returns an error.
func benchStatus(sessionCount int, detailed bool) func(*testing.B) {
	return func(b *testing.B) {
		repo := benchutil.NewBenchRepo(b, benchutil.RepoOpts{})

		// Create active session state files in .git/entire-sessions/
		for range sessionCount {
			repo.CreateSessionState(b, benchutil.SessionOpts{})
		}

		// runStatus uses paths.RepoRoot() which requires cwd to be in the repo.
		b.Chdir(repo.Dir)

		// b.Loop resets the benchmark timer on its first call, so the repo and
		// session setup above is excluded from the measurement without an
		// explicit b.ResetTimer.
		for b.Loop() {
			// Clear caches each iteration to simulate a fresh CLI invocation.
			// In real usage, each `entire status` call starts cold. Clearing
			// here also covers the first iteration, so no separate pre-loop
			// clear is needed.
			paths.ClearRepoRootCache()
			session.ClearGitCommonDirCache()

			if err := runStatus(io.Discard, detailed); err != nil {
				b.Fatalf("runStatus: %v", err)
			}
		}
	}
}
14 changes: 13 additions & 1 deletion cmd/entire/cli/benchutil/benchutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ type BenchRepo struct {

// WorktreeID is the worktree identifier (empty for main worktree).
WorktreeID string

// Strategy is the strategy name used in .entire/settings.json.
Strategy string
}

// RepoOpts configures how NewBenchRepo creates the test repository.
Expand Down Expand Up @@ -167,6 +170,7 @@ func NewBenchRepo(b *testing.B, opts RepoOpts) *BenchRepo {
Repo: repo,
Store: checkpoint.NewGitStore(repo),
HeadHash: headHash.String(),
Strategy: opts.Strategy,
}

// Determine worktree ID
Expand Down Expand Up @@ -344,9 +348,17 @@ func (br *BenchRepo) WriteTranscriptFile(b *testing.B, sessionID string, data []
// SeedShadowBranch creates N checkpoint commits on the shadow branch
// for the current HEAD. This simulates a session that already has
// prior checkpoints saved.
//
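// Changes cwd to br.Dir via b.Chdir, which restores the original cwd only
// when the benchmark finishes (not when this method returns). This is needed
// because WriteTemporary uses paths.RepoRoot(), which depends on os.Getwd().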
func (br *BenchRepo) SeedShadowBranch(b *testing.B, sessionID string, checkpointCount int, filesPerCheckpoint int) {
b.Helper()

// WriteTemporary internally calls paths.RepoRoot() which uses os.Getwd().
// Switch cwd so it resolves to the bench repo.
b.Chdir(br.Dir)
paths.ClearRepoRootCache()

for i := range checkpointCount {
var modified []string
for j := range filesPerCheckpoint {
Expand Down Expand Up @@ -411,7 +423,7 @@ func (br *BenchRepo) SeedMetadataBranch(b *testing.B, checkpointCount int) {
err = br.Store.WriteCommitted(context.Background(), checkpoint.WriteCommittedOptions{
CheckpointID: cpID,
SessionID: sessionID,
Strategy: "manual-commit",
Strategy: br.Strategy,
Transcript: transcript,
Prompts: []string{fmt.Sprintf("Implement feature %d", i)},
FilesTouched: files,
Expand Down
29 changes: 13 additions & 16 deletions cmd/entire/cli/benchutil/benchutil_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package benchutil

import (
"fmt"
"testing"

"github.com/entireio/cli/cmd/entire/cli/session"
Expand All @@ -23,25 +22,23 @@ func BenchmarkNewBenchRepo_Large(b *testing.B) {
}

func BenchmarkSeedShadowBranch(b *testing.B) {
for _, count := range []int{1, 5, 10} {
b.Run(fmt.Sprintf("%dCheckpoints", count), func(b *testing.B) {
for b.Loop() {
repo := NewBenchRepo(b, RepoOpts{FileCount: 10})
sessionID := repo.CreateSessionState(b, SessionOpts{})
repo.SeedShadowBranch(b, sessionID, count, 3)
}
})
for b.Loop() {
b.StopTimer()
repo := NewBenchRepo(b, RepoOpts{FileCount: 10})
sessionID := repo.CreateSessionState(b, SessionOpts{})
b.StartTimer()

repo.SeedShadowBranch(b, sessionID, 5, 3)
}
}

func BenchmarkSeedMetadataBranch(b *testing.B) {
for _, count := range []int{1, 5, 10} {
b.Run(fmt.Sprintf("%dCheckpoints", count), func(b *testing.B) {
for b.Loop() {
repo := NewBenchRepo(b, RepoOpts{FileCount: 10})
repo.SeedMetadataBranch(b, count)
}
})
for b.Loop() {
b.StopTimer()
repo := NewBenchRepo(b, RepoOpts{FileCount: 10})
b.StartTimer()

repo.SeedMetadataBranch(b, 10)
}
}
Comment on lines 24 to 43
Copy link

Copilot AI Feb 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These benchmarks reuse the same temp repo across iterations while mutating git history/metadata on each loop. That makes later iterations operate on a larger repo/branch than early iterations, which can skew benchmark results and reduce comparability. Consider moving repo/session setup inside the loop with the timer stopped (or recreate/reset the repo/branches per iteration) so each iteration measures the same steady-state work.

Copilot uses AI. Check for mistakes.

Expand Down
Loading