diff --git a/cmd/entire/cli/checkpoint/temporary.go b/cmd/entire/cli/checkpoint/temporary.go index 3ff864901..250b88913 100644 --- a/cmd/entire/cli/checkpoint/temporary.go +++ b/cmd/entire/cli/checkpoint/temporary.go @@ -945,6 +945,8 @@ func addDirectoryToEntriesWithAbsPath(repo *git.Repository, dirPathAbs, dirPathR treePath := filepath.ToSlash(filepath.Join(dirPathRel, relWithinDir)) + // Use redacted blob creation for metadata files (transcripts, prompts, etc.) + // to ensure PII and secrets are redacted before writing to git. blobHash, mode, err := createRedactedBlobFromFile(repo, path, treePath) if err != nil { return fmt.Errorf("failed to create blob for %s: %w", path, err) diff --git a/cmd/entire/cli/doctor.go b/cmd/entire/cli/doctor.go index 1b5a0a940..a19b06155 100644 --- a/cmd/entire/cli/doctor.go +++ b/cmd/entire/cli/doctor.go @@ -38,6 +38,9 @@ For each stuck session, you can choose to: Use --force to condense all fixable sessions without prompting. Sessions that can't be condensed will be discarded.`, + PreRun: func(_ *cobra.Command, _ []string) { + strategy.EnsureRedactionConfigured() + }, RunE: func(cmd *cobra.Command, _ []string) error { return runSessionsFix(cmd, forceFlag) }, diff --git a/cmd/entire/cli/hooks_git_cmd.go b/cmd/entire/cli/hooks_git_cmd.go index e97156676..e2db5bbd9 100644 --- a/cmd/entire/cli/hooks_git_cmd.go +++ b/cmd/entire/cli/hooks_git_cmd.go @@ -69,6 +69,10 @@ func initHookLogging() func() { // Init failed - logging will use stderr fallback return func() {} } + + // Configure PII redaction once at startup (reads settings, no-op if disabled). + strategy.EnsureRedactionConfigured() + return logging.Close } diff --git a/cmd/entire/cli/settings/settings.go b/cmd/entire/cli/settings/settings.go index 381c9993a..73316a8c0 100644 --- a/cmd/entire/cli/settings/settings.go +++ b/cmd/entire/cli/settings/settings.go @@ -49,6 +49,24 @@ type EntireSettings struct { // Telemetry controls anonymous usage analytics. // nil = not asked yet (show prompt), true = opted in, false = opted out Telemetry *bool `json:"telemetry,omitempty"` + + // Redaction configures PII redaction behavior for transcripts and metadata. + Redaction *RedactionSettings `json:"redaction,omitempty"` +} + +// RedactionSettings configures redaction behavior beyond the default secret detection. +type RedactionSettings struct { + PII *PIISettings `json:"pii,omitempty"` +} + +// PIISettings configures PII detection categories. +// When Enabled is true, email and phone default to true; address defaults to false. +type PIISettings struct { + Enabled bool `json:"enabled"` + Email *bool `json:"email,omitempty"` + Phone *bool `json:"phone,omitempty"` + Address *bool `json:"address,omitempty"` + CustomPatterns map[string]string `json:"custom_patterns,omitempty"` } // Load loads the Entire settings from .entire/settings.json, @@ -204,6 +222,15 @@ func mergeJSON(settings *EntireSettings, data []byte) error { settings.Telemetry = &t } + // Override redaction if present + if redactionRaw, ok := raw["redaction"]; ok { + var r RedactionSettings + if err := json.Unmarshal(redactionRaw, &r); err != nil { + return fmt.Errorf("parsing redaction field: %w", err) + } + settings.Redaction = &r + } + return nil } diff --git a/cmd/entire/cli/settings/settings_test.go b/cmd/entire/cli/settings/settings_test.go index ad09bc57a..d2329426b 100644 --- a/cmd/entire/cli/settings/settings_test.go +++ b/cmd/entire/cli/settings/settings_test.go @@ -59,7 +59,8 @@ func TestLoad_AcceptsValidKeys(t *testing.T) { "local_dev": false, "log_level": "debug", "strategy_options": {"key": "value"}, - "telemetry": true + "telemetry": true, + "redaction": {"pii": {"enabled": true, "email": true, "phone": false}} }` if err := os.WriteFile(settingsFile, []byte(settingsContent), 0644); err != nil { t.Fatalf("failed to write settings file: %v", err) @@ -92,6 +93,21 @@ func TestLoad_AcceptsValidKeys(t *testing.T) { if settings.Telemetry == nil || !*settings.Telemetry { t.Error("expected telemetry to be true") } + if settings.Redaction == nil { + t.Fatal("expected redaction to be non-nil") + } + if settings.Redaction.PII == nil { + t.Fatal("expected redaction.pii to be non-nil") + } + if !settings.Redaction.PII.Enabled { + t.Error("expected redaction.pii.enabled to be true") + } + if settings.Redaction.PII.Email == nil || !*settings.Redaction.PII.Email { + t.Error("expected redaction.pii.email to be true") + } + if settings.Redaction.PII.Phone == nil || *settings.Redaction.PII.Phone { + t.Error("expected redaction.pii.phone to be false") + } } func TestLoad_LocalSettingsRejectsUnknownKeys(t *testing.T) { @@ -135,6 +151,74 @@ func TestLoad_LocalSettingsRejectsUnknownKeys(t *testing.T) { } } +func TestLoad_MissingRedactionIsNil(t *testing.T) { + tmpDir := t.TempDir() + entireDir := filepath.Join(tmpDir, ".entire") + if err := os.MkdirAll(entireDir, 0755); err != nil { + t.Fatalf("failed to create .entire directory: %v", err) + } + + settingsFile := filepath.Join(entireDir, "settings.json") + if err := os.WriteFile(settingsFile, []byte(`{"strategy": "manual-commit"}`), 0644); err != nil { + t.Fatalf("failed to write settings file: %v", err) + } + if err := os.MkdirAll(filepath.Join(tmpDir, ".git"), 0755); err != nil { + t.Fatalf("failed to create .git directory: %v", err) + } + t.Chdir(tmpDir) + + settings, err := Load() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if settings.Redaction != nil { + t.Error("expected redaction to be nil when not in settings") + } +} + +func TestLoad_LocalOverridesRedaction(t *testing.T) { + tmpDir := t.TempDir() + entireDir := filepath.Join(tmpDir, ".entire") + if err := os.MkdirAll(entireDir, 0755); err != nil { + t.Fatalf("failed to create .entire directory: %v", err) + } + + // Base settings: PII disabled + settingsFile := filepath.Join(entireDir, "settings.json") + if err := os.WriteFile(settingsFile, []byte(`{"strategy": "manual-commit", "redaction": {"pii": {"enabled": false}}}`), 0644); err != nil { + t.Fatalf("failed to write settings file: %v", err) + } + + // Local override: PII enabled with custom patterns + localFile := filepath.Join(entireDir, "settings.local.json") + localContent := `{"redaction": {"pii": {"enabled": true, "custom_patterns": {"employee_id": "EMP-\\d{6}"}}}}` + if err := os.WriteFile(localFile, []byte(localContent), 0644); err != nil { + t.Fatalf("failed to write local settings file: %v", err) + } + + if err := os.MkdirAll(filepath.Join(tmpDir, ".git"), 0755); err != nil { + t.Fatalf("failed to create .git directory: %v", err) + } + t.Chdir(tmpDir) + + settings, err := Load() + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if settings.Redaction == nil || settings.Redaction.PII == nil { + t.Fatal("expected redaction.pii to be non-nil after local override") + } + if !settings.Redaction.PII.Enabled { + t.Error("expected local override to enable PII") + } + if settings.Redaction.PII.CustomPatterns == nil { + t.Fatal("expected custom_patterns to be non-nil") + } + if settings.Redaction.PII.CustomPatterns["employee_id"] != `EMP-\d{6}` { + t.Errorf("expected employee_id pattern, got %v", settings.Redaction.PII.CustomPatterns) + } +} + // containsUnknownField checks if the error message indicates an unknown field func containsUnknownField(msg string) bool { // Go's json package reports unknown fields with this message format diff --git a/cmd/entire/cli/strategy/common.go b/cmd/entire/cli/strategy/common.go index 70a4497d9..87c10716c 100644 --- a/cmd/entire/cli/strategy/common.go +++ b/cmd/entire/cli/strategy/common.go @@ -5,6 +5,7 @@ import ( "encoding/json" "errors" "fmt" + "log/slog" "os" "os/exec" "path/filepath" @@ -16,8 +17,11 @@ import ( "github.com/entireio/cli/cmd/entire/cli/agent" "github.com/entireio/cli/cmd/entire/cli/checkpoint" "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" + "github.com/entireio/cli/cmd/entire/cli/logging" "github.com/entireio/cli/cmd/entire/cli/paths" + "github.com/entireio/cli/cmd/entire/cli/settings" "github.com/entireio/cli/cmd/entire/cli/trailers" + "github.com/entireio/cli/redact" "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing" @@ -238,6 +242,37 @@ var ( protectedDirsCache []string ) +var initRedactionOnce sync.Once + +// EnsureRedactionConfigured loads PII redaction settings and configures the +// redact package. No-op if PII is not enabled in settings. +// Must be called at each process entry point before checkpoint writes +// (e.g., hook PersistentPreRunE, doctor PreRun). +func EnsureRedactionConfigured() { + initRedactionOnce.Do(func() { + s, err := settings.Load() + if err != nil { + logCtx := logging.WithComponent(context.Background(), "redaction") + logging.Warn(logCtx, "failed to load settings for PII redaction", slog.String("error", err.Error())) + return + } + if s.Redaction == nil || s.Redaction.PII == nil || !s.Redaction.PII.Enabled { + return + } + pii := s.Redaction.PII + cfg := redact.PIIConfig{ + Enabled: true, + Categories: make(map[redact.PIICategory]bool), + CustomPatterns: pii.CustomPatterns, + } + // Email and phone default to true when PII is enabled; address defaults to false. + cfg.Categories[redact.PIIEmail] = pii.Email == nil || *pii.Email + cfg.Categories[redact.PIIPhone] = pii.Phone == nil || *pii.Phone + cfg.Categories[redact.PIIAddress] = pii.Address != nil && *pii.Address + redact.ConfigurePII(cfg) + }) +} + // isSpecificAgentType returns true if the agent type is a known, specific value // (not empty and not the generic "Agent" fallback). func isSpecificAgentType(t agent.AgentType) bool { diff --git a/redact/pii.go b/redact/pii.go new file mode 100644 index 000000000..491a99a57 --- /dev/null +++ b/redact/pii.go @@ -0,0 +1,153 @@ +package redact + +import ( + "regexp" + "strings" + "sync" +) + +// PIICategory identifies a category of personally identifiable information. +type PIICategory string + +const ( + PIIEmail PIICategory = "email" + PIIPhone PIICategory = "phone" + PIIAddress PIICategory = "address" +) + +// PIIConfig controls which PII categories are detected and redacted. +type PIIConfig struct { + // Enabled globally enables/disables PII redaction. + // When false, no PII patterns are checked (secrets still redacted). + Enabled bool + + // Categories maps each PII category to whether it is enabled. + // Missing keys default to false (disabled). + Categories map[PIICategory]bool + + // CustomPatterns allows teams to define additional regex patterns. + // Each key is a label used in the replacement token (uppercased), + // and each value is a regex pattern string. + // Example: {"employee_id": `EMP-\d{6}`} produces [REDACTED_EMPLOYEE_ID]. + CustomPatterns map[string]string + + // patterns holds pre-compiled patterns, populated by ConfigurePII. + // When nil (e.g., in tests constructing PIIConfig directly), + // detectPII falls back to compilePIIPatterns. + patterns []piiPattern +} + +// piiPattern is a compiled regex with its replacement token label. +type piiPattern struct { + regex *regexp.Regexp + label string // e.g., "EMAIL", "PHONE", "ADDRESS" +} + +var ( + piiConfig *PIIConfig + piiConfigMu sync.RWMutex +) + +// ConfigurePII sets the global PII redaction configuration. +// Pre-compiles patterns so the hot path (String → detectPII) does no compilation. +// Call once at startup after loading settings. Thread-safe. +func ConfigurePII(cfg PIIConfig) { + piiConfigMu.Lock() + defer piiConfigMu.Unlock() + cfgCopy := cfg + cfgCopy.patterns = compilePIIPatterns(&cfgCopy) + piiConfig = &cfgCopy +} + +// getPIIConfig returns the current PII configuration, or nil if not configured. +func getPIIConfig() *PIIConfig { + piiConfigMu.RLock() + defer piiConfigMu.RUnlock() + return piiConfig +} + +// Pre-compiled builtin PII regexes. +var ( + emailRegex = regexp.MustCompile(`\b[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}\b`) + // phoneRegex uses three branches to avoid false-positives on dotted-decimal + // strings like version numbers (1.234.567.8901) and IPs (192.168.001.0001). + // Dots are only allowed as separators when preceded by +1 (unambiguous intl prefix). + // Without +1, only dashes and spaces are accepted as separators. + phoneRegex = regexp.MustCompile( + `(?:` + + `\+1[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}` + // +1 intl prefix: any separator + `|` + + `(?:1[-\s])?\(\d{3}\)\s?\d{3}[-.\s]?\d{4}` + // parenthesized area code + `|` + + `(?:1[-\s])?\d{3}[-\s]\d{3}[-\s]\d{4}` + // bare digits: dash/space only + `)`, + ) + addressRegex = regexp.MustCompile(`\d{1,5}\s+[A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)*\s+(?:St(?:reet)?|Ave(?:nue)?|Blvd|Boulevard|Dr(?:ive)?|Ln|Lane|Rd|Road|Ct|Court|Pl(?:ace)?|Way|Cir(?:cle)?|Ter(?:race)?|Pkwy|Parkway)\.?`) +) + +// builtinPIIPattern associates a compiled regex with a category and label. +type builtinPIIPattern struct { + category PIICategory + label string + regex *regexp.Regexp +} + +// builtinPIIPatterns is the set of default PII detection patterns. +var builtinPIIPatterns = []builtinPIIPattern{ + {PIIEmail, "EMAIL", emailRegex}, + {PIIPhone, "PHONE", phoneRegex}, + {PIIAddress, "ADDRESS", addressRegex}, +} + +// detectPII returns tagged regions for PII matches in s. +// Returns nil immediately if PII redaction is not configured or not enabled. +func detectPII(cfg *PIIConfig, s string) []taggedRegion { + if cfg == nil || !cfg.Enabled { + return nil + } + + patterns := cfg.patterns + if patterns == nil { + patterns = compilePIIPatterns(cfg) + } + var regions []taggedRegion + for _, p := range patterns { + for _, loc := range p.regex.FindAllStringIndex(s, -1) { + regions = append(regions, taggedRegion{ + region: region{loc[0], loc[1]}, + label: p.label, + }) + } + } + return regions +} + +// compilePIIPatterns builds the pattern list from config. +// Builtin regexes are pre-compiled package vars; only custom patterns +// need compilation here. +func compilePIIPatterns(cfg *PIIConfig) []piiPattern { + var patterns []piiPattern + for _, bp := range builtinPIIPatterns { + if enabled, ok := cfg.Categories[bp.category]; ok && enabled { + patterns = append(patterns, piiPattern{regex: bp.regex, label: bp.label}) + } + } + for label, pattern := range cfg.CustomPatterns { + compiled, err := regexp.Compile(pattern) + if err != nil { + continue // skip invalid custom patterns silently + } + patterns = append(patterns, piiPattern{regex: compiled, label: strings.ToUpper(label)}) + } + return patterns +} + +// replacementToken returns the redaction placeholder for a given label. +// Empty label (secrets) returns "REDACTED" for backward compatibility. +// Non-empty label (PII) returns "[REDACTED_