Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/entire/cli/checkpoint/temporary.go
Original file line number Diff line number Diff line change
Expand Up @@ -945,6 +945,8 @@ func addDirectoryToEntriesWithAbsPath(repo *git.Repository, dirPathAbs, dirPathR

treePath := filepath.ToSlash(filepath.Join(dirPathRel, relWithinDir))

// Use redacted blob creation for metadata files (transcripts, prompts, etc.)
// to ensure PII and secrets are redacted before writing to git.
blobHash, mode, err := createRedactedBlobFromFile(repo, path, treePath)
if err != nil {
return fmt.Errorf("failed to create blob for %s: %w", path, err)
Expand Down
3 changes: 3 additions & 0 deletions cmd/entire/cli/doctor.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ For each stuck session, you can choose to:

Use --force to condense all fixable sessions without prompting. Sessions that can't
be condensed will be discarded.`,
PreRun: func(_ *cobra.Command, _ []string) {
strategy.EnsureRedactionConfigured()
},
RunE: func(cmd *cobra.Command, _ []string) error {
return runSessionsFix(cmd, forceFlag)
},
Expand Down
4 changes: 4 additions & 0 deletions cmd/entire/cli/hooks_git_cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ func initHookLogging() func() {
// Init failed - logging will use stderr fallback
return func() {}
}

// Configure PII redaction once at startup (reads settings, no-op if disabled).
strategy.EnsureRedactionConfigured()

return logging.Close
}

Expand Down
27 changes: 27 additions & 0 deletions cmd/entire/cli/settings/settings.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,24 @@ type EntireSettings struct {
// Telemetry controls anonymous usage analytics.
// nil = not asked yet (show prompt), true = opted in, false = opted out
Telemetry *bool `json:"telemetry,omitempty"`

// Redaction configures PII redaction behavior for transcripts and metadata.
Redaction *RedactionSettings `json:"redaction,omitempty"`
}

// RedactionSettings configures redaction behavior beyond the default secret detection.
// It is the value of the "redaction" key in the settings JSON.
type RedactionSettings struct {
// PII holds the PII detection options. It is a pointer with omitempty, so it
// stays nil when the "pii" key is absent.
PII *PIISettings `json:"pii,omitempty"`
}

// PIISettings configures PII detection categories.
// When Enabled is true, email and phone default to true; address defaults to false.
type PIISettings struct {
// Enabled is the master switch for PII redaction. It has no omitempty, so it
// is always written when the struct is serialized.
Enabled bool `json:"enabled"`
// Email, Phone and Address are pointers so that an absent key (nil) can be
// told apart from an explicit false; the defaults above apply to nil values.
Email *bool `json:"email,omitempty"`
Phone *bool `json:"phone,omitempty"`
Address *bool `json:"address,omitempty"`
// CustomPatterns maps a label to a regular-expression string,
// e.g. {"employee_id": "EMP-\\d{6}"}.
CustomPatterns map[string]string `json:"custom_patterns,omitempty"`
}

// Load loads the Entire settings from .entire/settings.json,
Expand Down Expand Up @@ -204,6 +222,15 @@ func mergeJSON(settings *EntireSettings, data []byte) error {
settings.Telemetry = &t
}

// Override redaction if present
if redactionRaw, ok := raw["redaction"]; ok {
var r RedactionSettings
if err := json.Unmarshal(redactionRaw, &r); err != nil {
return fmt.Errorf("parsing redaction field: %w", err)
}
settings.Redaction = &r
}

return nil
}

Expand Down
86 changes: 85 additions & 1 deletion cmd/entire/cli/settings/settings_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ func TestLoad_AcceptsValidKeys(t *testing.T) {
"local_dev": false,
"log_level": "debug",
"strategy_options": {"key": "value"},
"telemetry": true
"telemetry": true,
"redaction": {"pii": {"enabled": true, "email": true, "phone": false}}
}`
if err := os.WriteFile(settingsFile, []byte(settingsContent), 0644); err != nil {
t.Fatalf("failed to write settings file: %v", err)
Expand Down Expand Up @@ -92,6 +93,21 @@ func TestLoad_AcceptsValidKeys(t *testing.T) {
if settings.Telemetry == nil || !*settings.Telemetry {
t.Error("expected telemetry to be true")
}
if settings.Redaction == nil {
t.Fatal("expected redaction to be non-nil")
}
if settings.Redaction.PII == nil {
t.Fatal("expected redaction.pii to be non-nil")
}
if !settings.Redaction.PII.Enabled {
t.Error("expected redaction.pii.enabled to be true")
}
if settings.Redaction.PII.Email == nil || !*settings.Redaction.PII.Email {
t.Error("expected redaction.pii.email to be true")
}
if settings.Redaction.PII.Phone == nil || *settings.Redaction.PII.Phone {
t.Error("expected redaction.pii.phone to be false")
}
}

func TestLoad_LocalSettingsRejectsUnknownKeys(t *testing.T) {
Expand Down Expand Up @@ -135,6 +151,74 @@ func TestLoad_LocalSettingsRejectsUnknownKeys(t *testing.T) {
}
}

// TestLoad_MissingRedactionIsNil checks that Load leaves Redaction nil when
// settings.json carries no "redaction" key.
func TestLoad_MissingRedactionIsNil(t *testing.T) {
	root := t.TempDir()
	configDir := filepath.Join(root, ".entire")
	settingsPath := filepath.Join(configDir, "settings.json")
	gitDir := filepath.Join(root, ".git")

	if mkErr := os.MkdirAll(configDir, 0755); mkErr != nil {
		t.Fatalf("failed to create .entire directory: %v", mkErr)
	}

	// Settings that deliberately omit the "redaction" key.
	payload := []byte(`{"strategy": "manual-commit"}`)
	if writeErr := os.WriteFile(settingsPath, payload, 0644); writeErr != nil {
		t.Fatalf("failed to write settings file: %v", writeErr)
	}

	// An empty .git directory makes the temp dir look like a repository root.
	if mkErr := os.MkdirAll(gitDir, 0755); mkErr != nil {
		t.Fatalf("failed to create .git directory: %v", mkErr)
	}
	t.Chdir(root)

	loaded, loadErr := Load()
	if loadErr != nil {
		t.Fatalf("unexpected error: %v", loadErr)
	}
	if loaded.Redaction == nil {
		return
	}
	t.Error("expected redaction to be nil when not in settings")
}

// TestLoad_LocalOverridesRedaction checks that a "redaction" section in
// settings.local.json replaces the one from settings.json.
func TestLoad_LocalOverridesRedaction(t *testing.T) {
	root := t.TempDir()
	configDir := filepath.Join(root, ".entire")
	if mkErr := os.MkdirAll(configDir, 0755); mkErr != nil {
		t.Fatalf("failed to create .entire directory: %v", mkErr)
	}

	// Project-level settings switch PII off.
	basePath := filepath.Join(configDir, "settings.json")
	baseJSON := `{"strategy": "manual-commit", "redaction": {"pii": {"enabled": false}}}`
	if writeErr := os.WriteFile(basePath, []byte(baseJSON), 0644); writeErr != nil {
		t.Fatalf("failed to write settings file: %v", writeErr)
	}

	// Local settings switch it back on and add a custom pattern.
	localPath := filepath.Join(configDir, "settings.local.json")
	localJSON := `{"redaction": {"pii": {"enabled": true, "custom_patterns": {"employee_id": "EMP-\\d{6}"}}}}`
	if writeErr := os.WriteFile(localPath, []byte(localJSON), 0644); writeErr != nil {
		t.Fatalf("failed to write local settings file: %v", writeErr)
	}

	gitDir := filepath.Join(root, ".git")
	if mkErr := os.MkdirAll(gitDir, 0755); mkErr != nil {
		t.Fatalf("failed to create .git directory: %v", mkErr)
	}
	t.Chdir(root)

	loaded, loadErr := Load()
	if loadErr != nil {
		t.Fatalf("unexpected error: %v", loadErr)
	}
	if loaded.Redaction == nil || loaded.Redaction.PII == nil {
		t.Fatal("expected redaction.pii to be non-nil after local override")
	}

	pii := loaded.Redaction.PII
	if !pii.Enabled {
		t.Error("expected local override to enable PII")
	}
	if pii.CustomPatterns == nil {
		t.Fatal("expected custom_patterns to be non-nil")
	}
	if got := pii.CustomPatterns["employee_id"]; got != `EMP-\d{6}` {
		t.Errorf("expected employee_id pattern, got %v", pii.CustomPatterns)
	}
}

// containsUnknownField checks if the error message indicates an unknown field
func containsUnknownField(msg string) bool {
// Go's json package reports unknown fields with this message format
Expand Down
35 changes: 35 additions & 0 deletions cmd/entire/cli/strategy/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/json"
"errors"
"fmt"
"log/slog"
"os"
"os/exec"
"path/filepath"
Expand All @@ -16,8 +17,11 @@ import (
"github.com/entireio/cli/cmd/entire/cli/agent"
"github.com/entireio/cli/cmd/entire/cli/checkpoint"
"github.com/entireio/cli/cmd/entire/cli/checkpoint/id"
"github.com/entireio/cli/cmd/entire/cli/logging"
"github.com/entireio/cli/cmd/entire/cli/paths"
"github.com/entireio/cli/cmd/entire/cli/settings"
"github.com/entireio/cli/cmd/entire/cli/trailers"
"github.com/entireio/cli/redact"

"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing"
Expand Down Expand Up @@ -238,6 +242,37 @@ var (
protectedDirsCache []string
)

// initRedactionOnce guards the one-time setup done by EnsureRedactionConfigured.
var initRedactionOnce sync.Once

// EnsureRedactionConfigured reads the PII redaction settings and hands them to
// the redact package. It does nothing when PII redaction is not enabled.
//
// The work runs at most once per process: later calls return immediately, and
// a failed settings load is logged as a warning and not retried.
//
// Call it at every process entry point before checkpoints are written
// (e.g., hook logging initialization, the doctor command's PreRun).
func EnsureRedactionConfigured() {
	initRedactionOnce.Do(func() {
		loaded, loadErr := settings.Load()
		if loadErr != nil {
			warnCtx := logging.WithComponent(context.Background(), "redaction")
			logging.Warn(warnCtx, "failed to load settings for PII redaction", slog.String("error", loadErr.Error()))
			return
		}

		redaction := loaded.Redaction
		if redaction == nil {
			return
		}
		pii := redaction.PII
		if pii == nil || !pii.Enabled {
			return
		}

		// flagOr resolves an optional setting: nil means "use the fallback".
		flagOr := func(flag *bool, fallback bool) bool {
			if flag == nil {
				return fallback
			}
			return *flag
		}

		// Email and phone are on by default once PII is enabled; address is opt-in.
		redact.ConfigurePII(redact.PIIConfig{
			Enabled: true,
			Categories: map[redact.PIICategory]bool{
				redact.PIIEmail:   flagOr(pii.Email, true),
				redact.PIIPhone:   flagOr(pii.Phone, true),
				redact.PIIAddress: flagOr(pii.Address, false),
			},
			CustomPatterns: pii.CustomPatterns,
		})
	})
}

// isSpecificAgentType returns true if the agent type is a known, specific value
// (not empty and not the generic "Agent" fallback).
func isSpecificAgentType(t agent.AgentType) bool {
Expand Down
153 changes: 153 additions & 0 deletions redact/pii.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package redact

import (
	"regexp"
	"sort"
	"strings"
	"sync"
)

// PIICategory identifies a category of personally identifiable information.
// Its values are used as the keys of PIIConfig.Categories.
type PIICategory string

// Builtin categories; each one has a matching entry in builtinPIIPatterns.
const (
// PIIEmail selects the builtin email pattern.
PIIEmail PIICategory = "email"
// PIIPhone selects the builtin phone-number pattern.
PIIPhone PIICategory = "phone"
// PIIAddress selects the builtin street-address pattern.
PIIAddress PIICategory = "address"
)

// PIIConfig controls which PII categories are detected and redacted.
//
// ConfigurePII stores a shallow copy of the value it receives, so the
// Categories and CustomPatterns maps remain shared with the caller.
type PIIConfig struct {
// Enabled globally enables/disables PII redaction.
// When false, no PII patterns are checked (secrets still redacted).
Enabled bool

// Categories maps each PII category to whether it is enabled.
// Missing keys default to false (disabled).
Categories map[PIICategory]bool

// CustomPatterns allows teams to define additional regex patterns.
// Each key is a label used in the replacement token (uppercased),
// and each value is a regex pattern string.
// Example: {"employee_id": `EMP-\d{6}`} produces [REDACTED_EMPLOYEE_ID].
// Patterns that fail to compile are skipped by compilePIIPatterns.
CustomPatterns map[string]string

// patterns holds pre-compiled patterns, populated by ConfigurePII.
// When nil (e.g., in tests constructing PIIConfig directly),
// detectPII falls back to compilePIIPatterns.
patterns []piiPattern
}

// piiPattern is a compiled regex with its replacement token label.
// detectPII copies label into every taggedRegion produced by regex.
type piiPattern struct {
// regex is the compiled matcher: a shared builtin or a compiled custom pattern.
regex *regexp.Regexp
label string // e.g., "EMAIL", "PHONE", "ADDRESS"
}

// Process-wide PII configuration state.
var (
// piiConfig is the active configuration; it stays nil until ConfigurePII is called.
piiConfig *PIIConfig
// piiConfigMu guards reads and writes of the piiConfig pointer.
piiConfigMu sync.RWMutex
)

// ConfigurePII installs cfg as the process-wide PII redaction configuration.
// Patterns are compiled here, up front, so that detectPII does not have to
// compile anything while scanning text.
// Intended to be called once at startup after settings are loaded; the update
// is performed under the package's write lock.
func ConfigurePII(cfg PIIConfig) {
	// cfg was passed by value, so it is already a private (shallow) copy.
	stored := &cfg

	piiConfigMu.Lock()
	defer piiConfigMu.Unlock()

	stored.patterns = compilePIIPatterns(stored)
	piiConfig = stored
}

// getPIIConfig reads the active PII configuration under the read lock.
// The result is nil when ConfigurePII has not been called.
func getPIIConfig() *PIIConfig {
	piiConfigMu.RLock()
	current := piiConfig
	piiConfigMu.RUnlock()
	return current
}

// Pre-compiled builtin PII regexes.
var (
// emailRegex matches local-part@domain.tld between word boundaries, where
// the final label is at least two ASCII letters.
emailRegex = regexp.MustCompile(`\b[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}\b`)
// phoneRegex uses three branches to avoid false-positives on dotted-decimal
// strings like version numbers (1.234.567.8901) and IPs (192.168.001.0001).
// Dots are accepted as separators throughout only after a +1 prefix
// (unambiguous intl prefix). The parenthesized-area-code branch also allows
// a dot before the last four digits. Bare digits accept only dashes and
// spaces as separators.
// NOTE(review): the pattern has no boundary anchors, so it can also match
// inside a longer run of digits — confirm whether that is intended.
phoneRegex = regexp.MustCompile(
`(?:` +
`\+1[-.\s]?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}` + // +1 intl prefix: any separator
`|` +
`(?:1[-\s])?\(\d{3}\)\s?\d{3}[-.\s]?\d{4}` + // parenthesized area code
`|` +
`(?:1[-\s])?\d{3}[-\s]\d{3}[-\s]\d{4}` + // bare digits: dash/space only
`)`,
)
// addressRegex matches a 1-5 digit house number, one or more capitalized
// words, and a street suffix (St, Avenue, Blvd, ...) with an optional
// trailing period.
addressRegex = regexp.MustCompile(`\d{1,5}\s+[A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)*\s+(?:St(?:reet)?|Ave(?:nue)?|Blvd|Boulevard|Dr(?:ive)?|Ln|Lane|Rd|Road|Ct|Court|Pl(?:ace)?|Way|Cir(?:cle)?|Ter(?:race)?|Pkwy|Parkway)\.?`)
)

// builtinPIIPattern associates a compiled regex with a category and label.
type builtinPIIPattern struct {
// category is the PIIConfig.Categories key that switches this pattern on.
category PIICategory
// label is the token label attached to matches, e.g. "EMAIL".
label string
// regex is the package-level pre-compiled matcher for this category.
regex *regexp.Regexp
}

// builtinPIIPatterns lists the default PII detectors in the order in which
// compilePIIPatterns considers them: email, phone, then address.
var builtinPIIPatterns = []builtinPIIPattern{
	{category: PIIEmail, label: "EMAIL", regex: emailRegex},
	{category: PIIPhone, label: "PHONE", regex: phoneRegex},
	{category: PIIAddress, label: "ADDRESS", regex: addressRegex},
}

// detectPII returns tagged regions for PII matches in s.
//
// It returns nil immediately if cfg is nil or PII redaction is not enabled.
// Otherwise it uses the patterns pre-compiled by ConfigurePII, or compiles
// them on the fly when cfg.patterns is nil (e.g. a PIIConfig built directly
// in a test).
//
// Regions are emitted pattern by pattern, in match order within each pattern;
// regions from different patterns may overlap. Zero-width matches are dropped:
// a custom pattern that can match the empty string (such as `\d*`) would
// otherwise produce an empty region at many offsets.
func detectPII(cfg *PIIConfig, s string) []taggedRegion {
	if cfg == nil || !cfg.Enabled {
		return nil
	}

	patterns := cfg.patterns
	if patterns == nil {
		patterns = compilePIIPatterns(cfg)
	}

	var regions []taggedRegion
	for _, p := range patterns {
		for _, loc := range p.regex.FindAllStringIndex(s, -1) {
			if loc[0] == loc[1] {
				continue // empty match: nothing to redact
			}
			regions = append(regions, taggedRegion{
				region: region{loc[0], loc[1]},
				label:  p.label,
			})
		}
	}
	return regions
}

// compilePIIPatterns builds the pattern list from config.
//
// Builtin regexes are pre-compiled package vars, so only custom patterns are
// compiled here. Builtins come first, in builtinPIIPatterns order, and only
// when their category is enabled in cfg.Categories (missing keys count as
// disabled). Custom patterns follow in ascending label order, so the result
// does not depend on Go's randomized map iteration order.
//
// Custom patterns that fail to compile are skipped (best-effort). The result
// is nil when no pattern is active.
func compilePIIPatterns(cfg *PIIConfig) []piiPattern {
	var patterns []piiPattern
	for _, bp := range builtinPIIPatterns {
		// Indexing a map (even a nil one) yields false for absent keys.
		if cfg.Categories[bp.category] {
			patterns = append(patterns, piiPattern{regex: bp.regex, label: bp.label})
		}
	}

	// Sort the labels so the compiled order is stable across runs.
	labels := make([]string, 0, len(cfg.CustomPatterns))
	for label := range cfg.CustomPatterns {
		labels = append(labels, label)
	}
	sort.Strings(labels)

	for _, label := range labels {
		compiled, err := regexp.Compile(cfg.CustomPatterns[label])
		if err != nil {
			continue // skip invalid custom patterns silently
		}
		patterns = append(patterns, piiPattern{regex: compiled, label: strings.ToUpper(label)})
	}
	return patterns
}

// replacementToken maps a region label to the placeholder written in place of
// the redacted text.
// An empty label (secrets) yields the bare "REDACTED" for backward
// compatibility; any other label (PII) yields "[REDACTED_<LABEL>]".
func replacementToken(label string) string {
	switch label {
	case "":
		return "REDACTED"
	default:
		return "[REDACTED_" + label + "]"
	}
}
Loading