Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 8 additions & 12 deletions app/privkeylock/privkeylock.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ func New(privKeyFilePath, clusterLockFilePath, command string) (Service, error)

content, err := os.ReadFile(privKeyFileLockPath)
if errors.Is(err, os.ErrNotExist) { //nolint:revive // Empty block is fine.
// No file, we will create it in run
// No file, we will create it in Run
} else if err != nil {
return Service{}, errors.Wrap(err, "read private key lock file", z.Str("path", privKeyFileLockPath))
} else {
Expand All @@ -75,7 +75,7 @@ func New(privKeyFilePath, clusterLockFilePath, command string) (Service, error)

if time.Since(meta.Timestamp) <= staleDuration {
return Service{}, errors.New(
"existing private key lock file found, another charon instance may be running on your machine",
"private key lock file is recently updated, another charon instance may be running on your machine",
z.Str("path", privKeyFileLockPath),
z.Str("command", meta.Command),
z.Str("cluster_lock_hash", meta.ClusterLockHash),
Expand All @@ -100,7 +100,7 @@ func New(privKeyFilePath, clusterLockFilePath, command string) (Service, error)
}
}

if err := writePrivateKeyLockFile(privKeyFileLockPath, clusterLockHash, command, time.Now()); err != nil {
if err := overwritePrivateKeyLockFile(privKeyFileLockPath, clusterLockHash, command, time.Now()); err != nil {
return Service{}, err
}

Expand All @@ -124,7 +124,8 @@ type Service struct {
done chan struct{} // Done is closed when Run exits, which exits the Close goroutine.
}

// Run runs the service, updating the lock file every second and deleting it on context cancellation.
// Run runs the service, periodically updating the lock file.
// It does NOT delete the lock file on exit; callers are responsible for any cleanup.
func (s Service) Run() error {
defer close(s.done)

Expand All @@ -134,14 +135,9 @@ func (s Service) Run() error {
for {
select {
case <-s.quit:
if err := os.Remove(s.lockFilePath); err != nil {
return errors.Wrap(err, "delete private key lock file")
}

return nil
case <-tick.C:
// Overwrite lockfile with new metadata
if err := writePrivateKeyLockFile(s.lockFilePath, s.clusterLockHash, s.command, time.Now()); err != nil {
if err := overwritePrivateKeyLockFile(s.lockFilePath, s.clusterLockHash, s.command, time.Now()); err != nil {
return err
}
}
Expand All @@ -162,8 +158,8 @@ type metadata struct {
ClusterLockHash string `json:"cluster_lock_hash,omitempty"`
}

// writePrivateKeyLockFile creates or updates the lock file with the latest metadata.
func writePrivateKeyLockFile(path, clusterLockHash, command string, now time.Time) error {
// overwritePrivateKeyLockFile creates or updates the lock file with the latest metadata.
func overwritePrivateKeyLockFile(path, clusterLockHash, command string, now time.Time) error {
b, err := json.Marshal(metadata{Command: command, Timestamp: now, ClusterLockHash: clusterLockHash})
if err != nil {
return errors.Wrap(err, "marshal private key lock file")
Expand Down
18 changes: 9 additions & 9 deletions app/privkeylock/privkeylock_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func TestService(t *testing.T) {
writePrivateKeyFile(t, privKeyPath)

// Create a stale file that is ignored.
err := writePrivateKeyLockFile(lockPath, "hash123", "test", time.Now().Add(-staleDuration))
err := overwritePrivateKeyLockFile(lockPath, "hash123", "test", time.Now().Add(-staleDuration))
require.NoError(t, err)

// Create a new service.
Expand All @@ -38,7 +38,7 @@ func TestService(t *testing.T) {

// Assert a new service can't be created.
_, err = New(privKeyPath, lockFilePath, "test")
require.ErrorContains(t, err, "existing private key lock file found")
require.ErrorContains(t, err, "private key lock file is recently updated")

// Delete the file so Run will create it again.
require.NoError(t, os.Remove(lockPath))
Expand All @@ -55,9 +55,9 @@ func TestService(t *testing.T) {

require.NoError(t, eg.Wait())

// Assert the file is deleted.
// Assert the file is not deleted.
_, openErr := os.Open(lockPath)
require.ErrorIs(t, openErr, os.ErrNotExist)
require.NoError(t, openErr)
}

func TestClusterHashMismatchWithinGracePeriod(t *testing.T) {
Expand All @@ -71,7 +71,7 @@ func TestClusterHashMismatchWithinGracePeriod(t *testing.T) {
writePrivateKeyFile(t, privKeyPath)

// Create a stale but within grace period lock file with hash1.
err := writePrivateKeyLockFile(lockPath, "hash1", "test", time.Now().Add(-staleDuration-time.Second))
err := overwritePrivateKeyLockFile(lockPath, "hash1", "test", time.Now().Add(-staleDuration-time.Second))
require.NoError(t, err)

// Update cluster lock file to hash2.
Expand All @@ -95,7 +95,7 @@ func TestClusterHashMismatchAfterGracePeriod(t *testing.T) {
writePrivateKeyFile(t, privKeyPath)

// Create an old lock file with hash1 (beyond grace period).
err := writePrivateKeyLockFile(lockPath, "hash1", "test", time.Now().Add(-gracePeriod-time.Second))
err := overwritePrivateKeyLockFile(lockPath, "hash1", "test", time.Now().Add(-gracePeriod-time.Second))
require.NoError(t, err)

// Update cluster lock file to hash2.
Expand Down Expand Up @@ -127,7 +127,7 @@ func TestClusterHashMatchWithinGracePeriod(t *testing.T) {
writePrivateKeyFile(t, privKeyPath)

// Create a recent lock file with hash1 (within stale duration).
err := writePrivateKeyLockFile(lockPath, "hash1", "test", time.Now().Add(-time.Second))
err := overwritePrivateKeyLockFile(lockPath, "hash1", "test", time.Now().Add(-time.Second))
require.NoError(t, err)

// Try to create service with same hash - should fail due to staleness check.
Expand All @@ -136,7 +136,7 @@ func TestClusterHashMatchWithinGracePeriod(t *testing.T) {
require.ErrorContains(t, err, "another charon instance may be running")

// Now create a stale lock file with same hash (beyond stale duration but within grace period).
err = writePrivateKeyLockFile(lockPath, "hash1", "test", time.Now().Add(-staleDuration-time.Second))
err = overwritePrivateKeyLockFile(lockPath, "hash1", "test", time.Now().Add(-staleDuration-time.Second))
require.NoError(t, err)

// Should succeed since hash matches and file is stale.
Expand Down Expand Up @@ -211,7 +211,7 @@ func TestEmptyHashToHashMigration(t *testing.T) {
writePrivateKeyFile(t, privKeyPath)

// Create a stale lock file with empty cluster hash (migration scenario).
err := writePrivateKeyLockFile(lockPath, "", "test", time.Now().Add(-staleDuration*2))
err := overwritePrivateKeyLockFile(lockPath, "", "test", time.Now().Add(-staleDuration*2))
require.NoError(t, err)

// Should succeed - empty hash shouldn't trigger grace period.
Expand Down
2 changes: 1 addition & 1 deletion cmd/createcluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ func detectNodeDirs(clusterDir string, nodeAmount int) error {
return errors.Wrap(err, "absolute path retrieval")
}

if _, err := os.Stat(filepath.Join(absPath, "cluster-lock.json")); err == nil {
if _, err := os.Stat(filepath.Join(absPath, clusterLockFile)); err == nil {
return errors.New("existing node directory found, please delete it before running this command", z.Str("node_path", absPath))
}
}
Expand Down
4 changes: 3 additions & 1 deletion cmd/dkg.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ import (
"github.com/obolnetwork/charon/dkg"
)

var defaultDKGRelays = []string{"https://4.relay.obol.dev"}

func newDKGCmd(runFunc func(context.Context, dkg.Config) error) *cobra.Command {
var config dkg.Config

Expand Down Expand Up @@ -50,7 +52,7 @@ this command at the same time.`,
bindKeymanagerFlags(cmd.Flags(), &config.KeymanagerAddr, &config.KeymanagerAuthToken)
bindDefDirFlag(cmd.Flags(), &config.DefFile)
bindNoVerifyFlag(cmd.Flags(), &config.NoVerify)
bindP2PFlags(cmd, &config.P2P)
bindP2PFlags(cmd, &config.P2P, defaultDKGRelays...)
bindLogFlags(cmd.Flags(), &config.Log)
bindPublishFlags(cmd.Flags(), &config)
bindShutdownDelayFlag(cmd.Flags(), &config.ShutdownDelay)
Expand Down
6 changes: 1 addition & 5 deletions cmd/edit_addoperators.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,6 @@ import (
"github.com/obolnetwork/charon/eth2util/enr"
)

const (
defaultAlphaRelay = "https://4.relay.obol.dev"
)

func newAddOperatorsCmd(runFunc func(context.Context, dkg.AddOperatorsConfig, dkg.Config) error) *cobra.Command {
var (
config dkg.AddOperatorsConfig
Expand Down Expand Up @@ -52,7 +48,7 @@ func newAddOperatorsCmd(runFunc func(context.Context, dkg.AddOperatorsConfig, dk
cmd.Flags().DurationVar(&dkgConfig.Timeout, "timeout", time.Minute, "Timeout for the protocol, should be increased if protocol times out.")

bindNoVerifyFlag(cmd.Flags(), &dkgConfig.NoVerify)
bindP2PFlags(cmd, &dkgConfig.P2P, defaultAlphaRelay)
bindP2PFlags(cmd, &dkgConfig.P2P, defaultDKGRelays...)
bindLogFlags(cmd.Flags(), &dkgConfig.Log)
bindEth1Flag(cmd.Flags(), &dkgConfig.ExecutionEngineAddr)
bindShutdownDelayFlag(cmd.Flags(), &dkgConfig.ShutdownDelay)
Expand Down
2 changes: 1 addition & 1 deletion cmd/edit_addvalidators.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func newAddValidatorsCmd(runFunc func(context.Context, addValidatorsConfig) erro
// Bind `dkg` flags.
bindKeymanagerFlags(cmd.Flags(), &config.DKG.KeymanagerAddr, &config.DKG.KeymanagerAuthToken)
bindNoVerifyFlag(cmd.Flags(), &config.DKG.NoVerify)
bindP2PFlags(cmd, &config.DKG.P2P, defaultAlphaRelay)
bindP2PFlags(cmd, &config.DKG.P2P, defaultDKGRelays...)
bindLogFlags(cmd.Flags(), &config.DKG.Log)
bindShutdownDelayFlag(cmd.Flags(), &config.DKG.ShutdownDelay)
bindEth1Flag(cmd.Flags(), &config.DKG.ExecutionEngineAddr)
Expand Down
2 changes: 1 addition & 1 deletion cmd/edit_recreateprivatekeys.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func newRecreatePrivateKeysCmd(runFunc func(context.Context, dkg.ReshareConfig)
cmd.Flags().DurationVar(&config.DKGConfig.Timeout, "timeout", time.Minute, "Timeout for the protocol, should be increased if protocol times out.")

bindNoVerifyFlag(cmd.Flags(), &config.DKGConfig.NoVerify)
bindP2PFlags(cmd, &config.DKGConfig.P2P, defaultAlphaRelay)
bindP2PFlags(cmd, &config.DKGConfig.P2P, defaultDKGRelays...)
bindLogFlags(cmd.Flags(), &config.DKGConfig.Log)
bindEth1Flag(cmd.Flags(), &config.DKGConfig.ExecutionEngineAddr)
bindShutdownDelayFlag(cmd.Flags(), &config.DKGConfig.ShutdownDelay)
Expand Down
2 changes: 1 addition & 1 deletion cmd/edit_removeoperators.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func newRemoveOperatorsCmd(runFunc func(context.Context, dkg.RemoveOperatorsConf
cmd.Flags().StringSliceVar(&config.ParticipatingENRs, "participating-operator-enrs", nil, "Comma-separated list of operator ENRs participating in the ceremony. Required if --operator-enrs-to-remove specifies more operators to remove than the fault tolerance of the current cluster.")

bindNoVerifyFlag(cmd.Flags(), &dkgConfig.NoVerify)
bindP2PFlags(cmd, &dkgConfig.P2P, defaultAlphaRelay)
bindP2PFlags(cmd, &dkgConfig.P2P, defaultDKGRelays...)
bindLogFlags(cmd.Flags(), &dkgConfig.Log)
bindEth1Flag(cmd.Flags(), &dkgConfig.ExecutionEngineAddr)
bindShutdownDelayFlag(cmd.Flags(), &dkgConfig.ShutdownDelay)
Expand Down
2 changes: 1 addition & 1 deletion cmd/edit_replaceoperator.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ func newReplaceOperatorCmd(runFunc func(context.Context, dkg.ReplaceOperatorConf
cmd.Flags().DurationVar(&dkgConfig.Timeout, "timeout", time.Minute, "Timeout for the protocol, should be increased if protocol times out.")

bindNoVerifyFlag(cmd.Flags(), &dkgConfig.NoVerify)
bindP2PFlags(cmd, &dkgConfig.P2P, defaultAlphaRelay)
bindP2PFlags(cmd, &dkgConfig.P2P, defaultDKGRelays...)
bindLogFlags(cmd.Flags(), &dkgConfig.Log)
bindEth1Flag(cmd.Flags(), &dkgConfig.ExecutionEngineAddr)
bindShutdownDelayFlag(cmd.Flags(), &dkgConfig.ShutdownDelay)
Expand Down
2 changes: 1 addition & 1 deletion cmd/testall.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ func newTestAllCmd(runFunc func(context.Context, io.Writer, testAllConfig) error
bindTestMEVFlags(cmd, &config.MEV, "mev-")
bindTestInfraFlags(cmd, &config.Infra, "infra-")

bindP2PFlags(cmd, &config.Peers.P2P, defaultAlphaRelay)
bindP2PFlags(cmd, &config.Peers.P2P, defaultDKGRelays...)
bindTestLogFlags(cmd.Flags(), &config.Peers.Log)

wrapPreRunE(cmd, func(cmd *cobra.Command, _ []string) error {
Expand Down
2 changes: 1 addition & 1 deletion cmd/testpeers.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func newTestPeersCmd(runFunc func(context.Context, io.Writer, testPeersConfig) (

bindTestFlags(cmd, &config.testConfig)
bindTestPeersFlags(cmd, &config, "")
bindP2PFlags(cmd, &config.P2P, defaultAlphaRelay)
bindP2PFlags(cmd, &config.P2P, defaultDKGRelays...)
bindTestLogFlags(cmd.Flags(), &config.Log)

wrapPreRunE(cmd, func(cmd *cobra.Command, _ []string) error {
Expand Down
8 changes: 4 additions & 4 deletions dkg/disk.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ func writeLock(datadir string, lock cluster.Lock) error {
}

//nolint:gosec // File needs to be read-only for everybody
err = os.WriteFile(path.Join(datadir, "cluster-lock.json"), b, 0o444) // Read-only
err = os.WriteFile(path.Join(datadir, clusterLockFile), b, 0o444) // Read-only
if err != nil {
return errors.Wrap(err, "write lock")
}
Expand All @@ -179,12 +179,12 @@ func checkClearDataDir(dataDir string) error {
}

disallowedEntities := map[string]struct{}{
"validator_keys": {},
"cluster-lock.json": {},
validatorKeysSubDir: {},
clusterLockFile: {},
}

necessaryEntities := map[string]bool{
"charon-enr-private-key": false,
enrPrivateKeyFile: false,
}

for _, entity := range dirContent {
Expand Down
2 changes: 1 addition & 1 deletion dkg/disk_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ func TestCheckClearDataDir(t *testing.T) {

require.NoError(t,
os.WriteFile(
filepath.Join(rootDir, dataDir, "cluster-lock.json"),
filepath.Join(rootDir, dataDir, clusterLockFile),
[]byte{1, 2, 3},
0o755,
),
Expand Down
27 changes: 11 additions & 16 deletions dkg/dkg.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,23 +114,18 @@ func Run(ctx context.Context, conf Config) (err error) {

ctx = log.WithTopic(ctx, "dkg")

{
// Setup private key locking.
lockSvc, err := privkeylock.New(p2p.KeyPath(conf.DataDir), path.Join(conf.DataDir, "cluster-lock.json"), "charon dkg")
if err != nil {
return err
}

// Start it async
go func() {
if err := lockSvc.Run(); err != nil {
log.Error(ctx, "Failed to acquire lock on private key file. Another process may be using it or file permissions may be incorrect", err)
}
}()

// Stop it on exit.
defer lockSvc.Close()
// Setup private key locking and starting async
lockSvc, err := privkeylock.New(p2p.KeyPath(conf.DataDir), path.Join(conf.DataDir, clusterLockFile), "charon dkg")
if err != nil {
return err
}
defer lockSvc.Close() // safe to defer Close here, because lockSvc.Run() will be called eventually

go func() {
if err := lockSvc.Run(); err != nil {
log.Error(ctx, "Failed to update the private key lock file, another process may be using the file or file permissions may be incorrect.", err)
}
}()

version.LogInfo(ctx, "Charon DKG starting")

Expand Down
4 changes: 2 additions & 2 deletions dkg/dkg_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -362,12 +362,12 @@ func testDKG(t *testing.T, def cluster.Definition, dir string, p2pKeys []*k1.Pri
testutil.SkipIfBindErr(t, err)
testutil.RequireNoError(t, err)

// check that the privkey lock file has been deleted in all nodes at the end of dkg
// check that the privkey lock file has NOT been deleted in all nodes at the end of dkg
for i := range len(def.Operators) {
lockPath := path.Join(dir, fmt.Sprintf("node%d", i), "charon-enr-private-key.lock")

_, openErr := os.Open(lockPath)
require.ErrorIs(t, openErr, os.ErrNotExist)
require.NoError(t, openErr)
}

if keymanager {
Expand Down
Loading