From 8474d963f73fdcf135c6f34de6edbf9ae8a1d7a3 Mon Sep 17 00:00:00 2001
From: Christophe Varoqui
Date: Wed, 28 Jan 2026 10:19:26 +0100
Subject: [PATCH 1/4] Avoid "adjust last run time" in undue situations

Verify the job has a require-up configuration and the actor topology is
failover before doing the adjustment.

Also abort the job when it ran on a peer between our local job timer
reset and the timer firing, and reschedule based on the peer's last run
time.
---
 daemon/scheduler/main.go | 47 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 44 insertions(+), 3 deletions(-)

diff --git a/daemon/scheduler/main.go b/daemon/scheduler/main.go
index 35833fbfb..cff6635c8 100644
--- a/daemon/scheduler/main.go
+++ b/daemon/scheduler/main.go
@@ -23,6 +23,7 @@ import (
 	"github.com/opensvc/om3/v3/core/resourcereqs"
 	"github.com/opensvc/om3/v3/core/schedule"
 	"github.com/opensvc/om3/v3/core/status"
+	"github.com/opensvc/om3/v3/core/topology"
 	"github.com/opensvc/om3/v3/daemon/daemondata"
 	"github.com/opensvc/om3/v3/daemon/daemonsubsystem"
 	"github.com/opensvc/om3/v3/daemon/msgbus"
@@ -46,6 +47,7 @@ type (
 		jobs        Jobs
 		enabled     bool
 		provisioned map[naming.Path]bool
+		failover    map[naming.Path]bool
 		schedules   Schedules
 
 		isCollectorJoinable bool
@@ -148,6 +150,7 @@ func New(subQS pubsub.QueueSizer, opts ...funcopt.O) *T {
 		events:            make(chan any),
 		jobs:              make(Jobs),
 		schedules:         make(Schedules),
+		failover:          make(map[naming.Path]bool),
 		provisioned:       make(map[naming.Path]bool),
 		subQS:             subQS,
 		lastRunOnAllPeers: make(timeMap),
@@ -259,6 +262,29 @@ func (t Job) Cancel() {
 	t.cancel = nil
 }
 
+func (t *T) peerInstanceLastRun(e schedule.Entry) time.Time {
+	if e.Path.IsZero() {
+		return time.Time{}
+	}
+	if !t.isFailover(e.Path) {
+		return time.Time{}
+	}
+	if e.Config.Require == "" {
+		return time.Time{}
+	}
+	if strings.Contains(e.Config.Require, "down") {
+		return time.Time{}
+	}
+	if strings.Contains(e.Config.Require, "warn") {
+		return time.Time{}
+	}
+	lastRunOnAllPeers, ok := t.lastRunOnAllPeers.Get(e.Path, e.Key)
+	if !ok {
+		return time.Time{}
+	}
+	return lastRunOnAllPeers
+}
+
 func (t *T) createJob(e schedule.Entry) {
 	if !t.enabled {
 		return
@@ -277,9 +303,10 @@ func (t *T) createJob(e schedule.Entry) {
 		// after daemon start: initialize the schedule's LastRunAt from LastRunFile
 		e.LastRunAt = e.GetLastRun()
 	}
-	if lastRunOnAllPeers, ok := t.lastRunOnAllPeers.Get(e.Path, e.Key); ok && e.LastRunAt.Before(lastRunOnAllPeers) {
-		logger.Infof("adjust schedule entry last run time: %s => %s", e.LastRunAt, lastRunOnAllPeers)
-		e.LastRunAt = lastRunOnAllPeers
+
+	if tm := t.peerInstanceLastRun(e); e.LastRunAt.Before(tm) {
+		logger.Infof("adjust schedule entry last run time: %s => %s", e.LastRunAt, tm)
+		e.LastRunAt = tm
 	}
 
 	now := time.Now() // keep before GetNext call
@@ -324,6 +351,11 @@ func (t *T) jobLogger(e schedule.Entry) *plog.Logger {
 	return logger.WithPrefix(prefix)
 }
 
+func (t *T) isFailover(path naming.Path) bool {
+	isFailover, hasFailover := t.failover[path]
+	return hasFailover && isFailover
+}
+
 func (t *T) isProvisioned(path naming.Path) bool {
 	isProvisioned, hasProvisioned := t.provisioned[path]
 	return hasProvisioned && isProvisioned
@@ -356,6 +388,12 @@ func (t *T) onJobAlarm(c eventJobAlarm) {
 		}
 	}
 
+	if tm := t.peerInstanceLastRun(e); c.schedule.LastRunAt.Before(tm) {
+		logger.Infof("aborted, job ran on peer at %s", tm)
+		t.recreateJobFrom(e, tm)
+		return
+	}
+
 	// plan the next run before exec, so another exec can be done
 	// even if another is running
 	e.LastRunAt = c.schedule.LastRunAt
@@ -745,6 +783,8 @@ func (t *T) onDaemonCollectorUpdated(c *msgbus.DaemonCollectorUpdated) {
 func (t *T) onObjectStatusDeleted(c *msgbus.ObjectStatusDeleted) {
 	t.lastRunOnAllPeers.UnsetPath(c.Path)
 	t.reqSatisfied.UnsetPath(c.Path)
+	delete(t.provisioned, c.Path)
+	delete(t.failover, c.Path)
 }
 
 func (t *T) onObjectStatusUpdated(c *msgbus.ObjectStatusUpdated) {
@@ -761,6 +801,7 @@ func (t *T) onObjectStatusUpdated(c *msgbus.ObjectStatusUpdated) {
 		delete(t.provisioned, c.Path)
 		return
 	}
+	t.failover[c.Path] = c.Value.Topology == topology.Failover
 	isProvisioned := c.Value.Provisioned.IsOneOf(provisioned.True, provisioned.NotApplicable)
 	wasProvisioned, ok := t.provisioned[c.Path]
 	t.provisioned[c.Path] = isProvisioned
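Note on PATCH 1/4: the program below is a standalone, simplified sketch of the
new guard, not the daemon's actual wiring (the real code goes through
schedule.Entry, naming.Path and the lastRunOnAllPeers map). It only shows when
a peer's last run time is taken into account: the object topology must be
failover and the schedule must carry a require constraint that is not a
down/warn requirement. The require value "up" used here is illustrative.

package main

import (
	"fmt"
	"strings"
	"time"
)

// peerLastRun mirrors the peerInstanceLastRun conditions: return the zero
// time (meaning "ignore the peers") unless the object topology is failover
// and the schedule carries a require constraint that is not down/warn.
func peerLastRun(isFailover bool, require string, peerRun time.Time, peerKnown bool) time.Time {
	if !isFailover {
		return time.Time{}
	}
	if require == "" {
		return time.Time{}
	}
	if strings.Contains(require, "down") || strings.Contains(require, "warn") {
		return time.Time{}
	}
	if !peerKnown {
		return time.Time{}
	}
	return peerRun
}

func main() {
	localLastRun := time.Now().Add(-2 * time.Hour)
	peerLastRunAt := time.Now().Add(-10 * time.Minute)

	// failover object with a require-up schedule: adjust the last run forward
	if tm := peerLastRun(true, "up", peerLastRunAt, true); localLastRun.Before(tm) {
		fmt.Printf("adjust schedule entry last run time: %s => %s\n", localLastRun, tm)
	}

	// non-failover object: the peer last run is ignored
	if tm := peerLastRun(false, "up", peerLastRunAt, true); !localLastRun.Before(tm) {
		fmt.Println("no adjustment for non-failover objects")
	}
}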
From 7fbebfcebfbee503c5fb81ea9e4a758bae21e4aa Mon Sep 17 00:00:00 2001
From: Christophe Varoqui
Date: Wed, 28 Jan 2026 10:40:53 +0100
Subject: [PATCH 2/4] Fix missing resource status log entries in status.json

The status log entries were cloned before we added more entries
(e.g. isProvisioned() can add entries). Clone the log last.
---
 core/resource/resource.go | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/core/resource/resource.go b/core/resource/resource.go
index 5e82116c3..515909248 100644
--- a/core/resource/resource.go
+++ b/core/resource/resource.go
@@ -1226,15 +1226,14 @@ func GetStatus(ctx context.Context, r Driver) Status {
 	// on containers it will set the initial inspect.
 	resStatus := EvalStatus(ctx, r)
 	return Status{
-		Label:         formatResourceLabel(ctx, r),
-		Type:          r.Manifest().DriverID.String(),
-		Status:        resStatus,
-		Subset:        r.RSubset(),
-		Tags:          r.TagSet(),
-		Log:           r.StatusLog().Entries(),
-		IsProvisioned: getProvisionStatus(ctx, r),
-		Info:          getStatusInfo(ctx, r),
-		Files:         getFiles(ctx, r),
+		Label:  formatResourceLabel(ctx, r),
+		Type:   r.Manifest().DriverID.String(),
+		Status: resStatus,
+		Subset: r.RSubset(),
+		Tags:   r.TagSet(),
+		Log:    r.StatusLog().Entries(),
+		Info:   getStatusInfo(ctx, r),
+		Files:  getFiles(ctx, r),
 
 		IsStopped:   r.IsStopped(),
 		IsMonitored: r.IsMonitored(),
@@ -1243,6 +1242,9 @@ func GetStatus(ctx context.Context, r Driver) Status {
 		IsStandby:  r.IsStandby(),
 		IsDisabled: r.IsDisabled(),
 		IsEncap:    r.IsEncap(),
+
+		// keep last because all previous func calls can add entries
+		IsProvisioned: getProvisionStatus(ctx, r),
 	}
 }
 
From 8b3b65d685bc45c9b1c2b4d6406cac0c94996375 Mon Sep 17 00:00:00 2001
From: Christophe Varoqui
Date: Wed, 28 Jan 2026 11:52:51 +0100
Subject: [PATCH 3/4] Fix instance status rendering empty when the daemon is
 down

Only sec, cfg, usr, ccfg and nscfg instance statuses should use the
reduced render, so key it on the object kind instead of a nil
ActorConfig.
---
 core/instance/states_render.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/core/instance/states_render.go b/core/instance/states_render.go
index 8be158b5f..024e570a3 100644
--- a/core/instance/states_render.go
+++ b/core/instance/states_render.go
@@ -5,6 +5,7 @@ import (
 	"strings"
 
 	"github.com/opensvc/om3/v3/core/colorstatus"
+	"github.com/opensvc/om3/v3/core/naming"
 	"github.com/opensvc/om3/v3/core/provisioned"
 	"github.com/opensvc/om3/v3/core/rawconfig"
 	"github.com/opensvc/om3/v3/core/resource"
@@ -46,7 +47,8 @@ func (t States) LoadTreeNodeFolded(head *tree.Node) {
 func (t States) LoadTreeNode(head *tree.Node) {
 	head.AddColumn().AddText(t.Node.Name).SetColor(rawconfig.Color.Bold)
 	head.AddColumn()
-	if t.Config.ActorConfig == nil {
+	switch t.Path.Kind {
+	case naming.KindSec, naming.KindCfg, naming.KindCcfg, naming.KindUsr, naming.KindNscfg:
 		head.AddColumn()
 		head.AddColumn().AddText(t.descString())
 		return
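Note on PATCH 2/4: the toy program below uses made-up types, not the om3
resource package, to demonstrate the evaluation-order point behind the fix.
In a Go composite literal, field initializer expressions run in lexical
source order, so a snapshot of a shared log taken by an early field misses
entries appended by a later field's initializer; cloning the log after the
side-effecting calls keeps them.

package main

import "fmt"

type statusLog struct{ entries []string }

func (l *statusLog) add(msg string) { l.entries = append(l.entries, msg) }

// snapshot returns a copy of the entries accumulated so far.
func (l *statusLog) snapshot() []string { return append([]string(nil), l.entries...) }

// provisionCheck stands in for a status helper that also pushes an entry
// onto the shared log as a side effect.
func provisionCheck(l *statusLog) bool {
	l.add("provision check: not provisioned")
	return false
}

type report struct {
	Provisioned bool
	Log         []string
}

func main() {
	l := &statusLog{}
	// Field initializers run in source order: Log is snapshotted before
	// provisionCheck appends, so its entry is lost.
	early := report{
		Log:         l.snapshot(),
		Provisioned: provisionCheck(l),
	}
	fmt.Println("log cloned first:", early.Log)

	l = &statusLog{}
	// Cloning the log after the side-effecting call keeps the entry.
	late := report{
		Provisioned: provisionCheck(l),
		Log:         l.snapshot(),
	}
	fmt.Println("log cloned last: ", late.Log)
}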
From add4d8afc7b779e80b1f267dc0bd3758541f8ba7 Mon Sep 17 00:00:00 2001
From: Christophe Varoqui
Date: Wed, 28 Jan 2026 14:49:25 +0100
Subject: [PATCH 4/4] Skip a resource's encap status when encapnodes is empty

No need to waste compute time and status.json size on irrelevant data.
---
 core/object/actor_status.go | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/core/object/actor_status.go b/core/object/actor_status.go
index 56e381535..1fe598d44 100644
--- a/core/object/actor_status.go
+++ b/core/object/actor_status.go
@@ -218,7 +218,6 @@ func (t *actor) resourceStatusEval(ctx context.Context, data *instance.Status, m
 	var (
 		resourceStatus      resource.Status
 		encapInstanceStatus *instance.EncapStatus
-		err                 error
 	)
 
 	if v, err := t.isEncapNodeMatchingResource(r); err != nil {
@@ -245,20 +244,24 @@ func (t *actor) resourceStatusEval(ctx context.Context, data *instance.Status, m
 	}
 
 	// If the resource is a encap capable container, evaluate the encap instance
-	if encapContainer, ok := r.(resource.Encaper); ok {
-		if resourceStatus.Status.Is(status.Up, status.StandbyUp) {
-			if encapInstanceStatus, err = t.resourceStatusEvalEncap(ctx, encapContainer, false); err != nil {
-				log := resource.NewStatusLog(resourceStatus.Log...)
-				log.Error("%s", err)
-				resourceStatus.Log = log.Entries()
-			}
-		} else {
-			encapInstanceStatus = &instance.EncapStatus{
-				Status: instance.Status{
-					Avail:   status.Down,
-					Overall: status.Down,
-				},
-				Hostname: encapContainer.GetHostname(),
+	if encapNodes, err := t.EncapNodes(); err != nil {
+		return err
+	} else if len(encapNodes) > 0 {
+		if encapContainer, ok := r.(resource.Encaper); ok {
+			if resourceStatus.Status.Is(status.Up, status.StandbyUp) {
+				if encapInstanceStatus, err = t.resourceStatusEvalEncap(ctx, encapContainer, false); err != nil {
+					log := resource.NewStatusLog(resourceStatus.Log...)
+					log.Error("%s", err)
+					resourceStatus.Log = log.Entries()
+				}
+			} else {
+				encapInstanceStatus = &instance.EncapStatus{
+					Status: instance.Status{
+						Avail:   status.Down,
+						Overall: status.Down,
+					},
+					Hostname: encapContainer.GetHostname(),
+				}
 			}
 		}
 	}
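Note on PATCH 4/4: a minimal standalone sketch of the idea, with toy types and
assumed JSON field names rather than the om3 instance/resource structs. When
no encap nodes are declared, the encap status pointer stays nil, so nothing is
evaluated and the key is dropped from the marshalled status.

package main

import (
	"encoding/json"
	"fmt"
)

type encapStatus struct {
	Avail    string `json:"avail"`
	Hostname string `json:"hostname"`
}

type resourceStatus struct {
	Status string       `json:"status"`
	Encap  *encapStatus `json:"encap,omitempty"` // nil pointer => key absent from the JSON
}

// evalEncap stands in for the comparatively expensive per-container encap
// instance evaluation.
func evalEncap(hostname string) *encapStatus {
	return &encapStatus{Avail: "up", Hostname: hostname}
}

// statusOf only evaluates the encap section when encap nodes are declared.
func statusOf(encapNodes []string) resourceStatus {
	rs := resourceStatus{Status: "up"}
	if len(encapNodes) == 0 {
		return rs // skip the evaluation entirely, keep status.json small
	}
	rs.Encap = evalEncap(encapNodes[0])
	return rs
}

func main() {
	for _, nodes := range [][]string{nil, {"vm1"}} {
		b, _ := json.Marshal(statusOf(nodes))
		fmt.Println(string(b))
	}
}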