diff --git a/lading_observer/src/linux/cgroup/v2/cpu.rs b/lading_observer/src/linux/cgroup/v2/cpu.rs index fb0e98115..f3ded61bf 100644 --- a/lading_observer/src/linux/cgroup/v2/cpu.rs +++ b/lading_observer/src/linux/cgroup/v2/cpu.rs @@ -23,21 +23,17 @@ struct Stats { last_instant: Instant, } -#[derive(Debug)] +#[derive(Debug, Default)] pub(crate) struct Sampler { - prev: Stats, + /// Previous stats for delta calculation. None on first poll. + prev: Option<Stats>, } impl Sampler { + /// Create a new CPU Sampler + #[must_use] pub(crate) fn new() -> Self { - Self { - prev: Stats { - usage_usec: 0, - user_usec: 0, - system_usec: 0, - last_instant: Instant::now(), - }, - } + Self { prev: None } } // Read cgroup CPU data and calculate a percentage of usage. @@ -80,17 +76,29 @@ impl Sampler { } let now = Instant::now(); - let delta_time = now.duration_since(self.prev.last_instant).as_micros(); - let delta_usage = usage_usec.saturating_sub(self.prev.usage_usec); - let delta_user = user_usec.saturating_sub(self.prev.user_usec); - let delta_system = system_usec.saturating_sub(self.prev.system_usec); - - // Update previous stats and if there's a time delta calculate the CPU - // usage. - self.prev.usage_usec = usage_usec; - self.prev.user_usec = user_usec; - self.prev.system_usec = system_usec; - self.prev.last_instant = now; + let current = Stats { + usage_usec, + user_usec, + system_usec, + last_instant: now, + }; + + // On first poll, just record baseline stats without emitting metrics. + // This avoids a spike where delta = (cumulative_since_container_start - 0). 
+ let Some(ref prev) = self.prev else { + self.prev = Some(current); + return Ok(()); + }; + + let delta_time = now.duration_since(prev.last_instant).as_micros(); + let delta_usage = usage_usec.saturating_sub(prev.usage_usec); + let delta_user = user_usec.saturating_sub(prev.user_usec); + let delta_system = system_usec.saturating_sub(prev.system_usec); + + // Update previous stats for next poll + self.prev = Some(current); + + // Emit metrics if there's a time delta if delta_time > 0 { let delta_time = delta_time as f64; diff --git a/lading_observer/src/linux/procfs/stat.rs b/lading_observer/src/linux/procfs/stat.rs index f06913c5a..75ba9f4a7 100644 --- a/lading_observer/src/linux/procfs/stat.rs +++ b/lading_observer/src/linux/procfs/stat.rs @@ -38,14 +38,15 @@ struct CpuUtilization { #[derive(Debug)] pub(crate) struct Sampler { ticks_per_second: f64, - prev: Stats, + /// Previous stats for delta calculation. None on first poll. + prev: Option<Stats>, } impl Sampler { pub(crate) fn new() -> Self { Self { ticks_per_second: unsafe { nix::libc::sysconf(nix::libc::_SC_CLK_TCK) } as f64, - prev: Stats::default(), + prev: None, } } @@ -78,16 +79,20 @@ impl Sampler { let (cur_pid, utime_ticks, stime_ticks) = parse(&stat_contents)?; assert!(cur_pid == pid); - // Get or initialize the previous stats. Note that the first time this is - // initialized we intentionally set last_instance to now to avoid scheduling - // shenanigans. let cur_stats = Stats { user_ticks: utime_ticks, system_ticks: stime_ticks, uptime_ticks: (uptime_secs * self.ticks_per_second).round() as u64, }; - if let Some(util) = compute_cpu_usage(self.prev, cur_stats, allowed_cores) { + // On first poll, just record baseline stats without emitting metrics. + // This avoids a spike where delta = (cumulative_since_process_start - 0). 
+ let Some(ref prev) = self.prev else { + self.prev = Some(cur_stats); + return Ok(()); + }; + + if let Some(util) = compute_cpu_usage(*prev, cur_stats, allowed_cores) { // NOTE these metric names are paired with names in cgroup/v2/cpu.rs and // must remain consistent. If you change these, change those. gauge!("stat.total_cpu_percentage", labels).set(util.total_cpu_percentage); @@ -103,7 +108,7 @@ impl Sampler { gauge!("stat.cpu_limit_millicores", labels).set(limit_millicores); } - self.prev = cur_stats; + self.prev = Some(cur_stats); Ok(()) }