Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 29 additions & 21 deletions lading_observer/src/linux/cgroup/v2/cpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,21 +23,17 @@ struct Stats {
last_instant: Instant,
}

#[derive(Debug)]
#[derive(Debug, Default)]
pub(crate) struct Sampler {
prev: Stats,
/// Previous stats for delta calculation. None on first poll.
prev: Option<Stats>,
}

impl Sampler {
/// Create a new CPU Sampler
#[must_use]
pub(crate) fn new() -> Self {
Self {
prev: Stats {
usage_usec: 0,
user_usec: 0,
system_usec: 0,
last_instant: Instant::now(),
},
}
Self { prev: None }
}

// Read cgroup CPU data and calculate a percentage of usage.
Expand Down Expand Up @@ -80,17 +76,29 @@ impl Sampler {
}

let now = Instant::now();
let delta_time = now.duration_since(self.prev.last_instant).as_micros();
let delta_usage = usage_usec.saturating_sub(self.prev.usage_usec);
let delta_user = user_usec.saturating_sub(self.prev.user_usec);
let delta_system = system_usec.saturating_sub(self.prev.system_usec);

// Update previous stats and if there's a time delta calculate the CPU
// usage.
self.prev.usage_usec = usage_usec;
self.prev.user_usec = user_usec;
self.prev.system_usec = system_usec;
self.prev.last_instant = now;
let current = Stats {
usage_usec,
user_usec,
system_usec,
last_instant: now,
};

// On first poll, just record baseline stats without emitting metrics.
// This avoids a spike where delta = (cumulative_since_container_start - 0).
let Some(ref prev) = self.prev else {
self.prev = Some(current);
return Ok(());
};

let delta_time = now.duration_since(prev.last_instant).as_micros();
let delta_usage = usage_usec.saturating_sub(prev.usage_usec);
let delta_user = user_usec.saturating_sub(prev.user_usec);
let delta_system = system_usec.saturating_sub(prev.system_usec);

// Update previous stats for next poll
self.prev = Some(current);

// Emit metrics if there's a time delta
if delta_time > 0 {
let delta_time = delta_time as f64;

Expand Down
19 changes: 12 additions & 7 deletions lading_observer/src/linux/procfs/stat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,15 @@ struct CpuUtilization {
#[derive(Debug)]
pub(crate) struct Sampler {
ticks_per_second: f64,
prev: Stats,
/// Previous stats for delta calculation. None on first poll.
prev: Option<Stats>,
}

impl Sampler {
pub(crate) fn new() -> Self {
Self {
ticks_per_second: unsafe { nix::libc::sysconf(nix::libc::_SC_CLK_TCK) } as f64,
prev: Stats::default(),
prev: None,
}
}

Expand Down Expand Up @@ -78,16 +79,20 @@ impl Sampler {
let (cur_pid, utime_ticks, stime_ticks) = parse(&stat_contents)?;
assert!(cur_pid == pid);

// Get or initialize the previous stats. Note that the first time this is
// initialized we intentionally set last_instance to now to avoid scheduling
// shenanigans.
let cur_stats = Stats {
user_ticks: utime_ticks,
system_ticks: stime_ticks,
uptime_ticks: (uptime_secs * self.ticks_per_second).round() as u64,
};

if let Some(util) = compute_cpu_usage(self.prev, cur_stats, allowed_cores) {
// On first poll, just record baseline stats without emitting metrics.
// This avoids a spike where delta = (cumulative_since_process_start - 0).
let Some(ref prev) = self.prev else {
self.prev = Some(cur_stats);
return Ok(());
};

if let Some(util) = compute_cpu_usage(*prev, cur_stats, allowed_cores) {
// NOTE these metric names are paired with names in cgroup/v2/cpu.rs and
// must remain consistent. If you change these, change those.
gauge!("stat.total_cpu_percentage", labels).set(util.total_cpu_percentage);
Expand All @@ -103,7 +108,7 @@ impl Sampler {
gauge!("stat.cpu_limit_millicores", labels).set(limit_millicores);
}

self.prev = cur_stats;
self.prev = Some(cur_stats);

Ok(())
}
Expand Down
Loading