From 09d5f75a476a72ddd2422372830f79f3c1ba08ef Mon Sep 17 00:00:00 2001 From: glowsenior Date: Thu, 29 Jan 2026 18:15:36 +0100 Subject: [PATCH] fix(consensus): implement stake-weighted aggregation with outlier detection Replace simple arithmetic mean with stake-weighted consensus score calculation as documented in README. This fix addresses a critical security and correctness issue where all validators were treated equally regardless of stake. Changes: - Add calculate_stake_weighted_consensus_score() function - Implement stake-weighted average: sum(stake * score) / sum(stake) - Add outlier detection using z-score threshold (2.0) - Look up validator stakes from database for each evaluation - Handle edge cases (missing validators, zero stake, empty evaluations) Security Impact: - Restores Sybil resistance by weighting validators by stake - Prevents manipulation through outlier detection - High-stake validators now have appropriate influence Fixes: Missing stake-weighted aggregation in consensus score calculation Related: README.md lines 254-270 (Score Aggregation section) Before: Simple mean - scores.iter().sum() / scores.len() After: Stake-weighted with outlier filtering --- crates/platform-server/src/db/queries.rs | 89 +++++++++++++++++++++++- 1 file changed, 87 insertions(+), 2 deletions(-) diff --git a/crates/platform-server/src/db/queries.rs b/crates/platform-server/src/db/queries.rs index 2a602a5f..361a8fea 100644 --- a/crates/platform-server/src/db/queries.rs +++ b/crates/platform-server/src/db/queries.rs @@ -327,6 +327,91 @@ pub async fn get_evaluations_for_agent(pool: &Pool, agent_hash: &str) -> Result< // LEADERBOARD // ============================================================================ +/// Calculate stake-weighted consensus score with outlier detection +/// +/// Implements the formula from README: +/// $\bar{s}_i = \frac{\sum_{v \in \mathcal{V}'} S_v \cdot s_i^v}{\sum_{v \in \mathcal{V}'} S_v}$ +/// +/// Where $\mathcal{V}'$ is the set of validators after outlier removal (z-score > 2.0) +async fn calculate_stake_weighted_consensus_score( + pool: &Pool, + evaluations: &[Evaluation], +) -> Result { + if evaluations.is_empty() { + return Ok(0.0); + } + + // Collect (score, stake) pairs for all evaluations + let mut score_stake_pairs: Vec<(f64, u64)> = Vec::new(); + + for eval in evaluations { + // Look up validator stake (default to 0 if not found) + let stake = if let Some(validator) = get_validator(pool, &eval.validator_hotkey).await? { + validator.stake + } else { + // Validator not found in database - skip this evaluation + // (or use 0 stake, which effectively excludes it) + continue; + }; + + score_stake_pairs.push((eval.score, stake)); + } + + if score_stake_pairs.is_empty() { + // Fallback to simple average if no validators found + return Ok(evaluations.iter().map(|e| e.score).sum::() / evaluations.len() as f64); + } + + // Outlier detection: Calculate z-scores and filter outliers + let scores: Vec = score_stake_pairs.iter().map(|(s, _)| *s).collect(); + let mean = scores.iter().sum::() / scores.len() as f64; + + // Calculate standard deviation + let variance = scores + .iter() + .map(|s| (*s - mean).powi(2)) + .sum::() + / scores.len() as f64; + let std_dev = variance.sqrt(); + + // Filter outliers (z-score > 2.0 threshold) + const Z_SCORE_THRESHOLD: f64 = 2.0; + let filtered_pairs: Vec<(f64, u64)> = if std_dev > 0.0 { + score_stake_pairs + .into_iter() + .filter(|(score, _)| { + let z_score = (score - mean).abs() / std_dev; + z_score <= Z_SCORE_THRESHOLD + }) + .collect() + } else { + // If std_dev is 0, all scores are the same, no outliers + score_stake_pairs + }; + + if filtered_pairs.is_empty() { + // If all were outliers, fallback to mean + return Ok(mean); + } + + // Calculate stake-weighted average + let total_weighted_score: f64 = filtered_pairs + .iter() + .map(|(score, stake)| *score * (*stake as f64)) + .sum(); + + let total_stake: u64 = filtered_pairs.iter().map(|(_, stake)| *stake).sum(); + + if total_stake == 0 { + // Fallback to simple average if total stake is 0 + let avg_score: f64 = filtered_pairs.iter().map(|(s, _)| *s).sum::() + / filtered_pairs.len() as f64; + return Ok(avg_score); + } + + Ok(total_weighted_score / (total_stake as f64)) +} + pub async fn update_leaderboard(pool: &Pool, agent_hash: &str) -> Result> { let evaluations = get_evaluations_for_agent(pool, agent_hash).await?; if evaluations.is_empty() { @@ -337,8 +422,8 @@ pub async fn update_leaderboard(pool: &Pool, agent_hash: &str) -> Result = evaluations.iter().map(|e| e.score).collect(); - let consensus_score = scores.iter().sum::() / scores.len() as f64; + // Calculate stake-weighted consensus score with outlier detection + let consensus_score = calculate_stake_weighted_consensus_score(pool, &evaluations).await?; let evaluation_count = evaluations.len() as i32; let first_epoch = submission.epoch as i64;