From 50ce5694c399d395907d0dc2ffc008ec92115a4b Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Thu, 8 Jan 2026 18:22:51 -0800 Subject: [PATCH 1/3] Fix aggregate whitespace handling --- yardstick-rs/src/sql/measures.rs | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/yardstick-rs/src/sql/measures.rs b/yardstick-rs/src/sql/measures.rs index 2e01a52..0bf7583 100644 --- a/yardstick-rs/src/sql/measures.rs +++ b/yardstick-rs/src/sql/measures.rs @@ -3767,10 +3767,10 @@ fn extract_dimension_columns_from_select(sql: &str) -> Vec { // Filter out AGGREGATE() calls and extract column names for item in items { - let item_upper = item.to_uppercase(); - if item_upper.contains("AGGREGATE(") { + if has_aggregate_function(&item) { continue; } + let item_upper = item.to_uppercase(); // Handle "col AS alias" - use the column name, not alias let col = if let Some(as_pos) = item_upper.find(" AS ") { item[..as_pos].trim() @@ -3807,9 +3807,28 @@ mod tests { #[test] fn test_has_aggregate_function() { assert!(has_aggregate_function("SELECT AGGREGATE(revenue) FROM foo")); + assert!(has_aggregate_function("SELECT AGGREGATE (revenue) FROM foo")); assert!(!has_aggregate_function("SELECT SUM(amount) FROM foo")); } + #[test] + fn test_extract_dimension_columns_ignores_aggregate_with_space() { + let cols = extract_dimension_columns_from_select( + "SELECT region, AGGREGATE (revenue) FROM sales_v", + ); + assert_eq!(cols, vec!["region".to_string()]); + + let cols = extract_dimension_columns_from_select( + "SELECT region, AGGREGATE (revenue) AT (ALL region) FROM sales_v", + ); + assert_eq!(cols, vec!["region".to_string()]); + + let cols = extract_dimension_columns_from_select( + "SELECT AGGREGATE (revenue) FROM sales_v", + ); + assert!(cols.is_empty()); + } + #[test] #[serial] fn test_process_create_view_basic() { From b413d38dfbfee6f05bb396cdff317430d034850b Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Thu, 8 Jan 2026 18:41:39 -0800 Subject: [PATCH 2/3] Tighten AGGREGATE token check --- yardstick-rs/src/sql/measures.rs | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/yardstick-rs/src/sql/measures.rs b/yardstick-rs/src/sql/measures.rs index 0bf7583..f40d193 100644 --- a/yardstick-rs/src/sql/measures.rs +++ b/yardstick-rs/src/sql/measures.rs @@ -206,8 +206,18 @@ pub fn has_aggregate_function(sql: &str) -> bool { let sql_upper = sql.to_uppercase(); let mut search_pos = 0; + let is_ident_char = |c: char| c.is_alphanumeric() || c == '_'; + while let Some(offset) = sql_upper[search_pos..].find("AGGREGATE") { let start = search_pos + offset; + if start > 0 { + if let Some(prev) = sql_upper[..start].chars().last() { + if is_ident_char(prev) { + search_pos = start + 1; + continue; + } + } + } let after = &sql_upper[start + "AGGREGATE".len()..]; if after.trim_start().starts_with('(') { return true; @@ -3808,6 +3818,8 @@ mod tests { fn test_has_aggregate_function() { assert!(has_aggregate_function("SELECT AGGREGATE(revenue) FROM foo")); assert!(has_aggregate_function("SELECT AGGREGATE (revenue) FROM foo")); + assert!(!has_aggregate_function("SELECT TOTAL_AGGREGATE(revenue) FROM foo")); + assert!(!has_aggregate_function("SELECT myaggregate(revenue) FROM foo")); assert!(!has_aggregate_function("SELECT SUM(amount) FROM foo")); } @@ -3829,6 +3841,17 @@ mod tests { assert!(cols.is_empty()); } + #[test] + fn test_extract_dimension_columns_keeps_non_aggregate_suffix() { + let cols = extract_dimension_columns_from_select( + "SELECT region, TOTAL_AGGREGATE(revenue) FROM sales_v", + ); + assert_eq!( + cols, + vec!["region".to_string(), "TOTAL_AGGREGATE(revenue)".to_string()] + ); + } + #[test] #[serial] fn test_process_create_view_basic() { From 69e5d7941632c1fb2e1529cc39bf867bd16e568d Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Thu, 8 Jan 2026 18:46:10 -0800 Subject: [PATCH 3/3] Handle qualified AGGREGATE --- yardstick-rs/src/sql/measures.rs | 154 ++++++++++++++++++++++++++++--- 1 file changed, 141 insertions(+), 13 deletions(-) diff --git a/yardstick-rs/src/sql/measures.rs b/yardstick-rs/src/sql/measures.rs index f40d193..9068ec1 100644 --- a/yardstick-rs/src/sql/measures.rs +++ b/yardstick-rs/src/sql/measures.rs @@ -203,26 +203,135 @@ pub fn has_as_measure(sql: &str) -> bool { /// Check if SQL contains AGGREGATE( function pub fn has_aggregate_function(sql: &str) -> bool { - let sql_upper = sql.to_uppercase(); - let mut search_pos = 0; + let chars: Vec = sql.chars().collect(); + let len = chars.len(); + let mut i = 0; + let is_ident_start = |c: char| c.is_alphabetic() || c == '_'; let is_ident_char = |c: char| c.is_alphanumeric() || c == '_'; - while let Some(offset) = sql_upper[search_pos..].find("AGGREGATE") { - let start = search_pos + offset; - if start > 0 { - if let Some(prev) = sql_upper[..start].chars().last() { - if is_ident_char(prev) { - search_pos = start + 1; - continue; + let skip_whitespace = |mut idx: usize| -> usize { + while idx < len && chars[idx].is_whitespace() { + idx += 1; + } + idx + }; + + let parse_identifier = |start: usize| -> (String, usize) { + let mut idx = start + 1; + while idx < len && is_ident_char(chars[idx]) { + idx += 1; + } + let token: String = chars[start..idx].iter().collect(); + (token, idx) + }; + + let parse_quoted_identifier = |start: usize| -> (String, usize) { + let mut token = String::new(); + let mut idx = start; + while idx < len { + match chars[idx] { + '"' => { + if idx + 1 < len && chars[idx + 1] == '"' { + token.push('"'); + idx += 2; + } else { + idx += 1; + break; + } + } + c => { + token.push(c); + idx += 1; } } } - let after = &sql_upper[start + "AGGREGATE".len()..]; - if after.trim_start().starts_with('(') { - return true; + (token, idx) + }; + + let parse_qualified_chain = |first: String, mut idx: usize| -> (String, usize) { + let mut last = first; + loop { + idx = skip_whitespace(idx); + if idx >= len || chars[idx] != '.' { + break; + } + idx += 1; + idx = skip_whitespace(idx); + if idx >= len { + break; + } + if chars[idx] == '"' { + let (token, next) = parse_quoted_identifier(idx + 1); + last = token; + idx = next; + } else if is_ident_start(chars[idx]) { + let (token, next) = parse_identifier(idx); + last = token; + idx = next; + } else { + break; + } + } + (last, idx) + }; + + let is_aggregate_token = |token: &str| token.eq_ignore_ascii_case("AGGREGATE"); + + while i < len { + match chars[i] { + '\'' => { + i += 1; + while i < len { + if chars[i] == '\'' { + if i + 1 < len && chars[i + 1] == '\'' { + i += 2; + continue; + } + i += 1; + break; + } + i += 1; + } + } + '-' if i + 1 < len && chars[i + 1] == '-' => { + i += 2; + while i < len && chars[i] != '\n' { + i += 1; + } + } + '/' if i + 1 < len && chars[i + 1] == '*' => { + i += 2; + while i + 1 < len { + if chars[i] == '*' && chars[i + 1] == '/' { + i += 2; + break; + } + i += 1; + } + } + '"' => { + let (token, next) = parse_quoted_identifier(i + 1); + let (last, after_chain) = parse_qualified_chain(token, next); + let after_ws = skip_whitespace(after_chain); + if after_ws < len && chars[after_ws] == '(' && is_aggregate_token(&last) { + return true; + } + i = after_chain; + } + c if is_ident_start(c) => { + let (token, next) = parse_identifier(i); + let (last, after_chain) = parse_qualified_chain(token, next); + let after_ws = skip_whitespace(after_chain); + if after_ws < len && chars[after_ws] == '(' && is_aggregate_token(&last) { + return true; + } + i = after_chain; + } + _ => { + i += 1; + } } - search_pos = start + 1; } false @@ -3818,7 +3927,13 @@ mod tests { fn test_has_aggregate_function() { assert!(has_aggregate_function("SELECT AGGREGATE(revenue) FROM foo")); assert!(has_aggregate_function("SELECT AGGREGATE (revenue) FROM foo")); + assert!(has_aggregate_function("SELECT \"AGGREGATE\"(revenue) FROM foo")); + assert!(has_aggregate_function("SELECT schema.AGGREGATE(revenue) FROM foo")); + assert!(has_aggregate_function( + "SELECT \"schema\".\"AGGREGATE\" (revenue) FROM foo" + )); assert!(!has_aggregate_function("SELECT TOTAL_AGGREGATE(revenue) FROM foo")); + assert!(!has_aggregate_function("SELECT \"TOTAL_AGGREGATE\"(revenue) FROM foo")); assert!(!has_aggregate_function("SELECT myaggregate(revenue) FROM foo")); assert!(!has_aggregate_function("SELECT SUM(amount) FROM foo")); } @@ -3852,6 +3967,19 @@ mod tests { ); } + #[test] + fn test_extract_dimension_columns_ignores_quoted_and_qualified_aggregate() { + let cols = extract_dimension_columns_from_select( + "SELECT region, \"AGGREGATE\"(revenue) FROM sales_v", + ); + assert_eq!(cols, vec!["region".to_string()]); + + let cols = extract_dimension_columns_from_select( + "SELECT region, schema.AGGREGATE(revenue) FROM sales_v", + ); + assert_eq!(cols, vec!["region".to_string()]); + } + #[test] #[serial] fn test_process_create_view_basic() {