From 565b715d8ff5d35dd7d004cdfc5b3d29427c5d6a Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Thu, 8 Jan 2026 18:21:15 -0800 Subject: [PATCH 1/3] Fix _outer alias insertion --- test/sql/measures.test | 11 +++++++ yardstick-rs/src/sql/measures.rs | 50 +++++++++++++++++++++++++++++--- 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/test/sql/measures.test b/test/sql/measures.test index 8e503cb..ba77c33 100644 --- a/test/sql/measures.test +++ b/test/sql/measures.test @@ -48,6 +48,17 @@ FROM sales_v; 2023 EU 225.0 2023 US 225.0 +# Lowercase from with line break +query IIR rowsort +SEMANTIC SELECT year, region, AGGREGATE(revenue) AT (ALL region) AS year_total +from + sales_v; +---- +2022 EU 150.0 +2022 US 150.0 +2023 EU 225.0 +2023 US 225.0 + # ============================================================================= # Test: AT modifiers without SEMANTIC prefix # ============================================================================= diff --git a/yardstick-rs/src/sql/measures.rs b/yardstick-rs/src/sql/measures.rs index 2e01a52..32fc104 100644 --- a/yardstick-rs/src/sql/measures.rs +++ b/yardstick-rs/src/sql/measures.rs @@ -676,6 +676,10 @@ fn is_boundary_char(ch: Option) -> bool { ch.map_or(true, |c| !c.is_alphanumeric() && c != '_') } +fn is_table_ident_char(ch: char) -> bool { + ch.is_alphanumeric() || ch == '_' || ch == '.' +} + fn find_top_level_keyword(sql: &str, keyword: &str, start: usize) -> Option { let upper = sql.to_uppercase(); let upper_bytes = upper.as_bytes(); @@ -834,6 +838,42 @@ fn find_top_level_keyword(sql: &str, keyword: &str, start: usize) -> Option Option { + let from_pos = find_top_level_keyword(sql, "FROM", 0)?; + let bytes = sql.as_bytes(); + let mut idx = from_pos + 4; + while idx < bytes.len() && bytes[idx].is_ascii_whitespace() { + idx += 1; + } + if idx >= bytes.len() { + return None; + } + + let table_start = idx; + while idx < bytes.len() && is_table_ident_char(bytes[idx] as char) { + idx += 1; + } + if table_start == idx { + return None; + } + + let table_token = &sql[table_start..idx]; + let table_simple = table_token + .split('.') + .next_back() + .unwrap_or(table_token); + if !table_simple.eq_ignore_ascii_case(table_name) { + return None; + } + + let mut updated = String::with_capacity(sql.len() + alias.len() + 1); + updated.push_str(&sql[..idx]); + updated.push(' '); + updated.push_str(alias); + updated.push_str(&sql[idx..]); + Some(updated) +} + fn find_first_top_level_keyword(sql: &str, start: usize, keywords: &[&str]) -> Option { keywords .iter() @@ -3395,10 +3435,12 @@ pub fn expand_aggregate_with_at(sql: &str) -> AggregateExpandResult { Some(pt.effective_name.clone()) } else { // No alias on primary table, add _outer - let from_pattern = format!("FROM {}", pt.name); - let from_replacement = format!("FROM {} _outer", pt.name); - result_sql = result_sql.replace(&from_pattern, &from_replacement); - Some("_outer".to_string()) + if let Some(updated_sql) = insert_table_alias(&result_sql, &pt.name, "_outer") { + result_sql = updated_sql; + Some("_outer".to_string()) + } else { + None + } } } else { None From 53a270476a5c3a52fd3e7544be10ec01ffd7de1b Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Thu, 8 Jan 2026 18:42:42 -0800 Subject: [PATCH 2/3] Skip comments in keyword scan --- test/sql/measures.test | 22 ++++++++++++++++++++++ yardstick-rs/src/sql/measures.rs | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/test/sql/measures.test b/test/sql/measures.test index ba77c33..623f8cb 100644 --- a/test/sql/measures.test +++ b/test/sql/measures.test @@ -59,6 +59,28 @@ from 2023 EU 225.0 2023 US 225.0 +# FROM inside a line comment should be ignored +query IIR rowsort +SEMANTIC SELECT year, region, AGGREGATE(revenue) AT (ALL region) AS year_total +-- from sales_v +FROM sales_v; +---- +2022 EU 150.0 +2022 US 150.0 +2023 EU 225.0 +2023 US 225.0 + +# FROM inside a block comment should be ignored +query IIR rowsort +SEMANTIC SELECT year, region, AGGREGATE(revenue) AT (ALL region) AS year_total +/* from sales_v */ +FROM sales_v; +---- +2022 EU 150.0 +2022 US 150.0 +2023 EU 225.0 +2023 US 225.0 + # ============================================================================= # Test: AT modifiers without SEMANTIC prefix # ============================================================================= diff --git a/yardstick-rs/src/sql/measures.rs b/yardstick-rs/src/sql/measures.rs index 32fc104..2f75bae 100644 --- a/yardstick-rs/src/sql/measures.rs +++ b/yardstick-rs/src/sql/measures.rs @@ -697,11 +697,31 @@ fn find_top_level_keyword(sql: &str, keyword: &str, start: usize) -> Option Option { in_single = true; From 1ca6cd53f9d5eac93fb8c5704711ccde8cf9b53b Mon Sep 17 00:00:00 2001 From: Nico Ritschel Date: Thu, 8 Jan 2026 19:21:46 -0800 Subject: [PATCH 3/3] Ignore comments when locating FROM --- yardstick-rs/src/sql/measures.rs | 112 ++++++++++++++++++++----------- 1 file changed, 72 insertions(+), 40 deletions(-) diff --git a/yardstick-rs/src/sql/measures.rs b/yardstick-rs/src/sql/measures.rs index 2f75bae..8915b9f 100644 --- a/yardstick-rs/src/sql/measures.rs +++ b/yardstick-rs/src/sql/measures.rs @@ -583,58 +583,90 @@ pub fn extract_table_name_from_sql(sql: &str) -> Option { /// Extract table name and optional alias from SQL FROM clause /// Returns (table_name, Option) pub fn extract_table_and_alias_from_sql(sql: &str) -> Option<(String, Option)> { - // Normalize whitespace to handle newlines/tabs in SQL - let normalized: String = sql - .chars() - .map(|c| if c.is_whitespace() { ' ' } else { c }) - .collect(); - let normalized_upper = normalized.to_uppercase(); - let from_pos = normalized_upper.find(" FROM ")?; - let after_from = &normalized[from_pos..]; + fn skip_ws_and_comments(sql: &str, mut idx: usize) -> usize { + let bytes = sql.as_bytes(); + while idx < bytes.len() { + let c = bytes[idx] as char; + if c.is_whitespace() { + idx += 1; + continue; + } + if c == '-' && idx + 1 < bytes.len() && bytes[idx + 1] as char == '-' { + idx += 2; + while idx < bytes.len() { + let ch = bytes[idx] as char; + idx += 1; + if ch == '\n' || ch == '\r' { + break; + } + } + continue; + } + if c == '/' && idx + 1 < bytes.len() && bytes[idx + 1] as char == '*' { + idx += 2; + while idx + 1 < bytes.len() { + let ch = bytes[idx] as char; + if ch == '*' && bytes[idx + 1] as char == '/' { + idx += 2; + break; + } + idx += 1; + } + continue; + } + break; + } + idx + } - // Parse: FROM table_name [AS] [alias] - let (rest, _) = multispace1::<_, nom::error::Error<&str>>(after_from).ok()?; - let (rest, _) = tag_no_case::<_, _, nom::error::Error<&str>>("FROM")(rest).ok()?; - let (rest, _) = multispace1::<_, nom::error::Error<&str>>(rest).ok()?; - let (rest, table) = identifier(rest).ok()?; + let from_pos = find_top_level_keyword(sql, "FROM", 0)?; + let mut idx = from_pos + 4; + idx = skip_ws_and_comments(sql, idx); + if idx >= sql.len() { + return None; + } - // Check for alias (optional AS keyword followed by identifier) - let rest_trimmed = rest.trim_start(); + let table_start = idx; + while idx < sql.len() && is_table_ident_char(sql.as_bytes()[idx] as char) { + idx += 1; + } + if table_start == idx { + return None; + } + let table = sql[table_start..idx].to_string(); - // Check for end of FROM clause (WHERE, GROUP, ORDER, etc.) - let rest_upper = rest_trimmed.to_uppercase(); - if rest_trimmed.is_empty() - || rest_upper.starts_with("WHERE") - || rest_upper.starts_with("GROUP") - || rest_upper.starts_with("ORDER") - || rest_upper.starts_with("LIMIT") - || rest_upper.starts_with("HAVING") - || rest_upper.starts_with("JOIN") - || rest_trimmed.starts_with(';') - { - return Some((table.to_string(), None)); + idx = skip_ws_and_comments(sql, idx); + if idx >= sql.len() || sql.as_bytes()[idx] as char == ';' { + return Some((table, None)); } - // Try to parse optional AS keyword - let after_as = if rest_upper.starts_with("AS ") { - &rest_trimmed[3..] - } else { - rest_trimmed - }; + let rest = &sql[idx..]; + let rest_upper = rest.to_uppercase(); + let mut rest_after_as = rest; + if rest_upper.starts_with("AS") { + let after_as = &rest[2..]; + if after_as + .chars() + .next() + .map_or(false, |ch| ch.is_whitespace()) + { + let mut as_idx = idx + 2; + as_idx = skip_ws_and_comments(sql, as_idx); + rest_after_as = &sql[as_idx..]; + } + } - // Parse the alias identifier - if let Ok((_, alias)) = identifier(after_as.trim_start()) { - // Make sure alias isn't a keyword + if let Ok((_, alias)) = identifier(rest_after_as.trim_start()) { let alias_upper = alias.to_uppercase(); if matches!( alias_upper.as_str(), - "WHERE" | "GROUP" | "ORDER" | "LIMIT" | "HAVING" | "JOIN" + "FROM" | "WHERE" | "GROUP" | "ORDER" | "LIMIT" | "HAVING" | "JOIN" ) { - return Some((table.to_string(), None)); + return Some((table, None)); } - Some((table.to_string(), Some(alias.to_string()))) + Some((table, Some(alias.to_string()))) } else { - Some((table.to_string(), None)) + Some((table, None)) } }