Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions test/sql/measures.test
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,39 @@ FROM sales_v;
2023 EU 225.0
2023 US 225.0

# Lowercase from with line break
query IIR rowsort
SEMANTIC SELECT year, region, AGGREGATE(revenue) AT (ALL region) AS year_total
from
sales_v;
----
2022 EU 150.0
2022 US 150.0
2023 EU 225.0
2023 US 225.0

# FROM inside a line comment should be ignored
query IIR rowsort
SEMANTIC SELECT year, region, AGGREGATE(revenue) AT (ALL region) AS year_total
-- from sales_v
FROM sales_v;
----
2022 EU 150.0
2022 US 150.0
2023 EU 225.0
2023 US 225.0

# FROM inside a block comment should be ignored
query IIR rowsort
SEMANTIC SELECT year, region, AGGREGATE(revenue) AT (ALL region) AS year_total
/* from sales_v */
FROM sales_v;
----
2022 EU 150.0
2022 US 150.0
2023 EU 225.0
2023 US 225.0

# =============================================================================
# Test: AT modifiers without SEMANTIC prefix
# =============================================================================
Expand Down
192 changes: 150 additions & 42 deletions yardstick-rs/src/sql/measures.rs
Original file line number Diff line number Diff line change
Expand Up @@ -702,56 +702,90 @@ pub fn extract_table_name_from_sql(sql: &str) -> Option<String> {
/// Extract table name and optional alias from SQL FROM clause
/// Returns (table_name, Option<alias>)
pub fn extract_table_and_alias_from_sql(sql: &str) -> Option<(String, Option<String>)> {
// Normalize whitespace to handle newlines/tabs in SQL
let normalized: String = sql
.chars()
.map(|c| if c.is_whitespace() { ' ' } else { c })
.collect();
let from_pos = find_top_level_keyword(&normalized, "FROM", 0)?;
let after_from = &normalized[from_pos..];
fn skip_ws_and_comments(sql: &str, mut idx: usize) -> usize {
let bytes = sql.as_bytes();
while idx < bytes.len() {
let c = bytes[idx] as char;
if c.is_whitespace() {
idx += 1;
continue;
}
if c == '-' && idx + 1 < bytes.len() && bytes[idx + 1] as char == '-' {
idx += 2;
while idx < bytes.len() {
let ch = bytes[idx] as char;
idx += 1;
if ch == '\n' || ch == '\r' {
break;
}
}
continue;
}
if c == '/' && idx + 1 < bytes.len() && bytes[idx + 1] as char == '*' {
idx += 2;
while idx + 1 < bytes.len() {
let ch = bytes[idx] as char;
if ch == '*' && bytes[idx + 1] as char == '/' {
idx += 2;
break;
}
idx += 1;
}
continue;
}
break;
}
idx
}

// Parse: FROM table_name [AS] [alias]
let (rest, _) = tag_no_case::<_, _, nom::error::Error<&str>>("FROM")(after_from).ok()?;
let (rest, _) = multispace1::<_, nom::error::Error<&str>>(rest).ok()?;
let (rest, table) = identifier(rest).ok()?;
let from_pos = find_top_level_keyword(sql, "FROM", 0)?;
let mut idx = from_pos + 4;
idx = skip_ws_and_comments(sql, idx);
if idx >= sql.len() {
return None;
}

// Check for alias (optional AS keyword followed by identifier)
let rest_trimmed = rest.trim_start();
let table_start = idx;
while idx < sql.len() && is_table_ident_char(sql.as_bytes()[idx] as char) {
idx += 1;
}
if table_start == idx {
return None;
}
let table = sql[table_start..idx].to_string();

// Check for end of FROM clause (WHERE, GROUP, ORDER, etc.)
let rest_upper = rest_trimmed.to_uppercase();
if rest_trimmed.is_empty()
|| rest_upper.starts_with("WHERE")
|| rest_upper.starts_with("GROUP")
|| rest_upper.starts_with("ORDER")
|| rest_upper.starts_with("LIMIT")
|| rest_upper.starts_with("HAVING")
|| rest_upper.starts_with("JOIN")
|| rest_trimmed.starts_with(';')
{
return Some((table.to_string(), None));
idx = skip_ws_and_comments(sql, idx);
if idx >= sql.len() || sql.as_bytes()[idx] as char == ';' {
return Some((table, None));
}

// Try to parse optional AS keyword
let after_as = if rest_upper.starts_with("AS ") {
&rest_trimmed[3..]
} else {
rest_trimmed
};
let rest = &sql[idx..];
let rest_upper = rest.to_uppercase();
let mut rest_after_as = rest;
if rest_upper.starts_with("AS") {
let after_as = &rest[2..];
if after_as
.chars()
.next()
.map_or(false, |ch| ch.is_whitespace())
{
let mut as_idx = idx + 2;
as_idx = skip_ws_and_comments(sql, as_idx);
rest_after_as = &sql[as_idx..];
}
}

// Parse the alias identifier
if let Ok((_, alias)) = identifier(after_as.trim_start()) {
// Make sure alias isn't a keyword
if let Ok((_, alias)) = identifier(rest_after_as.trim_start()) {
let alias_upper = alias.to_uppercase();
if matches!(
alias_upper.as_str(),
"WHERE" | "GROUP" | "ORDER" | "LIMIT" | "HAVING" | "JOIN"
"FROM" | "WHERE" | "GROUP" | "ORDER" | "LIMIT" | "HAVING" | "JOIN"
) {
return Some((table.to_string(), None));
return Some((table, None));
}
Some((table.to_string(), Some(alias.to_string())))
Some((table, Some(alias.to_string())))
} else {
Some((table.to_string(), None))
Some((table, None))
}
}

Expand Down Expand Up @@ -793,6 +827,10 @@ fn is_boundary_char(ch: Option<char>) -> bool {
ch.map_or(true, |c| !c.is_alphanumeric() && c != '_')
}

fn is_table_ident_char(ch: char) -> bool {
ch.is_alphanumeric() || ch == '_' || ch == '.'
}

fn find_top_level_keyword(sql: &str, keyword: &str, start: usize) -> Option<usize> {
let upper = sql.to_uppercase();
let upper_bytes = upper.as_bytes();
Expand All @@ -810,11 +848,31 @@ fn find_top_level_keyword(sql: &str, keyword: &str, start: usize) -> Option<usiz
let mut in_double = false;
let mut in_backtick = false;
let mut in_bracket = false;
let mut in_line_comment = false;
let mut in_block_comment = false;

let mut i = start;
while i < bytes.len() {
let c = bytes[i] as char;

if in_line_comment {
if c == '\n' || c == '\r' {
in_line_comment = false;
}
i += 1;
continue;
}

if in_block_comment {
if c == '*' && i + 1 < bytes.len() && bytes[i + 1] as char == '/' {
in_block_comment = false;
i += 2;
continue;
}
i += 1;
continue;
}

if in_single {
if c == '\'' {
if i + 1 < bytes.len() && bytes[i + 1] as char == '\'' {
Expand Down Expand Up @@ -851,6 +909,18 @@ fn find_top_level_keyword(sql: &str, keyword: &str, start: usize) -> Option<usiz
continue;
}

if c == '-' && i + 1 < bytes.len() && bytes[i + 1] as char == '-' {
in_line_comment = true;
i += 2;
continue;
}

if c == '/' && i + 1 < bytes.len() && bytes[i + 1] as char == '*' {
in_block_comment = true;
i += 2;
continue;
}

match c {
'\'' => {
in_single = true;
Expand Down Expand Up @@ -951,6 +1021,42 @@ fn find_top_level_keyword(sql: &str, keyword: &str, start: usize) -> Option<usiz
None
}

fn insert_table_alias(sql: &str, table_name: &str, alias: &str) -> Option<String> {
let from_pos = find_top_level_keyword(sql, "FROM", 0)?;
let bytes = sql.as_bytes();
Comment on lines +1024 to +1026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Skip comments when locating FROM for alias insertion

insert_table_alias relies on find_top_level_keyword to grab the first FROM at depth 0, but that helper does not skip SQL comments. If the query contains a -- ... from <table> or /* from <table> */ comment before the real FROM clause, this will insert _outer into the comment and return Some(_), so later expansion uses _outer even though the actual FROM clause was never updated. That yields correlated references like _outer.col against a query that has no _outer alias, causing SQL errors in any statement that needs outer correlation. Consider ignoring comments in find_top_level_keyword or searching for the first FROM token that is not inside a comment before inserting the alias.

Useful? React with 👍 / 👎.

let mut idx = from_pos + 4;
while idx < bytes.len() && bytes[idx].is_ascii_whitespace() {
idx += 1;
}
if idx >= bytes.len() {
return None;
}

let table_start = idx;
while idx < bytes.len() && is_table_ident_char(bytes[idx] as char) {
idx += 1;
}
if table_start == idx {
return None;
}

let table_token = &sql[table_start..idx];
let table_simple = table_token
.split('.')
.next_back()
.unwrap_or(table_token);
if !table_simple.eq_ignore_ascii_case(table_name) {
return None;
}

let mut updated = String::with_capacity(sql.len() + alias.len() + 1);
updated.push_str(&sql[..idx]);
updated.push(' ');
updated.push_str(alias);
updated.push_str(&sql[idx..]);
Some(updated)
}

fn find_first_top_level_keyword(sql: &str, start: usize, keywords: &[&str]) -> Option<usize> {
keywords
.iter()
Expand Down Expand Up @@ -3826,10 +3932,12 @@ pub fn expand_aggregate_with_at(sql: &str) -> AggregateExpandResult {
Some(pt.effective_name.clone())
} else {
// No alias on primary table, add _outer
let from_pattern = format!("FROM {}", pt.name);
let from_replacement = format!("FROM {} _outer", pt.name);
result_sql = result_sql.replace(&from_pattern, &from_replacement);
Some("_outer".to_string())
if let Some(updated_sql) = insert_table_alias(&result_sql, &pt.name, "_outer") {
result_sql = updated_sql;
Some("_outer".to_string())
} else {
None
}
}
} else {
None
Expand Down
Loading