From be0d77089cd6b3f31790d7e14097fecb1d8ee6ae Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Fri, 23 Jan 2026 18:35:55 +0000 Subject: [PATCH 1/4] refactor: fix test_timedelta_agg__timedelta_result on agg_ops.QuantileOp --- bigframes/core/compile/sqlglot/aggregations/unary_compiler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py b/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py index 647e86d28a..3b85139004 100644 --- a/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py +++ b/bigframes/core/compile/sqlglot/aggregations/unary_compiler.py @@ -527,7 +527,7 @@ def _( else: result = apply_window_if_present(result, window) - if op.should_floor_result: + if op.should_floor_result or column.dtype == dtypes.TIMEDELTA_DTYPE: result = sge.Cast(this=sge.func("FLOOR", result), to="INT64") return result From 75477231180e06b9a3780407b5ab1eb161789453 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Fri, 23 Jan 2026 18:45:21 +0000 Subject: [PATCH 2/4] refactor: fix test_string_astype_json --- bigframes/core/compile/sqlglot/expressions/generic_ops.py | 2 +- .../snapshots/test_generic_ops/test_astype_json/out.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bigframes/core/compile/sqlglot/expressions/generic_ops.py b/bigframes/core/compile/sqlglot/expressions/generic_ops.py index 2f486fc9d5..003a7296fc 100644 --- a/bigframes/core/compile/sqlglot/expressions/generic_ops.py +++ b/bigframes/core/compile/sqlglot/expressions/generic_ops.py @@ -252,7 +252,7 @@ def _cast_to_json(expr: TypedExpr, op: ops.AsTypeOp) -> sge.Expression: sg_expr = expr.expr if from_type == dtypes.STRING_DTYPE: - func_name = "PARSE_JSON_IN_SAFE" if op.safe else "PARSE_JSON" + func_name = "SAFE.PARSE_JSON" if op.safe else "PARSE_JSON" return sge.func(func_name, sg_expr) if from_type in (dtypes.INT_DTYPE, dtypes.BOOL_DTYPE, dtypes.FLOAT_DTYPE): sg_expr = sge.Cast(this=sg_expr, to="STRING") diff --git a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_astype_json/out.sql b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_astype_json/out.sql index 2defc2e72b..4ffaf7256a 100644 --- a/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_astype_json/out.sql +++ b/tests/unit/core/compile/sqlglot/expressions/snapshots/test_generic_ops/test_astype_json/out.sql @@ -13,7 +13,7 @@ WITH `bfcte_0` AS ( PARSE_JSON(CAST(`bool_col` AS STRING)) AS `bfcol_6`, PARSE_JSON(`string_col`) AS `bfcol_7`, PARSE_JSON(CAST(`bool_col` AS STRING)) AS `bfcol_8`, - PARSE_JSON_IN_SAFE(`string_col`) AS `bfcol_9` + SAFE.PARSE_JSON(`string_col`) AS `bfcol_9` FROM `bfcte_0` ) SELECT From 9caa05d23f2cf2e3519b4c4c8f28a73da6307c39 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Fri, 23 Jan 2026 18:47:09 +0000 Subject: [PATCH 3/4] refactor: fix test_read_gbq_w_primary_keys_table --- tests/system/small/test_session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index 698f531d57..0501df3f8c 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -352,7 +352,7 @@ def test_read_gbq_w_primary_keys_table( pd.testing.assert_frame_equal(result, sorted_result) # Verify that we're working from a snapshot rather than a copy of the table. - assert "FOR SYSTEM_TIME AS OF TIMESTAMP" in df.sql + assert "FOR SYSTEM_TIME AS OF" in df.sql def test_read_gbq_w_primary_keys_table_and_filters( From 190e6b365ac72d63971f5fb757aca2cb60eb1a68 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 27 Jan 2026 19:24:17 +0000 Subject: [PATCH 4/4] refactor: fix test_precision_score_binary on from_union compiler --- bigframes/core/compile/sqlglot/compiler.py | 10 +- bigframes/core/compile/sqlglot/sqlglot_ir.py | 52 +++++------ .../test_compile_concat/out.sql | 65 ++++++------- .../test_compile_concat_filter_sorted/out.sql | 93 ++++++++----------- .../compile/sqlglot/test_compile_readlocal.py | 5 - 5 files changed, 96 insertions(+), 129 deletions(-) diff --git a/bigframes/core/compile/sqlglot/compiler.py b/bigframes/core/compile/sqlglot/compiler.py index e77370892c..3b64596d8f 100644 --- a/bigframes/core/compile/sqlglot/compiler.py +++ b/bigframes/core/compile/sqlglot/compiler.py @@ -266,10 +266,16 @@ def compile_concat(node: nodes.ConcatNode, *children: ir.SQLGlotIR) -> ir.SQLGlo assert len(children) >= 1 uid_gen = children[0].uid_gen - output_ids = [id.sql for id in node.output_ids] + # BigQuery `UNION` query takes the column names from the first `SELECT` clause. + default_output_ids = [field.id.sql for field in node.child_nodes[0].fields] + output_aliases = [ + (default_output_id, output_id.sql) + for default_output_id, output_id in zip(default_output_ids, node.output_ids) + ] + return ir.SQLGlotIR.from_union( [child.expr for child in children], - output_ids=output_ids, + output_aliases=output_aliases, uid_gen=uid_gen, ) diff --git a/bigframes/core/compile/sqlglot/sqlglot_ir.py b/bigframes/core/compile/sqlglot/sqlglot_ir.py index d4dc4ecc06..544d46b832 100644 --- a/bigframes/core/compile/sqlglot/sqlglot_ir.py +++ b/bigframes/core/compile/sqlglot/sqlglot_ir.py @@ -170,7 +170,9 @@ def from_query_string( cls, query_string: str, ) -> SQLGlotIR: - """Builds a SQLGlot expression from a query string""" + """Builds a SQLGlot expression from a query string. Wrapping the query + in a CTE can avoid the query parsing issue for unsupported syntax in + SQLGlot.""" uid_gen: guid.SequentialUIDGenerator = guid.SequentialUIDGenerator() cte_name = sge.to_identifier( next(uid_gen.get_uid_stream("bfcte_")), quoted=cls.quoted @@ -187,7 +189,7 @@ def from_query_string( def from_union( cls, selects: typing.Sequence[sge.Select], - output_ids: typing.Sequence[str], + output_aliases: typing.Sequence[typing.Tuple[str, str]], uid_gen: guid.SequentialUIDGenerator, ) -> SQLGlotIR: """Builds a SQLGlot expression by unioning of multiple select expressions.""" @@ -196,7 +198,7 @@ def from_union( ), f"At least two select expressions must be provided, but got {selects}." existing_ctes: list[sge.CTE] = [] - union_selects: list[sge.Expression] = [] + union_selects: list[sge.Select] = [] for select in selects: assert isinstance( select, sge.Select @@ -205,37 +207,27 @@ def from_union( select_expr = select.copy() select_expr, select_ctes = _pop_query_ctes(select_expr) existing_ctes = [*existing_ctes, *select_ctes] - - new_cte_name = sge.to_identifier( - next(uid_gen.get_uid_stream("bfcte_")), quoted=cls.quoted - ) - new_cte = sge.CTE( - this=select_expr, - alias=new_cte_name, + union_selects.append(select_expr) + + union_expr: sge.Query = union_selects[0].subquery() + for select in union_selects[1:]: + union_expr = sge.Union( + this=union_expr, + expression=select.subquery(), + distinct=False, + copy=False, ) - existing_ctes = [*existing_ctes, new_cte] - selections = [ - sge.Alias( - this=sge.to_identifier(expr.alias_or_name, quoted=cls.quoted), - alias=sge.to_identifier(output_id, quoted=cls.quoted), - ) - for expr, output_id in zip(select_expr.expressions, output_ids) - ] - union_selects.append( - sge.Select().select(*selections).from_(sge.Table(this=new_cte_name)) + selections = [ + sge.Alias( + this=sge.to_identifier(old_name, quoted=cls.quoted), + alias=sge.to_identifier(new_name, quoted=cls.quoted), ) - - union_expr = typing.cast( - sge.Select, - functools.reduce( - lambda x, y: sge.Union( - this=x, expression=y, distinct=False, copy=False - ), - union_selects, - ), + for old_name, new_name in output_aliases + ] + final_select_expr = ( + sge.Select().select(*selections).from_(union_expr.subquery()) ) - final_select_expr = sge.Select().select(sge.Star()).from_(union_expr.subquery()) final_select_expr = _set_query_ctes(final_select_expr, existing_ctes) return cls(expr=final_select_expr, uid_gen=uid_gen) diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat/out.sql index a0d7db2b1a..db1da10086 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat/out.sql @@ -14,15 +14,6 @@ WITH `bfcte_1` AS ( *, 0 AS `bfcol_8` FROM `bfcte_3` -), `bfcte_6` AS ( - SELECT - `rowindex` AS `bfcol_9`, - `rowindex` AS `bfcol_10`, - `int64_col` AS `bfcol_11`, - `string_col` AS `bfcol_12`, - `bfcol_8` AS `bfcol_13`, - `bfcol_7` AS `bfcol_14` - FROM `bfcte_5` ), `bfcte_0` AS ( SELECT `int64_col`, @@ -39,36 +30,36 @@ WITH `bfcte_1` AS ( *, 1 AS `bfcol_23` FROM `bfcte_2` -), `bfcte_7` AS ( - SELECT - `rowindex` AS `bfcol_24`, - `rowindex` AS `bfcol_25`, - `int64_col` AS `bfcol_26`, - `string_col` AS `bfcol_27`, - `bfcol_23` AS `bfcol_28`, - `bfcol_22` AS `bfcol_29` - FROM `bfcte_4` -), `bfcte_8` AS ( +), `bfcte_6` AS ( SELECT - * + `bfcol_9` AS `bfcol_30`, + `bfcol_10` AS `bfcol_31`, + `bfcol_11` AS `bfcol_32`, + `bfcol_12` AS `bfcol_33`, + `bfcol_13` AS `bfcol_34`, + `bfcol_14` AS `bfcol_35` FROM ( - SELECT - `bfcol_9` AS `bfcol_30`, - `bfcol_10` AS `bfcol_31`, - `bfcol_11` AS `bfcol_32`, - `bfcol_12` AS `bfcol_33`, - `bfcol_13` AS `bfcol_34`, - `bfcol_14` AS `bfcol_35` - FROM `bfcte_6` + ( + SELECT + `rowindex` AS `bfcol_9`, + `rowindex` AS `bfcol_10`, + `int64_col` AS `bfcol_11`, + `string_col` AS `bfcol_12`, + `bfcol_8` AS `bfcol_13`, + `bfcol_7` AS `bfcol_14` + FROM `bfcte_5` + ) UNION ALL - SELECT - `bfcol_24` AS `bfcol_30`, - `bfcol_25` AS `bfcol_31`, - `bfcol_26` AS `bfcol_32`, - `bfcol_27` AS `bfcol_33`, - `bfcol_28` AS `bfcol_34`, - `bfcol_29` AS `bfcol_35` - FROM `bfcte_7` + ( + SELECT + `rowindex` AS `bfcol_24`, + `rowindex` AS `bfcol_25`, + `int64_col` AS `bfcol_26`, + `string_col` AS `bfcol_27`, + `bfcol_23` AS `bfcol_28`, + `bfcol_22` AS `bfcol_29` + FROM `bfcte_4` + ) ) ) SELECT @@ -76,7 +67,7 @@ SELECT `bfcol_31` AS `rowindex_1`, `bfcol_32` AS `int64_col`, `bfcol_33` AS `string_col` -FROM `bfcte_8` +FROM `bfcte_6` ORDER BY `bfcol_34` ASC NULLS LAST, `bfcol_35` ASC NULLS LAST \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat_filter_sorted/out.sql b/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat_filter_sorted/out.sql index 8e65381fef..d4b70d276c 100644 --- a/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat_filter_sorted/out.sql +++ b/tests/unit/core/compile/sqlglot/snapshots/test_compile_concat/test_compile_concat_filter_sorted/out.sql @@ -13,13 +13,6 @@ WITH `bfcte_2` AS ( *, 0 AS `bfcol_5` FROM `bfcte_6` -), `bfcte_13` AS ( - SELECT - `float64_col` AS `bfcol_6`, - `int64_col` AS `bfcol_7`, - `bfcol_5` AS `bfcol_8`, - `bfcol_4` AS `bfcol_9` - FROM `bfcte_10` ), `bfcte_0` AS ( SELECT `bool_col`, @@ -42,13 +35,6 @@ WITH `bfcte_2` AS ( *, 1 AS `bfcol_16` FROM `bfcte_8` -), `bfcte_14` AS ( - SELECT - `float64_col` AS `bfcol_17`, - `int64_too` AS `bfcol_18`, - `bfcol_16` AS `bfcol_19`, - `bfcol_15` AS `bfcol_20` - FROM `bfcte_12` ), `bfcte_1` AS ( SELECT `float64_col`, @@ -64,13 +50,6 @@ WITH `bfcte_2` AS ( *, 2 AS `bfcol_26` FROM `bfcte_5` -), `bfcte_15` AS ( - SELECT - `float64_col` AS `bfcol_27`, - `int64_col` AS `bfcol_28`, - `bfcol_26` AS `bfcol_29`, - `bfcol_25` AS `bfcol_30` - FROM `bfcte_9` ), `bfcte_0` AS ( SELECT `bool_col`, @@ -93,50 +72,54 @@ WITH `bfcte_2` AS ( *, 3 AS `bfcol_37` FROM `bfcte_7` -), `bfcte_16` AS ( - SELECT - `float64_col` AS `bfcol_38`, - `int64_too` AS `bfcol_39`, - `bfcol_37` AS `bfcol_40`, - `bfcol_36` AS `bfcol_41` - FROM `bfcte_11` -), `bfcte_17` AS ( +), `bfcte_13` AS ( SELECT - * + `bfcol_6` AS `bfcol_42`, + `bfcol_7` AS `bfcol_43`, + `bfcol_8` AS `bfcol_44`, + `bfcol_9` AS `bfcol_45` FROM ( - SELECT - `bfcol_6` AS `bfcol_42`, - `bfcol_7` AS `bfcol_43`, - `bfcol_8` AS `bfcol_44`, - `bfcol_9` AS `bfcol_45` - FROM `bfcte_13` + ( + SELECT + `float64_col` AS `bfcol_6`, + `int64_col` AS `bfcol_7`, + `bfcol_5` AS `bfcol_8`, + `bfcol_4` AS `bfcol_9` + FROM `bfcte_10` + ) UNION ALL - SELECT - `bfcol_17` AS `bfcol_42`, - `bfcol_18` AS `bfcol_43`, - `bfcol_19` AS `bfcol_44`, - `bfcol_20` AS `bfcol_45` - FROM `bfcte_14` + ( + SELECT + `float64_col` AS `bfcol_17`, + `int64_too` AS `bfcol_18`, + `bfcol_16` AS `bfcol_19`, + `bfcol_15` AS `bfcol_20` + FROM `bfcte_12` + ) UNION ALL - SELECT - `bfcol_27` AS `bfcol_42`, - `bfcol_28` AS `bfcol_43`, - `bfcol_29` AS `bfcol_44`, - `bfcol_30` AS `bfcol_45` - FROM `bfcte_15` + ( + SELECT + `float64_col` AS `bfcol_27`, + `int64_col` AS `bfcol_28`, + `bfcol_26` AS `bfcol_29`, + `bfcol_25` AS `bfcol_30` + FROM `bfcte_9` + ) UNION ALL - SELECT - `bfcol_38` AS `bfcol_42`, - `bfcol_39` AS `bfcol_43`, - `bfcol_40` AS `bfcol_44`, - `bfcol_41` AS `bfcol_45` - FROM `bfcte_16` + ( + SELECT + `float64_col` AS `bfcol_38`, + `int64_too` AS `bfcol_39`, + `bfcol_37` AS `bfcol_40`, + `bfcol_36` AS `bfcol_41` + FROM `bfcte_11` + ) ) ) SELECT `bfcol_42` AS `float64_col`, `bfcol_43` AS `int64_col` -FROM `bfcte_17` +FROM `bfcte_13` ORDER BY `bfcol_44` ASC NULLS LAST, `bfcol_45` ASC NULLS LAST \ No newline at end of file diff --git a/tests/unit/core/compile/sqlglot/test_compile_readlocal.py b/tests/unit/core/compile/sqlglot/test_compile_readlocal.py index c5fabd99e6..03a8b39d9a 100644 --- a/tests/unit/core/compile/sqlglot/test_compile_readlocal.py +++ b/tests/unit/core/compile/sqlglot/test_compile_readlocal.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys - import numpy as np import pandas as pd import pytest @@ -36,7 +34,6 @@ def test_compile_readlocal_w_structs_df( compiler_session_w_nested_structs_types: bigframes.Session, snapshot, ): - # TODO(b/427306734): Check why the output is different from the expected output. bf_df = bpd.DataFrame( nested_structs_pandas_df, session=compiler_session_w_nested_structs_types ) @@ -66,8 +63,6 @@ def test_compile_readlocal_w_json_df( def test_compile_readlocal_w_special_values( compiler_session: bigframes.Session, snapshot ): - if sys.version_info < (3, 12): - pytest.skip("Skipping test due to inconsistent SQL formatting") df = pd.DataFrame( { "col_none": [None, 1, 2],