diff --git a/snuba/admin/static/api_client.tsx b/snuba/admin/static/api_client.tsx index 4dcb29a957d..bc8ba005691 100644 --- a/snuba/admin/static/api_client.tsx +++ b/snuba/admin/static/api_client.tsx @@ -44,6 +44,20 @@ import { AllocationPolicy, StrategyData } from "SnubaAdmin/configurable_componen import { ReplayInstruction, Topic } from "SnubaAdmin/dead_letter_queue/types"; import { AutoReplacementsBypassProjectsData } from "SnubaAdmin/auto_replacements_bypass_projects/types"; import { ClickhouseNodeInfo, ClickhouseSystemSetting } from "SnubaAdmin/database_clusters/types"; +import { QuerySummary } from "SnubaAdmin/tracing/types"; + +interface ProfileEventsResponse { + profile_events_results?: { [nodeName: string]: { column_names: string[]; rows: string[] } }; + profile_events_meta?: Array; + profile_events_profile?: {}; + status?: string; + message?: string; + retry_suggested?: boolean; + error?: { + type: string; + message: string; + }; +} interface Client { getSettings: () => Promise; @@ -124,6 +138,7 @@ interface Client { getJobLogs(job_id: string): Promise; getClickhouseSystemSettings: (host: string, port: number, storage: string) => Promise; summarizeTraceWithProfile: (traceLogs: string, spanType: string, signal?: AbortSignal) => Promise; + fetchProfileEvents: (querySummaries: { [nodeName: string]: QuerySummary }, storage: string) => Promise; } function Client(): Client { @@ -651,6 +666,23 @@ function Client(): Client { } }); }, + fetchProfileEvents: (querySummaries: { [nodeName: string]: QuerySummary }, storage: string) => { + const url = baseUrl + "fetch_profile_events"; + return fetch(url, { + headers: { "Content-Type": "application/json" }, + method: "POST", + body: JSON.stringify({ + query_summaries: querySummaries, + storage: storage + }), + }).then((resp) => { + if (resp.ok || resp.status === 404) { + return resp.json(); + } else { + return resp.json().then(Promise.reject.bind(Promise)); + } + }); + }, }; } diff --git a/snuba/admin/static/tracing/index.tsx b/snuba/admin/static/tracing/index.tsx index c217d91134b..381e212a47e 100644 --- a/snuba/admin/static/tracing/index.tsx +++ b/snuba/admin/static/tracing/index.tsx @@ -1,5 +1,5 @@ -import React, { useState } from "react"; -import { Accordion, Stack, Title, Text, Group, Table } from "@mantine/core"; +import React, { useState, useCallback } from "react"; +import { Accordion, Stack, Title, Text, Group, Table, Loader, Alert } from "@mantine/core"; import Client from "SnubaAdmin/api_client"; import QueryDisplay from "SnubaAdmin/tracing/query_display"; @@ -63,6 +63,96 @@ function getMessageCategory(logLine: LogLine): MessageCategory { } function TracingQueries(props: { api: Client }) { + const [profileEventsCache, setProfileEventsCache] = useState<{ + [timestamp: number]: { + loading: boolean; + error: string | null; + data: ProfileEvent | null; + retryCount: number; + }; + }>({}); + + const fetchProfileEventsWithRetry = useCallback(async ( + querySummaries: { [nodeName: string]: QuerySummary }, + storage: string, + timestamp: number, + retryCount: number = 0 + ) => { + const MAX_RETRIES = 3; + const RETRY_DELAY_MS = 2000; + + setProfileEventsCache(prev => ({ + ...prev, + [timestamp]: { loading: true, error: null, data: prev[timestamp]?.data || null, retryCount } + })); + + try { + const response = await props.api.fetchProfileEvents(querySummaries, storage); + + if (response.error) { + throw new Error(response.error.message); + } + + if (response.status === "not_ready" && retryCount < MAX_RETRIES) { + setTimeout(() => { + fetchProfileEventsWithRetry(querySummaries, storage, timestamp, retryCount + 1); + }, RETRY_DELAY_MS); + return; + } + + if (response.status === "not_ready") { + setProfileEventsCache(prev => ({ + ...prev, + [timestamp]: { + loading: false, + error: "Profile events not ready. Try again in a few seconds.", + data: null, + retryCount + } + })); + return; + } + + setProfileEventsCache(prev => ({ + ...prev, + [timestamp]: { + loading: false, + error: null, + data: response.profile_events_results as ProfileEvent, + retryCount + } + })); + } catch (error) { + setProfileEventsCache(prev => ({ + ...prev, + [timestamp]: { + loading: false, + error: error instanceof Error ? error.message : "Failed to fetch", + data: null, + retryCount + } + })); + } + }, [props.api]); + + const handleProfileEventsAccordionChange = useCallback((value: string | null, queryResult: TracingResult) => { + if (value === "profile-events") { + const timestamp = queryResult.timestamp; + const cached = profileEventsCache[timestamp]; + + if (!cached || (!cached.loading && !cached.data && !cached.error)) { + if (queryResult.summarized_trace_output?.query_summaries && queryResult.storage) { + fetchProfileEventsWithRetry( + queryResult.summarized_trace_output.query_summaries, + queryResult.storage, + timestamp, + 0 + ); + } + } + } + }, [profileEventsCache, fetchProfileEventsWithRetry]); + function tablePopulator(queryResult: TracingResult, showFormatted: boolean) { var elements = {}; if (queryResult.error) { @@ -70,10 +160,10 @@ function TracingQueries(props: { api: Client }) { } else { elements = { Trace: [queryResult, 400] }; } - return tracingOutput(elements, showFormatted); + return tracingOutput(elements, showFormatted, queryResult); } - function tracingOutput(elements: Object, showFormatted: boolean) { + function tracingOutput(elements: Object, showFormatted: boolean, queryResult: TracingResult) { return ( <>
@@ -98,7 +188,7 @@ function TracingQueries(props: { api: Client }) {
Number of rows in result set: {value.num_rows_result}
- {summarizedTraceDisplay(value.summarized_trace_output, value.profile_events_results)} + {summarizedTraceDisplay(value.summarized_trace_output, value.profile_events_results, queryResult)} ); } else { @@ -107,7 +197,7 @@ function TracingQueries(props: { api: Client }) {
Number of rows in result set: {value.num_rows_result}
- {rawTraceDisplay(title, value.trace_output, value.profile_events_results)} + {rawTraceDisplay(title, value.trace_output, value.profile_events_results, queryResult)} ); } @@ -117,17 +207,36 @@ function TracingQueries(props: { api: Client }) { ); } - function rawTraceDisplay(title: string, value: any, profileEventResults: ProfileEvent): JSX.Element | undefined { + function rawTraceDisplay(title: string, value: any, profileEventResults: ProfileEvent, queryResult: TracingResult): JSX.Element | undefined { const parsedLines: Array = value.split(/\n/); + const timestamp = queryResult.timestamp; + const profileEventsState = profileEventsCache[timestamp]; + const effectiveProfileEvents = profileEventsState?.data || profileEventResults || {}; const profileEventRows: Array = []; - for (const [k, v] of Object.entries(profileEventResults)) { + for (const [k, v] of Object.entries(effectiveProfileEvents)) { profileEventRows.push(k + '=>' + v.rows[0]); } return (
    Profile Events Output + {profileEventsState?.loading && ( +
  1. + + Loading profile events... +
  2. + )} + {profileEventsState?.error && ( +
  3. + {profileEventsState.error} +
  4. + )} + {!profileEventsState?.loading && profileEventRows.length === 0 && ( +
  5. + No profile events found +
  6. + )} {profileEventRows.map((line, index) => { const node_name = line.split("=>")[0]; const row = line.split("=>")[1]; @@ -268,8 +377,13 @@ function TracingQueries(props: { api: Client }) { function summarizedTraceDisplay( value: TracingSummary, - profileEventResults: ProfileEvent + profileEventResults: ProfileEvent, + queryResult: TracingResult ): JSX.Element | undefined { + const timestamp = queryResult.timestamp; + const profileEventsState = profileEventsCache[timestamp]; + const effectiveProfileEvents = profileEventsState?.data || profileEventResults || {}; + let dist_node; let nodes = []; for (const [host, summary] of Object.entries(value.query_summaries)) { @@ -289,13 +403,29 @@ function TracingQueries(props: { api: Client }) { .filter((q: QuerySummary) => !q.is_distributed) .map((q: QuerySummary) => querySummary(q))} - + handleProfileEventsAccordionChange(value, queryResult)}> Profile Events Output - {Object.entries(profileEventResults).map(([host, event]) => ( + {profileEventsState?.loading && ( + + + Loading profile events... (Attempt {profileEventsState.retryCount + 1}/4) + + )} + {profileEventsState?.error && ( + + {profileEventsState.error} + + )} + {!profileEventsState?.loading && Object.keys(effectiveProfileEvents).length === 0 && ( + + No profile events were found for this query. + + )} + {Object.entries(effectiveProfileEvents).map(([host, event]) => ( diff --git a/snuba/admin/static/tracing/query_display.tsx b/snuba/admin/static/tracing/query_display.tsx index 8166257ea73..f08ce9addc0 100644 --- a/snuba/admin/static/tracing/query_display.tsx +++ b/snuba/admin/static/tracing/query_display.tsx @@ -1,7 +1,6 @@ import React, { useEffect, useState } from "react"; import { Accordion, - Switch, Code, Stack, Title, @@ -29,10 +28,8 @@ function QueryDisplay(props: { predefinedQueryOptions: Array; }) { const [storages, setStorages] = useState([]); - const [checkedGatherProfileEvents, setCheckedGatherProfileEvents] = useState(true); const [query, setQuery] = useState({ storage: getParamFromStorage("storage"), - gather_profile_events: checkedGatherProfileEvents }); const [queryResultHistory, setQueryResultHistory] = useState( getRecentHistory(HISTORY_KEY) @@ -55,13 +52,13 @@ function QueryDisplay(props: { } function executeQuery() { - query.gather_profile_events = checkedGatherProfileEvents; return props.api .executeTracingQuery(query as TracingRequest) .then((result) => { const tracing_result = { input_query: `${query.sql}`, timestamp: result.timestamp, + storage: query.storage, num_rows_result: result.num_rows_result, result: result.result, cols: result.cols, @@ -107,16 +104,6 @@ function QueryDisplay(props: { />
    - ) => { - setCheckedGatherProfileEvents(evt.currentTarget.checked); - } - } - onLabel="PROFILE" - offLabel="NO PROFILE" - size="md" - /> >; diff --git a/snuba/admin/views.py b/snuba/admin/views.py index 7b59d72d107..7b08333581c 100644 --- a/snuba/admin/views.py +++ b/snuba/admin/views.py @@ -38,7 +38,11 @@ UnauthorizedForSudo, run_system_query_on_host_with_sql, ) -from snuba.admin.clickhouse.trace_log_parsing import summarize_trace_output +from snuba.admin.clickhouse.trace_log_parsing import ( + QuerySummary, + TracingSummary, + summarize_trace_output, +) from snuba.admin.clickhouse.tracing import TraceOutput, run_query_and_get_trace from snuba.admin.dead_letter_queue import get_dlq_topics from snuba.admin.kafka.topics import get_broker_data @@ -535,22 +539,11 @@ def clickhouse_trace_query() -> Response: ), 400, ) - try_gather_profile_events = req.get("gather_profile_events", True) try: - settings = {} - if try_gather_profile_events: - settings["log_profile_events"] = 1 + settings = {"log_profile_events": 1} query_trace = run_query_and_get_trace(storage, raw_sql, settings) - if try_gather_profile_events: - try: - gather_profile_events(query_trace, storage) - except Exception: - logger.warning( - "Error gathering profile events, returning trace anyway", - exc_info=True, - ) return make_response(jsonify(asdict(query_trace)), 200) except InvalidCustomQuery as err: return make_response( @@ -580,6 +573,107 @@ def clickhouse_trace_query() -> Response: ) +@application.route("/fetch_profile_events", methods=["POST"]) +@check_tool_perms(tools=[AdminTools.QUERY_TRACING]) +def fetch_profile_events() -> Response: + """ + Fetch profile events for query summaries that have already been executed. + This endpoint is called separately from the trace query to allow lazy loading. + """ + req = json.loads(request.data) + try: + query_summaries_dict = req["query_summaries"] + storage = req["storage"] + except KeyError as e: + return make_response( + jsonify( + { + "error": { + "type": "validation", + "message": f"Invalid request, missing key {e.args[0]}", + } + } + ), + 400, + ) + + try: + # Reconstruct QuerySummary objects from the dict + query_summaries = {} + for node_name, summary_dict in query_summaries_dict.items(): + query_summary = QuerySummary( + node_name=summary_dict["node_name"], + is_distributed=summary_dict["is_distributed"], + query_id=summary_dict["query_id"], + execute_summaries=summary_dict.get("execute_summaries"), + select_summaries=summary_dict.get("select_summaries"), + index_summaries=summary_dict.get("index_summaries"), + stream_summaries=summary_dict.get("stream_summaries"), + aggregation_summaries=summary_dict.get("aggregation_summaries"), + sorting_summaries=summary_dict.get("sorting_summaries"), + ) + query_summaries[node_name] = query_summary + + # Create a minimal TraceOutput object + trace_output = TraceOutput( + trace_output="", + summarized_trace_output=TracingSummary(query_summaries=query_summaries), + cols=[], + num_rows_result=0, + result=[], + profile_events_results={}, + profile_events_meta=[], + profile_events_profile={}, + ) + + # Gather profile events + gather_profile_events(trace_output, storage) + + # Check if profile events were successfully gathered + if not trace_output.profile_events_results: + return make_response( + jsonify( + { + "status": "not_ready", + "message": "Profile events are not ready yet. Please try again in a few seconds.", + "retry_suggested": True, + } + ), + 404, + ) + + # Return the profile events + return make_response( + jsonify( + { + "profile_events_results": trace_output.profile_events_results, + "profile_events_meta": trace_output.profile_events_meta, + "profile_events_profile": trace_output.profile_events_profile, + } + ), + 200, + ) + except InvalidNodeError as err: + logger.error(err, exc_info=True) + return make_response( + jsonify( + { + "error": { + "type": "node_error", + "message": str(err), + } + } + ), + 400, + ) + except Exception as err: + logger.error(err, exc_info=True) + return make_response( + jsonify({"error": {"type": "unknown", "message": str(err)}}), + 500, + ) + + @application.route("/rpc_summarize_trace_with_profile", methods=["POST"]) @check_tool_perms(tools=[AdminTools.QUERY_TRACING]) def summarize_trace_with_profile() -> Response: