diff --git a/README.md b/README.md index f0ddd58..96d747a 100644 --- a/README.md +++ b/README.md @@ -312,10 +312,16 @@ cd servers && uvicorn deployed_mcp:app --host 0.0.0.0 --port 8000 ### Viewing traces in Azure Application Insights -By default, OpenTelemetry tracing is enabled for the deployed MCP server, sending traces to Azure Application Insights. +By default, OpenTelemetry tracing is enabled for the deployed MCP server, sending traces to Azure Application Insights. To bring up a dashboard of metrics and traces, run: + +```shell +azd monitor +``` + +Or you can use Application Insights directly: 1. Open the Azure Portal and navigate to the Application Insights resource created during deployment (named `-appinsights`). -2. In Application Insights, go to "Transaction Search" to view traces from the MCP server +2. In Application Insights, go to "Transaction Search" to view traces from the MCP server. 3. You can filter and analyze traces to monitor performance and diagnose issues. ### Viewing traces in Logfire diff --git a/infra/appinsights-dashboard.bicep b/infra/appinsights-dashboard.bicep new file mode 100644 index 0000000..edbdecd --- /dev/null +++ b/infra/appinsights-dashboard.bicep @@ -0,0 +1,786 @@ +@description('Azure region for the dashboard resource.') +param location string = resourceGroup().location + +@description('Dashboard name (3-24 alphanumerics/dashes).') +param name string = 'mcp-tools-dashboard' + +@description('Application Insights component name (for deep links).') +param applicationInsightsName string + +@description('Dashboard default time range (ISO8601 duration, e.g. 
PT12H, P1D).') +param timeRange string = 'PT12H' + +var appInsightsResourceId = '/subscriptions/${subscription().subscriptionId}/resourceGroups/${resourceGroup().name}/providers/Microsoft.Insights/components/${applicationInsightsName}' + +resource dashboard 'Microsoft.Portal/dashboards@2020-09-01-preview' = { + name: name + location: location + properties: { + lenses: [ + { + order: 0 + parts: [ + // Header: Tool call analytics (Markdown) + { + position: { x: 0, y: 4, colSpan: 12, rowSpan: 1 } + metadata: any({ + type: 'Extension/HubsExtension/PartType/MarkdownPart' + inputs: [] + settings: { + content: { + settings: { + content: '# Tool call analytics' + title: '' + subtitle: '' + } + } + } + }) + } + // 1) Tools/Call - Counts over time (timechart) + { + position: { x: 0, y: 5, colSpan: 6, rowSpan: 4 } + metadata: { + inputs: [ + { + name: 'resourceTypeMode' + isOptional: true + } + { + name: 'ComponentId' + isOptional: true + } + { + name: 'Scope' + value: { + resourceIds: [appInsightsResourceId] + } + isOptional: true + } + { + name: 'PartId' + value: guid('tile1-${name}') + isOptional: true + } + { + name: 'Version' + value: '2.0' + isOptional: true + } + { + name: 'TimeRange' + value: timeRange + isOptional: true + } + { + name: 'DashboardId' + isOptional: true + } + { + name: 'DraftRequestParameters' + isOptional: true + } + { + name: 'Query' + value: 'dependencies\n| where tostring(customDimensions["mcp.method.name"]) == "tools/call"\n| summarize count() by bin(timestamp, 5m)\n| order by timestamp asc\n| render timechart' + isOptional: true + } + { + name: 'ControlType' + value: 'FrameControlChart' + isOptional: true + } + { + name: 'SpecificChart' + value: 'Line' + isOptional: true + } + { + name: 'PartTitle' + value: 'Tools/Call — counts over time' + isOptional: true + } + { + name: 'PartSubTitle' + value: applicationInsightsName + isOptional: true + } + { + name: 'Dimensions' + value: { + xAxis: { + name: 'timestamp' + type: 'datetime' + } + yAxis: [ + { + 
name: 'count_' + type: 'long' + } + ] + aggregation: 'Sum' + } + isOptional: true + } + { + name: 'LegendOptions' + value: { + isEnabled: true + position: 'Bottom' + } + isOptional: true + } + { + name: 'IsQueryContainTimeRange' + value: false + isOptional: true + } + ] + #disable-next-line BCP036 + type: 'Extension/Microsoft_OperationsManagementSuite_Workspace/PartType/LogsDashboardPart' + settings: {} + } + } + // 2) Success vs Error over time (stacked timechart) + { + position: { x: 6, y: 5, colSpan: 6, rowSpan: 4 } + metadata: { + inputs: [ + { + name: 'resourceTypeMode' + isOptional: true + } + { + name: 'ComponentId' + isOptional: true + } + { + name: 'Scope' + value: { + resourceIds: [appInsightsResourceId] + } + isOptional: true + } + { + name: 'PartId' + value: guid('tile2-${name}') + isOptional: true + } + { + name: 'Version' + value: '2.0' + isOptional: true + } + { + name: 'TimeRange' + value: timeRange + isOptional: true + } + { + name: 'DashboardId' + isOptional: true + } + { + name: 'DraftRequestParameters' + isOptional: true + } + { + name: 'Query' + value: 'dependencies\n| where tostring(customDimensions["mcp.method.name"]) == "tools/call"\n| extend tool_success_raw = tostring(customDimensions["mcp.tool.success"])\n| extend tool_success = case(tolower(tool_success_raw) == "true", "Success", tolower(tool_success_raw) == "false", "Error", tool_success_raw)\n| summarize count() by bin(timestamp, 5m), tool_success\n| order by timestamp asc\n| render timechart' + isOptional: true + } + { + name: 'ControlType' + value: 'FrameControlChart' + isOptional: true + } + { + name: 'SpecificChart' + value: 'StackedArea' + isOptional: true + } + { + name: 'PartTitle' + value: 'Tools/Call — Success vs Error' + isOptional: true + } + { + name: 'PartSubTitle' + value: applicationInsightsName + isOptional: true + } + { + name: 'Dimensions' + value: { + xAxis: { + name: 'timestamp' + type: 'datetime' + } + yAxis: [ + { + name: 'count_' + type: 'long' + } + ] + splitBy: 
[ + { + name: 'tool_success' + type: 'string' + } + ] + aggregation: 'Sum' + } + isOptional: true + } + { + name: 'LegendOptions' + value: { + isEnabled: true + position: 'Bottom' + } + isOptional: true + } + { + name: 'IsQueryContainTimeRange' + value: false + isOptional: true + } + ] + #disable-next-line BCP036 + type: 'Extension/Microsoft_OperationsManagementSuite_Workspace/PartType/LogsDashboardPart' + settings: {} + } + } + // 3) Success rate (%) over time (single line) + { + position: { x: 0, y: 9, colSpan: 6, rowSpan: 4 } + metadata: { + inputs: [ + { + name: 'resourceTypeMode' + isOptional: true + } + { + name: 'ComponentId' + isOptional: true + } + { + name: 'Scope' + value: { + resourceIds: [appInsightsResourceId] + } + isOptional: true + } + { + name: 'PartId' + value: guid('tile3-${name}') + isOptional: true + } + { + name: 'Version' + value: '2.0' + isOptional: true + } + { + name: 'TimeRange' + value: timeRange + isOptional: true + } + { + name: 'DashboardId' + isOptional: true + } + { + name: 'DraftRequestParameters' + isOptional: true + } + { + name: 'Query' + value: 'dependencies\n| where tostring(customDimensions["mcp.method.name"]) == "tools/call"\n| extend isError = iff(tolower(tostring(customDimensions["mcp.tool.success"])) == "false", 1, 0)\n| summarize total = count(), errors = sum(isError) by bin(timestamp, 5m)\n| extend success_rate_pct = 100.0 * (total - errors) / total\n| project timestamp, success_rate_pct\n| order by timestamp asc\n| render timechart' + isOptional: true + } + { + name: 'ControlType' + value: 'FrameControlChart' + isOptional: true + } + { + name: 'SpecificChart' + value: 'Line' + isOptional: true + } + { + name: 'PartTitle' + value: 'Tools/Call — Success rate (%)' + isOptional: true + } + { + name: 'PartSubTitle' + value: applicationInsightsName + isOptional: true + } + { + name: 'Dimensions' + value: { + xAxis: { + name: 'timestamp' + type: 'datetime' + } + yAxis: [ + { + name: 'success_rate_pct' + type: 'real' + } + ] 
+ aggregation: 'Sum' + } + isOptional: true + } + { + name: 'LegendOptions' + value: { + isEnabled: true + position: 'Bottom' + } + isOptional: true + } + { + name: 'IsQueryContainTimeRange' + value: false + isOptional: true + } + ] + #disable-next-line BCP036 + type: 'Extension/Microsoft_OperationsManagementSuite_Workspace/PartType/LogsDashboardPart' + settings: {} + } + } + // 4) Calls by tool (bar/column chart) + { + position: { x: 6, y: 9, colSpan: 6, rowSpan: 4 } + metadata: { + inputs: [ + { + name: 'resourceTypeMode' + isOptional: true + } + { + name: 'ComponentId' + isOptional: true + } + { + name: 'Scope' + value: { + resourceIds: [appInsightsResourceId] + } + isOptional: true + } + { + name: 'PartId' + value: guid('tile4-${name}') + isOptional: true + } + { + name: 'Version' + value: '2.0' + isOptional: true + } + { + name: 'TimeRange' + value: timeRange + isOptional: true + } + { + name: 'DashboardId' + isOptional: true + } + { + name: 'DraftRequestParameters' + isOptional: true + } + { + name: 'Query' + value: 'dependencies\n| where tostring(customDimensions["mcp.method.name"]) == "tools/call"\n| extend tool = coalesce(tostring(customDimensions["gen_ai.tool.name"]), target, name)\n| summarize count() by tool\n| order by count_ desc\n| render barchart' + isOptional: true + } + { + name: 'ControlType' + value: 'FrameControlChart' + isOptional: true + } + { + name: 'SpecificChart' + value: 'Bar' + isOptional: true + } + { + name: 'PartTitle' + value: 'Tools/Call — calls by tool' + isOptional: true + } + { + name: 'PartSubTitle' + value: applicationInsightsName + isOptional: true + } + { + name: 'Dimensions' + value: { + xAxis: { + name: 'tool' + type: 'string' + } + yAxis: [ + { + name: 'count_' + type: 'long' + } + ] + aggregation: 'Sum' + } + isOptional: true + } + { + name: 'LegendOptions' + value: { + isEnabled: true + position: 'Bottom' + } + isOptional: true + } + { + name: 'IsQueryContainTimeRange' + value: false + isOptional: true + } + ] + 
#disable-next-line BCP036 + type: 'Extension/Microsoft_OperationsManagementSuite_Workspace/PartType/LogsDashboardPart' + settings: {} + } + } + // 5) Latency percentiles (p50/p95/p99) over time + { + position: { x: 0, y: 13, colSpan: 12, rowSpan: 4 } + metadata: { + inputs: [ + { + name: 'resourceTypeMode' + isOptional: true + } + { + name: 'ComponentId' + isOptional: true + } + { + name: 'Scope' + value: { + resourceIds: [appInsightsResourceId] + } + isOptional: true + } + { + name: 'PartId' + value: guid('tile5-${name}') + isOptional: true + } + { + name: 'Version' + value: '2.0' + isOptional: true + } + { + name: 'TimeRange' + value: timeRange + isOptional: true + } + { + name: 'DashboardId' + isOptional: true + } + { + name: 'DraftRequestParameters' + isOptional: true + } + { + name: 'Query' + value: 'dependencies\n| where tostring(customDimensions["mcp.method.name"]) == "tools/call"\n| summarize p50 = percentile(duration, 50), p95 = percentile(duration, 95), p99 = percentile(duration, 99) by bin(timestamp, 5m)\n| order by timestamp asc\n| render timechart' + isOptional: true + } + { + name: 'ControlType' + value: 'FrameControlChart' + isOptional: true + } + { + name: 'SpecificChart' + value: 'Line' + isOptional: true + } + { + name: 'PartTitle' + value: 'Tools/Call — latency percentiles (ms)' + isOptional: true + } + { + name: 'PartSubTitle' + value: applicationInsightsName + isOptional: true + } + { + name: 'Dimensions' + value: { + xAxis: { + name: 'timestamp' + type: 'datetime' + } + yAxis: [ + { + name: 'p50' + type: 'real' + } + { + name: 'p95' + type: 'real' + } + { + name: 'p99' + type: 'real' + } + ] + aggregation: 'Sum' + } + isOptional: true + } + { + name: 'LegendOptions' + value: { + isEnabled: true + position: 'Bottom' + } + isOptional: true + } + { + name: 'IsQueryContainTimeRange' + value: false + isOptional: true + } + ] + #disable-next-line BCP036 + type: 'Extension/Microsoft_OperationsManagementSuite_Workspace/PartType/LogsDashboardPart' + 
settings: {} + } + } + // 6) Failed requests (metric chart) + { + position: { x: 0, y: 1, colSpan: 6, rowSpan: 3 } + metadata: any({ + type: 'Extension/HubsExtension/PartType/MonitorChartPart' + inputs: [ + { + name: 'options' + value: { + chart: { + metrics: [ + { + resourceMetadata: { + id: appInsightsResourceId + } + name: 'requests/failed' + aggregationType: 7 + namespace: 'microsoft.insights/components' + metricVisualization: { + displayName: 'Failed requests' + color: '#EC008C' + } + } + ] + title: 'Failed requests' + visualization: { + chartType: 3 + legendVisualization: { + isVisible: true + position: 2 + hideSubtitle: false + } + axisVisualization: { + x: { + isVisible: true + axisType: 2 + } + y: { + isVisible: true + axisType: 1 + } + } + } + openBladeOnClick: { + openBlade: true + destinationBlade: { + extensionName: 'HubsExtension' + bladeName: 'ResourceMenuBlade' + parameters: { + id: appInsightsResourceId + menuid: 'failures' + } + } + } + } + } + } + { + name: 'sharedTimeRange' + isOptional: true + } + ] + settings: {} + }) + } + // 7) Server response time (metric chart) + { + position: { x: 6, y: 1, colSpan: 6, rowSpan: 3 } + metadata: any({ + type: 'Extension/HubsExtension/PartType/MonitorChartPart' + inputs: [ + { + name: 'options' + value: { + chart: { + metrics: [ + { + resourceMetadata: { + id: appInsightsResourceId + } + name: 'requests/duration' + aggregationType: 4 + namespace: 'microsoft.insights/components' + metricVisualization: { + displayName: 'Server response time' + color: '#00BCF2' + } + } + ] + title: 'Server response time' + visualization: { + chartType: 2 + legendVisualization: { + isVisible: true + position: 2 + hideSubtitle: false + } + axisVisualization: { + x: { + isVisible: true + axisType: 2 + } + y: { + isVisible: true + axisType: 1 + } + } + } + openBladeOnClick: { + openBlade: true + destinationBlade: { + extensionName: 'HubsExtension' + bladeName: 'ResourceMenuBlade' + parameters: { + id: appInsightsResourceId + menuid: 
'performance' + } + } + } + } + } + } + { + name: 'sharedTimeRange' + isOptional: true + } + ] + settings: {} + }) + } + // 8) Failures curated blade tile + { + position: { x: 0, y: 0, colSpan: 6, rowSpan: 1 } + metadata: any({ + inputs: [ + { + name: 'ResourceId' + value: appInsightsResourceId + } + { + name: 'DataModel' + value: { + version: '1.0.0' + timeContext: { + durationMs: 86400000 + createdTime: '2018-05-04T23:42:40.072Z' + isInitialTime: false + grain: 1 + useDashboardTimeRange: false + } + } + isOptional: true + } + { + name: 'ConfigurationId' + value: '8a02f7bf-ac0f-40e1-afe9-f0e72cfee77f' + isOptional: true + } + ] + type: 'Extension/AppInsightsExtension/PartType/CuratedBladeFailuresPinnedPart' + isAdapter: true + asset: { + idInputName: 'ResourceId' + type: 'ApplicationInsights' + } + defaultMenuItemId: 'failures' + }) + } + // 9) Performance curated blade tile + { + position: { x: 6, y: 0, colSpan: 6, rowSpan: 1 } + metadata: any({ + inputs: [ + { + name: 'ResourceId' + value: appInsightsResourceId + } + { + name: 'DataModel' + value: { + version: '1.0.0' + timeContext: { + durationMs: 86400000 + createdTime: '2018-05-04T23:43:37.804Z' + isInitialTime: false + grain: 1 + useDashboardTimeRange: false + } + } + isOptional: true + } + { + name: 'ConfigurationId' + value: '2a8ede4f-2bee-4b9c-aed9-2db0e8a01865' + isOptional: true + } + ] + type: 'Extension/AppInsightsExtension/PartType/CuratedBladePerformancePinnedPart' + isAdapter: true + asset: { + idInputName: 'ResourceId' + type: 'ApplicationInsights' + } + defaultMenuItemId: 'performance' + }) + } + ] + metadata: {} + } + ] + metadata: {} + } +} diff --git a/infra/main.bicep b/infra/main.bicep index 285217a..5fbe45b 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -95,6 +95,8 @@ param logfireToken string = '' // Derived booleans for backward compatibility in bicep modules var useKeycloak = mcpAuthProvider == 'keycloak' var useEntraProxy = mcpAuthProvider == 'entra_proxy' +// Auth is 
considered enabled when either Keycloak or Entra OAuth Proxy is used +var authEnabled = useKeycloak || useEntraProxy var resourceToken = toLower(uniqueString(subscription().id, name, location)) var tags = { 'azd-env-name': name } @@ -180,29 +182,36 @@ module cosmosDb 'br/public:avm/res/document-db/database-account:0.6.1' = { sqlDatabases: [ { name: cosmosDbDatabaseName - containers: [ - { - name: cosmosDbContainerName - kind: 'Hash' - paths: [ - '/category' - ] - } - { - name: cosmosDbUserContainerName - kind: 'Hash' - paths: [ - '/user_id' - ] - } - { - name: cosmosDbOAuthContainerName - kind: 'Hash' - paths: [ - '/collection' - ] - } - ] + // Always create the base expenses container; add auth-related containers only when authentication is enabled + containers: concat( + [ + { + name: cosmosDbContainerName + kind: 'Hash' + paths: [ + '/category' + ] + } + ], + authEnabled + ? [ + { + name: cosmosDbUserContainerName + kind: 'Hash' + paths: [ + '/user_id' + ] + } + { + name: cosmosDbOAuthContainerName + kind: 'Hash' + paths: [ + '/collection' + ] + } + ] + : [] + ) } ] } @@ -237,6 +246,17 @@ module applicationInsights 'br/public:avm/res/insights/component:0.4.2' = if (us } } +// Portal dashboard with Log Analytics queries visualizing MCP tools metrics +module applicationInsightsDashboard 'appinsights-dashboard.bicep' = if (useAppInsights) { + name: 'application-insights-dashboard' + scope: resourceGroup + params: { + name: '${prefix}-dashboard' + location: location + applicationInsightsName: applicationInsights!.outputs.name + } +} + // https://learn.microsoft.com/en-us/azure/container-apps/firewall-integration?tabs=consumption-only module containerAppsNSG 'br/public:avm/res/network/network-security-group:0.5.1' = if (useVnet) { name: 'containerAppsNSG' diff --git a/servers/opentelemetry_middleware.py b/servers/opentelemetry_middleware.py index c8451d6..6bdbb23 100644 --- a/servers/opentelemetry_middleware.py +++ b/servers/opentelemetry_middleware.py @@ -93,7 +93,6 
@@ async def on_call_tool(self, context: MiddlewareContext, call_next): attributes: dict[str, AttributeValue] = { "mcp.method.name": method_name, - # PR #2083 aligns tool/prompt naming with GenAI attributes. "gen_ai.tool.name": tool_name, "gen_ai.operation.name": "execute_tool", } diff --git a/spanish/README.md b/spanish/README.md index 1c29d21..f17e8bf 100644 --- a/spanish/README.md +++ b/spanish/README.md @@ -287,6 +287,20 @@ Podés usar la [calculadora de precios de Azure](https://azure.com/e/3987c81282c ⚠️ Para evitar costos innecesarios, recordá dar de baja la app si ya no la usás, borrando el grupo de recursos en el Portal o ejecutando `azd down`. +### Ver trazas en Azure Application Insights + +Por defecto, el tracing de OpenTelemetry está habilitado para el servidor MCP desplegado y envía trazas a Azure Application Insights. Para abrir un dashboard con métricas y trazas, ejecutá: + +```shell +azd monitor +``` + +O también podés usar Application Insights directamente: + +1. Abrí el Portal de Azure y navegá al recurso de Application Insights creado durante el despliegue (con nombre `-appinsights`). +2. En Application Insights, andá a "Transaction Search" para ver trazas del servidor MCP. +3. Podés filtrar y analizar las trazas para monitorear rendimiento y diagnosticar problemas. + --- ## Desplegar en Azure con red privada