diff --git a/code/app.py b/code/app.py index 4b54b5c7a..547f472f2 100644 --- a/code/app.py +++ b/code/app.py @@ -24,6 +24,7 @@ azure_package_log_level = getattr( logging, PACKAGE_LOGGING_LEVEL.upper(), logging.WARNING ) + for logger_name in AZURE_LOGGING_PACKAGES: logging.getLogger(logger_name).setLevel(azure_package_log_level) @@ -33,6 +34,24 @@ configure_azure_monitor() HTTPXClientInstrumentor().instrument() # httpx is used by openai + # Register ConversationSpanProcessor to propagate conversation_id/user_id to all child spans + from opentelemetry import trace as otel_trace + from create_app import ConversationSpanProcessor + + provider = otel_trace.get_tracer_provider() + if hasattr(provider, "add_span_processor"): + provider.add_span_processor(ConversationSpanProcessor()) + + # Suppress noisy Azure SDK loggers AFTER configure_azure_monitor() + # to prevent it from overriding our levels + _NOISY_AZURE_LOGGERS = [ + "azure.core.pipeline.policies.http_logging_policy", + "azure.monitor.opentelemetry.exporter", + "azure.identity", + ] + for logger_name in _NOISY_AZURE_LOGGERS: + logging.getLogger(logger_name).setLevel(logging.WARNING) + # pylint: disable=wrong-import-position from create_app import create_app # noqa: E402 diff --git a/code/backend/Admin.py b/code/backend/Admin.py index e5354fe30..4077af111 100644 --- a/code/backend/Admin.py +++ b/code/backend/Admin.py @@ -27,6 +27,7 @@ azure_package_log_level = getattr( logging, PACKAGE_LOGGING_LEVEL.upper(), logging.WARNING ) + for logger_name in AZURE_LOGGING_PACKAGES: logging.getLogger(logger_name).setLevel(azure_package_log_level) @@ -35,6 +36,16 @@ if os.getenv("APPLICATIONINSIGHTS_ENABLED", "false").lower() == "true": configure_azure_monitor() + # Suppress noisy Azure SDK loggers AFTER configure_azure_monitor() + # to prevent it from overriding our levels + _NOISY_AZURE_LOGGERS = [ + "azure.core.pipeline.policies.http_logging_policy", + "azure.monitor.opentelemetry.exporter", + "azure.identity", + ] + for logger_name in _NOISY_AZURE_LOGGERS: + logging.getLogger(logger_name).setLevel(logging.WARNING) + logger = logging.getLogger(__name__) logger.debug("Starting admin app") diff --git a/code/backend/api/chat_history.py b/code/backend/api/chat_history.py index 8a86b8119..117cc4472 100644 --- a/code/backend/api/chat_history.py +++ b/code/backend/api/chat_history.py @@ -10,6 +10,7 @@ from backend.batch.utilities.helpers.config.config_helper import ConfigHelper from backend.batch.utilities.helpers.env_helper import EnvHelper from backend.batch.utilities.chat_history.database_factory import DatabaseFactory +from backend.batch.utilities.loggers.event_utils import track_event_if_configured load_dotenv() bp_chat_history_response = Blueprint("chat_history", __name__) @@ -110,6 +111,11 @@ async def rename_conversation(): if not title or title.strip() == "": return jsonify({"error": "A non-empty title is required"}), 400 + track_event_if_configured("HistoryRenameRequested", { + "conversation_id": conversation_id, + "user_id": user_id, + }) + # Initialize and connect to the database client conversation_client = init_database_client() if not conversation_client: @@ -167,6 +173,11 @@ async def get_conversation(): if not conversation_id: return jsonify({"error": "conversation_id is required"}), 400 + track_event_if_configured("HistoryReadRequested", { + "conversation_id": conversation_id, + "user_id": user_id, + }) + # Initialize and connect to the database client conversation_client = init_database_client() if not conversation_client: @@ -246,6 +257,11 @@ async def delete_conversation(): 400, ) + track_event_if_configured("HistoryDeleteRequested", { + "conversation_id": conversation_id, + "user_id": user_id, + }) + # Initialize and connect to the database client conversation_client = init_database_client() if not conversation_client: @@ -369,6 +385,11 @@ async def update_conversation(): if not conversation_id: return jsonify({"error": "conversation_id is required"}), 400 + track_event_if_configured("HistoryUpdateRequested", { + "conversation_id": conversation_id, + "user_id": user_id, + }) + messages = request_json["messages"] if not messages or len(messages) == 0: return jsonify({"error": "Messages are required"}), 400 diff --git a/code/backend/batch/function_app.py b/code/backend/batch/function_app.py index 8a021d7ec..e4ec7fa17 100644 --- a/code/backend/batch/function_app.py +++ b/code/backend/batch/function_app.py @@ -25,12 +25,23 @@ azure_package_log_level = getattr( logging, PACKAGE_LOGGING_LEVEL.upper(), logging.WARNING ) + for logger_name in AZURE_LOGGING_PACKAGES: logging.getLogger(logger_name).setLevel(azure_package_log_level) if os.getenv("APPLICATIONINSIGHTS_ENABLED", "false").lower() == "true": configure_azure_monitor() + # Suppress noisy Azure SDK loggers AFTER configure_azure_monitor() + # to prevent it from overriding our levels + _NOISY_AZURE_LOGGERS = [ + "azure.core.pipeline.policies.http_logging_policy", + "azure.monitor.opentelemetry.exporter", + "azure.identity", + ] + for logger_name in _NOISY_AZURE_LOGGERS: + logging.getLogger(logger_name).setLevel(logging.WARNING) + app = func.FunctionApp( http_auth_level=func.AuthLevel.FUNCTION ) # change to ANONYMOUS for local debugging diff --git a/code/backend/batch/utilities/loggers/event_utils.py b/code/backend/batch/utilities/loggers/event_utils.py new file mode 100644 index 000000000..d873916c6 --- /dev/null +++ b/code/backend/batch/utilities/loggers/event_utils.py @@ -0,0 +1,32 @@ +""" +Utility for tracking custom events to Application Insights. +""" + +import os +import logging + +logger = logging.getLogger(__name__) + + +def track_event_if_configured(event_name: str, event_data: dict): + """Track custom event to Application Insights if configured. + + Args: + event_name: Name of the event to track + event_data: Dictionary of event properties + """ + if os.getenv("APPLICATIONINSIGHTS_ENABLED", "false").lower() == "true": + try: + from azure.monitor.events.extension import track_event + + track_event(event_name, event_data) + except ImportError: + logger.warning( + "azure-monitor-events-extension not installed. Skipping track_event for %s", + event_name, + ) + else: + logger.debug( + "Skipping track_event for %s: Application Insights is not enabled", + event_name, + ) diff --git a/code/create_app.py b/code/create_app.py index 2f2ac296e..512d29bf5 100644 --- a/code/create_app.py +++ b/code/create_app.py @@ -2,6 +2,7 @@ This module creates a Flask app that serves the web interface for the chatbot. """ +import contextvars import functools import json import logging @@ -29,6 +30,26 @@ from backend.batch.utilities.helpers.azure_blob_storage_client import ( AzureBlobStorageClient, ) +from backend.batch.utilities.loggers.event_utils import track_event_if_configured +from backend.batch.utilities.chat_history.auth_utils import get_authenticated_user_details +from opentelemetry import trace +from opentelemetry.sdk.trace import SpanProcessor + +_conversation_id_var: contextvars.ContextVar[str] = contextvars.ContextVar("conversation_id", default="") +_user_id_var: contextvars.ContextVar[str] = contextvars.ContextVar("user_id", default="") + + +class ConversationSpanProcessor(SpanProcessor): + """Attaches conversation_id and user_id to every span created during a request.""" + + def on_start(self, span, parent_context=None): + conversation_id = _conversation_id_var.get() + user_id = _user_id_var.get() + if conversation_id: + span.set_attribute("conversation_id", conversation_id) + if user_id: + span.set_attribute("user_id", user_id) + ERROR_429_MESSAGE = "We're currently experiencing a high number of requests for the service you're trying to access. Please wait a moment and try again." ERROR_GENERIC_MESSAGE = "An error occurred. Please try again. If the problem persists, please contact the site administrator." @@ -413,6 +434,32 @@ def create_app(): logger.debug("Starting web app") + @app.before_request + def set_span_attributes(): + """Middleware to attach conversation_id and user_id to the current OpenTelemetry span and context vars.""" + if request.method == "POST" and request.is_json: + try: + body = request.get_json(silent=True) or {} + conversation_id = body.get("conversation_id", "") + authenticated_user = get_authenticated_user_details(request_headers=request.headers) + user_id = authenticated_user.get("user_principal_id", "") + _conversation_id_var.set(conversation_id) + _user_id_var.set(user_id) + span = trace.get_current_span() + if span: + if conversation_id: + span.set_attribute("conversation_id", conversation_id) + if user_id: + span.set_attribute("user_id", user_id) + except Exception: + pass # Don't let telemetry middleware break requests + + @app.teardown_request + def clear_span_context(exc=None): + """Clear conversation context vars after each request.""" + _conversation_id_var.set("") + _user_id_var.set("") + @app.route("/", defaults={"path": "index.html"}) @app.route("/") def static_file(path): @@ -558,6 +605,15 @@ def get_file(filename): def conversation_azure_byod(): logger.info("Method conversation_azure_byod started") try: + authenticated_user = get_authenticated_user_details(request_headers=request.headers) + user_id = authenticated_user.get("user_principal_id", "") + conversation_id = request.json.get("conversation_id", "") + + track_event_if_configured("ConversationBYODRequestReceived", { + "conversation_id": conversation_id, + "user_id": user_id, + }) + if should_use_data(env_helper, azure_search_helper): return conversation_with_data(request, env_helper) else: @@ -565,6 +621,12 @@ def conversation_azure_byod(): except APIStatusError as e: error_message = str(e) logger.exception("Exception in /api/conversation | %s", error_message) + track_event_if_configured("ConversationBYODError", { + "conversation_id": locals().get("conversation_id", ""), + "user_id": locals().get("user_id", ""), + "error": error_message, + "error_type": type(e).__name__, + }) response_json = e.response.json() response_message = response_json.get("error", {}).get("message", "") response_code = response_json.get("error", {}).get("code", "") @@ -574,6 +636,12 @@ def conversation_azure_byod(): except Exception as e: error_message = str(e) logger.exception("Exception in /api/conversation | %s", error_message) + track_event_if_configured("ConversationBYODError", { + "conversation_id": locals().get("conversation_id", ""), + "user_id": locals().get("user_id", ""), + "error": error_message, + "error_type": type(e).__name__, + }) return jsonify({"error": ERROR_GENERIC_MESSAGE}), 500 finally: logger.info("Method conversation_azure_byod ended") @@ -583,8 +651,16 @@ async def conversation_custom(): try: logger.info("Method conversation_custom started") + authenticated_user = get_authenticated_user_details(request_headers=request.headers) + user_id = authenticated_user.get("user_principal_id", "") user_message = request.json["messages"][-1]["content"] conversation_id = request.json["conversation_id"] + + track_event_if_configured("ConversationCustomRequestReceived", { + "conversation_id": conversation_id, + "user_id": user_id, + }) + user_assistant_messages = list( filter( lambda x: x["role"] in ("user", "assistant"), @@ -599,6 +675,11 @@ async def conversation_custom(): orchestrator=get_orchestrator_config(), ) + track_event_if_configured("ConversationCustomSuccess", { + "conversation_id": conversation_id, + "user_id": user_id, + }) + response_obj = { "id": "response.id", "model": env_helper.AZURE_OPENAI_MODEL, @@ -612,6 +693,12 @@ async def conversation_custom(): except APIStatusError as e: error_message = str(e) logger.exception("Exception in /api/conversation | %s", error_message) + track_event_if_configured("ConversationCustomError", { + "conversation_id": locals().get("conversation_id", ""), + "user_id": locals().get("user_id", ""), + "error": error_message, + "error_type": type(e).__name__, + }) response_json = e.response.json() response_message = response_json.get("error", {}).get("message", "") response_code = response_json.get("error", {}).get("code", "") @@ -621,6 +708,12 @@ async def conversation_custom(): except Exception as e: error_message = str(e) logger.exception("Exception in /api/conversation | %s", error_message) + track_event_if_configured("ConversationCustomError", { + "conversation_id": locals().get("conversation_id", ""), + "user_id": locals().get("user_id", ""), + "error": error_message, + "error_type": type(e).__name__, + }) return jsonify({"error": ERROR_GENERIC_MESSAGE}), 500 finally: logger.info("Method conversation_custom ended") diff --git a/infra/main.bicep b/infra/main.bicep index fb8cef219..7fce1e092 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -1293,6 +1293,7 @@ module web 'modules/app/web.bicep' = { AZURE_CLIENT_ID: managedIdentityModule.outputs.clientId // Required so LangChain AzureSearch vector store authenticates with this user-assigned managed identity APP_ENV: appEnvironment AZURE_SEARCH_DIMENSIONS: azureSearchDimensions + APPLICATIONINSIGHTS_ENABLED: enableMonitoring ? 'true' : 'false' }, databaseType == 'CosmosDB' ? { @@ -1392,6 +1393,7 @@ module adminweb 'modules/app/adminweb.bicep' = { MANAGED_IDENTITY_RESOURCE_ID: managedIdentityModule.outputs.resourceId APP_ENV: appEnvironment AZURE_SEARCH_DIMENSIONS: azureSearchDimensions + APPLICATIONINSIGHTS_ENABLED: enableMonitoring ? 'true' : 'false' }, databaseType == 'CosmosDB' ? { @@ -1494,6 +1496,7 @@ module function 'modules/app/function.bicep' = { APP_ENV: appEnvironment BACKEND_URL: backendUrl AZURE_SEARCH_DIMENSIONS: azureSearchDimensions + APPLICATIONINSIGHTS_ENABLED: enableMonitoring ? 'true' : 'false' }, databaseType == 'CosmosDB' ? { diff --git a/infra/modules/core/monitor/monitoring.bicep b/infra/modules/core/monitor/monitoring.bicep index fea29d2bd..52438fb3b 100644 --- a/infra/modules/core/monitor/monitoring.bicep +++ b/infra/modules/core/monitor/monitoring.bicep @@ -131,7 +131,6 @@ module avmAppInsights 'br/public:avm/res/insights/component:0.6.0' = { disableIpMasking: false flowType: 'Bluefield' workspaceResourceId: empty(workspaceResourceId) ? '' : workspaceResourceId - diagnosticSettings: empty(workspaceResourceId) ? null : [{ workspaceResourceId: workspaceResourceId }] } } diff --git a/poetry.lock b/poetry.lock index 469a9b72c..a8911a36d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -871,6 +871,22 @@ files = [ [package.dependencies] azure-core = ">=1.32.0" +[[package]] +name = "azure-monitor-events-extension" +version = "0.1.0" +description = "Microsoft Azure Monitor Events Extension for Python" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "azure-monitor-events-extension-0.1.0.tar.gz", hash = "sha256:094773685171a50aa5cc548279c9141c8a26682f6acef397815c528b53b838b5"}, + {file = "azure_monitor_events_extension-0.1.0-py2.py3-none-any.whl", hash = "sha256:5d92abb5e6a32ab23b12c726def9f9607c6fa1d84900d493b906ff9ec489af4a"}, +] + +[package.dependencies] +opentelemetry-api = ">=1.20,<2.0" +opentelemetry-sdk = ">=1.20,<2.0" + [[package]] name = "azure-monitor-opentelemetry" version = "1.6.12" @@ -8426,4 +8442,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = "^3.10" -content-hash = "3d093c112947bdf324788608f34246c975a0fb085a60c09eac88030440208824" +content-hash = "4f3da332d8827a1b713038cc8f4dc9f80b543831f414279726e694505e6837fc" diff --git a/pyproject.toml b/pyproject.toml index 6719d4462..b818ad166 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ python-docx = "1.2.0" azure-keyvault-secrets = "4.10.0" pandas = "2.3.3" azure-monitor-opentelemetry = "^1.6.10" +azure-monitor-events-extension = "^0.1.0" opentelemetry-instrumentation-httpx = "^0.52b0" pillow = "11.0.0" azure-mgmt-cognitiveservices = "^14.1.0"