diff --git a/.gitignore b/.gitignore index 58d55b91..c2a0481f 100644 --- a/.gitignore +++ b/.gitignore @@ -140,7 +140,7 @@ celerybeat.pid *.sage.py # Environments -.env +.env* .venv env/ venv/ diff --git a/scenarios/evaluate/Supported_Evaluation_Metrics/AI_Judge_Evaluators_Quality/AI_Judge_Evaluators_Quality.ipynb b/scenarios/evaluate/Supported_Evaluation_Metrics/AI_Judge_Evaluators_Quality/AI_Judge_Evaluators_Quality.ipynb index ad5d1d45..8952a34d 100644 --- a/scenarios/evaluate/Supported_Evaluation_Metrics/AI_Judge_Evaluators_Quality/AI_Judge_Evaluators_Quality.ipynb +++ b/scenarios/evaluate/Supported_Evaluation_Metrics/AI_Judge_Evaluators_Quality/AI_Judge_Evaluators_Quality.ipynb @@ -63,6 +63,16 @@ "from azure.identity import DefaultAzureCredential" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import load_dotenv\n", + "load_dotenv()" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -95,33 +105,13 @@ ] }, "outputs": [], - "source": [ - "azure_ai_project = {\n", - " \"subscription_id\": \"\",\n", - " \"resource_group_name\": \"\",\n", - " \"project_name\": \"\",\n", - "}\n", - "\n", - "azure_openai_api_version = \"\"\n", - "azure_openai_deployment = \"\"\n", - "azure_openai_endpoint = \"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "tags": [] - }, - "outputs": [], "source": [ "import os\n", + "azure_ai_project = os.environ.get(\"AZURE_AI_PROJECT\")\n", "\n", - "# Use the following code to set the environment variables if not already set. If set, you can skip this step. 
In addition, you should also set \"AZURE_OPENAI_ENDPOINT\" to the endpoint of your AzureOpenAI service.\n", - "\n", - "os.environ[\"AZURE_OPENAI_API_VERSION\"] = azure_openai_api_version\n", - "os.environ[\"AZURE_OPENAI_DEPLOYMENT\"] = azure_openai_deployment\n", - "os.environ[\"AZURE_OPENAI_ENDPOINT\"] = azure_openai_endpoint" + "azure_openai_api_version = os.environ.get(\"AZURE_OPENAI_API_VERSION\")\n", + "azure_openai_deployment = os.environ.get(\"AZURE_OPENAI_DEPLOYMENT\")\n", + "azure_openai_endpoint = os.environ.get(\"AZURE_OPENAI_ENDPOINT\")" ] }, { @@ -222,7 +212,7 @@ "path = str(pathlib.Path(pathlib.Path.cwd())) + \"/data.jsonl\"\n", "\n", "results = evaluate(\n", - " evaluation_name=\"Eval-Run-\" + \"-\" + model_config[\"azure_deployment\"].title(),\n", + " evaluation_name=\"Quality \" + model_config[\"azure_deployment\"].title(),\n", " data=path,\n", " target=ModelEndpoint(model_config),\n", " evaluators={\n", @@ -231,7 +221,12 @@ " \"relevance\": relevance_evaluator,\n", " \"groundedness\": groundedness_evaluator,\n", " \"fluency\": fluency_evaluator,\n", - " \"similarity\": similarity_evaluator,\n", + " #\"similarity\": similarity_evaluator,\n", + " },\n", + " azure_ai_project={\n", + " \"subscription_id\": os.environ[\"REPORT_AZURE_SUBSCRIPTION_ID\"],\n", + " \"project_name\": os.environ[\"REPORT_PROJECT_NAME\"],\n", + " \"resource_group_name\": os.environ[\"REPORT_RESOURCE_GROUP_NAME\"],\n", + " },\n", " evaluator_config={\n", " \"content_safety\": {\"column_mapping\": {\"query\": \"${data.query}\", \"response\": \"${target.response}\"}},\n", @@ -269,7 +264,7 @@ "metadata": {}, "outputs": [], "source": [ - "pprint(results)" + "pd.DataFrame(results[\"rows\"])" ] }, { @@ -278,13 +273,20 @@ "metadata": {}, "outputs": [], "source": [ - "pd.DataFrame(results[\"rows\"])" + "pprint(f'AI Foundry URL: {results.get(\"studio_url\")}')" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": 
{ "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -297,7 +299,8 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" + "pygments_lexer": "ipython3", + "version": "3.13.3" } }, "nbformat": 4, diff --git a/scenarios/evaluate/Supported_Evaluation_Metrics/AI_Judge_Evaluators_Safety_Risks/AI_Judge_Evaluators_Safety_Risks_Image/AI_Judge_Evaluators_Safety_Risks_Image.ipynb b/scenarios/evaluate/Supported_Evaluation_Metrics/AI_Judge_Evaluators_Safety_Risks/AI_Judge_Evaluators_Safety_Risks_Image/AI_Judge_Evaluators_Safety_Risks_Image.ipynb index 6b6bdc72..756bc8e7 100644 --- a/scenarios/evaluate/Supported_Evaluation_Metrics/AI_Judge_Evaluators_Safety_Risks/AI_Judge_Evaluators_Safety_Risks_Image/AI_Judge_Evaluators_Safety_Risks_Image.ipynb +++ b/scenarios/evaluate/Supported_Evaluation_Metrics/AI_Judge_Evaluators_Safety_Risks/AI_Judge_Evaluators_Safety_Risks_Image/AI_Judge_Evaluators_Safety_Risks_Image.ipynb @@ -62,11 +62,11 @@ "from azure.ai.evaluation import ViolenceEvaluator, SexualEvaluator, SelfHarmEvaluator, HateUnfairnessEvaluator\n", "from azure.identity import DefaultAzureCredential\n", "\n", - "azure_ai_project = {\n", - " \"subscription_id\": \"\",\n", - " \"resource_group_name\": \"\",\n", - " \"project_name\": \"\",\n", - "}" + "from dotenv import load_dotenv\n", + "load_dotenv()\n", + "\n", + "import os\n", + "azure_ai_project = os.environ.get(\"AZURE_AI_PROJECT\")" ] }, { @@ -76,6 +76,22 @@ "### 1. 
Multi-modal Content Safety Evaluator" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Tom Cruise\n", + "#eval_image_url = \"https://cdn.britannica.com/68/178268-050-5B4E7FB6/Tom-Cruise-2013.jpg\"\n", + "\n", + "# Hellraiser\n", + "eval_image_url = \"https://dotcomstories.com/wp-content/uploads/2023/07/Hellraiser-1987.jpg\"\n", + "\n", + "from IPython.display import Image, display\n", + "display(Image(url=eval_image_url))" ] + }, { "cell_type": "code", "execution_count": null, @@ -99,7 +115,7 @@ " {\"type\": \"text\", \"text\": \"Can you describe this image?\"},\n", " {\n", " \"type\": \"image_url\",\n", - " \"image_url\": {\"url\": \"https://cdn.britannica.com/68/178268-050-5B4E7FB6/Tom-Cruise-2013.jpg\"},\n", + " \"image_url\": {\"url\": eval_image_url},\n", " },\n", " ],\n", " },\n", @@ -192,12 +208,35 @@ "\n", "result = evaluate(\n", " data=file_path,\n", - " azure_ai_project=azure_ai_project,\n", + " azure_ai_project={\n", + " \"subscription_id\": os.environ[\"REPORT_AZURE_SUBSCRIPTION_ID\"],\n", + " \"project_name\": os.environ[\"REPORT_PROJECT_NAME\"],\n", + " \"resource_group_name\": os.environ[\"REPORT_RESOURCE_GROUP_NAME\"],\n", + " },\n", " evaluators={\"content_safety\": content_safety_eval},\n", ")\n", "pprint(result)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pprint(f'AI Foundry URL: {result.get(\"studio_url\")}')" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "pd.DataFrame(result[\"rows\"])" ] + }, { "cell_type": "markdown", "metadata": {}, @@ -232,6 +271,13 @@ "result = evaluator(conversation=conversation)\n", "pprint(result)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -249,7 +295,8 @@ "mimetype": "text/x-python", "name": "python", 
"nbconvert_exporter": "python", - "pygments_lexer": "ipython3" + "pygments_lexer": "ipython3", + "version": "3.13.3" } }, "nbformat": 4, diff --git a/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/AI_Judge_Evaluator_Intent_Resolution.ipynb b/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/AI_Judge_Evaluator_Intent_Resolution.ipynb index 2dcd8d01..00df47f3 100644 --- a/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/AI_Judge_Evaluator_Intent_Resolution.ipynb +++ b/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/AI_Judge_Evaluator_Intent_Resolution.ipynb @@ -62,6 +62,35 @@ "### Initialize Intent Resolution Evaluator\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install azure-ai-projects azure-identity azure-ai-evaluation python-dotenv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import load_dotenv\n", + "load_dotenv()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "azure_ai_project = os.environ.get(\"AZURE_AI_PROJECT\")" + ] + }, { "cell_type": "code", "execution_count": null, @@ -71,13 +100,16 @@ "import os\n", "from azure.ai.evaluation import AzureOpenAIModelConfiguration\n", "from azure.ai.evaluation import IntentResolutionEvaluator\n", + "from azure.identity import DefaultAzureCredential, get_bearer_token_provider\n", "from pprint import pprint\n", "\n", + "token_provider = get_bearer_token_provider(DefaultAzureCredential(), \"https://cognitiveservices.azure.com/.default\")\n", + "\n", "model_config = AzureOpenAIModelConfiguration(\n", " azure_endpoint=os.environ[\"AZURE_OPENAI_ENDPOINT\"],\n", " api_key=os.environ[\"AZURE_OPENAI_API_KEY\"],\n", " api_version=os.environ[\"AZURE_OPENAI_API_VERSION\"],\n", - " 
azure_deployment=os.environ[\"MODEL_DEPLOYMENT_NAME\"],\n", + " azure_deployment=os.environ[\"AZURE_OPENAI_DEPLOYMENT\"],\n", ")\n", "\n", "intent_resolution_evaluator = IntentResolutionEvaluator(model_config)" @@ -318,6 +350,8 @@ "# This was saved using agent thread and converter.\n", "file_name = \"evaluation_data.jsonl\"\n", "\n", + "azure_ai_project = os.environ.get(\"AZURE_AI_PROJECT\")\n", + "\n", "response = evaluate(\n", " data=file_name,\n", " evaluation_name=\"Intent Resolution Evaluation\",\n", @@ -325,18 +359,35 @@ " \"intent_resolution\": intent_resolution_evaluator,\n", " },\n", " azure_ai_project={\n", - " \"subscription_id\": os.environ[\"AZURE_SUBSCRIPTION_ID\"],\n", - " \"project_name\": os.environ[\"PROJECT_NAME\"],\n", - " \"resource_group_name\": os.environ[\"RESOURCE_GROUP_NAME\"],\n", + " \"subscription_id\": os.environ[\"REPORT_AZURE_SUBSCRIPTION_ID\"],\n", + " \"project_name\": os.environ[\"REPORT_PROJECT_NAME\"],\n", + " \"resource_group_name\": os.environ[\"REPORT_RESOURCE_GROUP_NAME\"],\n", " },\n", ")\n", "pprint(f'AI Foundry URL: {response.get(\"studio_url\")}')" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "pd.DataFrame(response[\"rows\"])" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "test_agent_eval_sample", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -349,7 +400,8 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" + "pygments_lexer": "ipython3", + "version": "3.13.3" } }, "nbformat": 4, diff --git a/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/AI_Judge_Evaluator_Task_Adherence.ipynb 
b/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/AI_Judge_Evaluator_Task_Adherence.ipynb index 8a1f9350..db3865cc 100644 --- a/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/AI_Judge_Evaluator_Task_Adherence.ipynb +++ b/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/AI_Judge_Evaluator_Task_Adherence.ipynb @@ -80,6 +80,25 @@ " - ToolDefinitions : The list of tool definitions the agent can call. This may be useful for the evaluator to better assess if the right tool was called to adhere to user intent." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install azure-ai-projects azure-identity azure-ai-evaluation python-dotenv" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import load_dotenv\n", + "load_dotenv()" ] + }, { "cell_type": "markdown", "metadata": {}, @@ -106,6 +125,18 @@ "task_adherence_evaluator = TaskAdherenceEvaluator(model_config)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"AZURE_OPENAI_ENDPOINT=\" + os.environ[\"AZURE_OPENAI_ENDPOINT\"])\n", + "print(\"AZURE_OPENAI_API_KEY is set: \" + str(\"AZURE_OPENAI_API_KEY\" in os.environ))\n", + "print(\"AZURE_OPENAI_API_VERSION=\" + os.environ[\"AZURE_OPENAI_API_VERSION\"])\n", + "print(\"MODEL_DEPLOYMENT_NAME=\" + os.environ[\"MODEL_DEPLOYMENT_NAME\"])" ] + }, { "cell_type": "markdown", "metadata": {}, @@ -263,9 +294,9 @@ " \"task_adherence\": task_adherence_evaluator,\n", " },\n", " azure_ai_project={\n", - " \"subscription_id\": os.environ[\"AZURE_SUBSCRIPTION_ID\"],\n", - " \"project_name\": os.environ[\"PROJECT_NAME\"],\n", - " \"resource_group_name\": os.environ[\"RESOURCE_GROUP_NAME\"],\n", + " \"subscription_id\": os.environ[\"REPORT_AZURE_SUBSCRIPTION_ID\"],\n", + " \"project_name\": os.environ[\"REPORT_PROJECT_NAME\"],\n", + " 
\"resource_group_name\": os.environ[\"REPORT_RESOURCE_GROUP_NAME\"],\n", " },\n", ")\n", "pprint(f'AI Foundary URL: {response.get(\"studio_url\")}')" @@ -274,7 +305,7 @@ ], "metadata": { "kernelspec": { - "display_name": "test_agent_eval_sample", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -287,7 +318,8 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" + "pygments_lexer": "ipython3", + "version": "3.13.3" } }, "nbformat": 4, diff --git a/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/AI_Judge_Evaluator_Tool_Call_Accuracy.ipynb b/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/AI_Judge_Evaluator_Tool_Call_Accuracy.ipynb index c43b1aca..6166f467 100644 --- a/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/AI_Judge_Evaluator_Tool_Call_Accuracy.ipynb +++ b/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/AI_Judge_Evaluator_Tool_Call_Accuracy.ipynb @@ -71,6 +71,25 @@ "### Initialize Tool Call Accuracy Evaluator\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install azure-ai-projects azure-identity azure-ai-evaluation python-dotenv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import load_dotenv\n", + "load_dotenv()" + ] + }, { "cell_type": "code", "execution_count": null, @@ -329,18 +348,35 @@ " \"tool_call_accuracy\": tool_call_accuracy,\n", " },\n", " azure_ai_project={\n", - " \"subscription_id\": os.environ[\"AZURE_SUBSCRIPTION_ID\"],\n", - " \"project_name\": os.environ[\"PROJECT_NAME\"],\n", - " \"resource_group_name\": os.environ[\"RESOURCE_GROUP_NAME\"],\n", + " \"subscription_id\": os.environ[\"REPORT_AZURE_SUBSCRIPTION_ID\"],\n", + " \"project_name\": os.environ[\"REPORT_PROJECT_NAME\"],\n", + " \"resource_group_name\": 
os.environ[\"REPORT_RESOURCE_GROUP_NAME\"],\n", " },\n", ")\n", "pprint(f'AI Foundary URL: {response.get(\"studio_url\")}')" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "pd.DataFrame(response[\"rows\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "test_agent_eval_sample", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -353,7 +389,8 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" + "pygments_lexer": "ipython3", + "version": "3.13.3" } }, "nbformat": 4, diff --git a/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/AI_Judge_Evaluators_Response_Completeness.ipynb b/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/AI_Judge_Evaluators_Response_Completeness.ipynb index 8139a32f..7ec3e561 100644 --- a/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/AI_Judge_Evaluators_Response_Completeness.ipynb +++ b/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/AI_Judge_Evaluators_Response_Completeness.ipynb @@ -59,6 +59,25 @@ "The evaluator uses these inputs to determine the completeness score, ensuring that the response meaningfully addresses the query while adhering to the provided definitions and data." 
] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install azure-ai-projects azure-identity azure-ai-evaluation python-dotenv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dotenv import load_dotenv\n", + "load_dotenv()" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -208,16 +227,37 @@ " evaluators={\n", " \"response_completeness\": response_completeness_evaluator,\n", " },\n", - " azure_ai_project=azure_ai_project,\n", + " azure_ai_project={\n", + " \"subscription_id\": os.environ[\"REPORT_AZURE_SUBSCRIPTION_ID\"],\n", + " \"project_name\": os.environ[\"REPORT_PROJECT_NAME\"],\n", + " \"resource_group_name\": os.environ[\"REPORT_RESOURCE_GROUP_NAME\"],\n", + " },\n", ")\n", "\n", "pprint(f'AI Foundry URL: {response.get(\"studio_url\")}')" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "pd.DataFrame(response[\"rows\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "test_agent_eval_sample", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -230,7 +270,8 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" + "pygments_lexer": "ipython3", + "version": "3.13.3" } }, "nbformat": 4, diff --git a/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/Evaluate_Azure_AI_Agent_Quality.ipynb b/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/Evaluate_Azure_AI_Agent_Quality.ipynb index 6b1930a3..26481204 100644 --- a/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/Evaluate_Azure_AI_Agent_Quality.ipynb +++ 
b/scenarios/evaluate/Supported_Evaluation_Metrics/Agent_Evaluation/Evaluate_Azure_AI_Agent_Quality.ipynb @@ -57,6 +57,154 @@ "9) **AGENT_MODEL_DEPLOYMENT_NAME** - The deployment name of the model for your Azure AI agent, as found under the \"Name\" column in the \"Models + endpoints\" tab in your Azure AI Foundry project." ] }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: azure-ai-projects in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (1.0.0b11)\n", + "Requirement already satisfied: azure-identity in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (1.23.0)\n", + "Requirement already satisfied: azure-ai-evaluation in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (1.8.0)\n", + "Requirement already satisfied: python-dotenv in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (1.1.0)\n", + "Requirement already satisfied: isodate>=0.6.1 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-ai-projects) (0.7.2)\n", + "Requirement already satisfied: azure-core>=1.30.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-ai-projects) (1.34.0)\n", + "Requirement already satisfied: typing-extensions>=4.12.2 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-ai-projects) (4.14.0)\n", + "Requirement already satisfied: azure-storage-blob>=12.15.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-ai-projects) (12.25.1)\n", + "Requirement already satisfied: azure-ai-agents>=1.0.0b1 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-ai-projects) (1.1.0b1)\n", + "Requirement already satisfied: 
cryptography>=2.5 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-identity) (45.0.3)\n", + "Requirement already satisfied: msal>=1.30.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-identity) (1.32.3)\n", + "Requirement already satisfied: msal-extensions>=1.2.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-identity) (1.3.1)\n", + "Requirement already satisfied: promptflow-devkit>=1.17.1 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-ai-evaluation) (1.18.0)\n", + "Requirement already satisfied: promptflow-core>=1.17.1 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-ai-evaluation) (1.18.0)\n", + "Requirement already satisfied: pyjwt>=2.8.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-ai-evaluation) (2.10.1)\n", + "Requirement already satisfied: nltk>=3.9.1 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-ai-evaluation) (3.9.1)\n", + "Requirement already satisfied: httpx>=0.25.1 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-ai-evaluation) (0.28.1)\n", + "Requirement already satisfied: pandas<3.0.0,>=2.1.2 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-ai-evaluation) (2.2.3)\n", + "Requirement already satisfied: openai>=1.78.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-ai-evaluation) (1.84.0)\n", + "Requirement already satisfied: ruamel.yaml<1.0.0,>=0.17.10 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-ai-evaluation) (0.18.12)\n", + "Requirement already satisfied: msrest>=0.6.21 in 
/Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-ai-evaluation) (0.7.1)\n", + "Requirement already satisfied: Jinja2>=3.1.6 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-ai-evaluation) (3.1.6)\n", + "Requirement already satisfied: aiohttp>=3.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-ai-evaluation) (3.12.7)\n", + "Requirement already satisfied: numpy>=1.26.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from pandas<3.0.0,>=2.1.2->azure-ai-evaluation) (2.2.6)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from pandas<3.0.0,>=2.1.2->azure-ai-evaluation) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from pandas<3.0.0,>=2.1.2->azure-ai-evaluation) (2025.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from pandas<3.0.0,>=2.1.2->azure-ai-evaluation) (2025.2)\n", + "Requirement already satisfied: ruamel.yaml.clib>=0.2.7 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from ruamel.yaml<1.0.0,>=0.17.10->azure-ai-evaluation) (0.2.12)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.5.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from aiohttp>=3.0->azure-ai-evaluation) (2.6.1)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from aiohttp>=3.0->azure-ai-evaluation) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from 
aiohttp>=3.0->azure-ai-evaluation) (25.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from aiohttp>=3.0->azure-ai-evaluation) (1.6.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from aiohttp>=3.0->azure-ai-evaluation) (6.4.4)\n", + "Requirement already satisfied: propcache>=0.2.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from aiohttp>=3.0->azure-ai-evaluation) (0.3.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from aiohttp>=3.0->azure-ai-evaluation) (1.20.0)\n", + "Requirement already satisfied: idna>=2.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from yarl<2.0,>=1.17.0->aiohttp>=3.0->azure-ai-evaluation) (3.10)\n", + "Requirement already satisfied: requests>=2.21.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-core>=1.30.0->azure-ai-projects) (2.32.3)\n", + "Requirement already satisfied: six>=1.11.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-core>=1.30.0->azure-ai-projects) (1.17.0)\n", + "Requirement already satisfied: cffi>=1.14 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from cryptography>=2.5->azure-identity) (1.17.1)\n", + "Requirement already satisfied: pycparser in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from cffi>=1.14->cryptography>=2.5->azure-identity) (2.22)\n", + "Requirement already satisfied: anyio in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from httpx>=0.25.1->azure-ai-evaluation) (4.9.0)\n", + "Requirement already satisfied: certifi in 
/Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from httpx>=0.25.1->azure-ai-evaluation) (2025.4.26)\n", + "Requirement already satisfied: httpcore==1.* in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from httpx>=0.25.1->azure-ai-evaluation) (1.0.9)\n", + "Requirement already satisfied: h11>=0.16 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from httpcore==1.*->httpx>=0.25.1->azure-ai-evaluation) (0.16.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from Jinja2>=3.1.6->azure-ai-evaluation) (3.0.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from requests>=2.21.0->azure-core>=1.30.0->azure-ai-projects) (3.4.2)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from requests>=2.21.0->azure-core>=1.30.0->azure-ai-projects) (2.4.0)\n", + "Requirement already satisfied: requests-oauthlib>=0.5.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from msrest>=0.6.21->azure-ai-evaluation) (2.0.0)\n", + "Requirement already satisfied: click in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from nltk>=3.9.1->azure-ai-evaluation) (8.2.1)\n", + "Requirement already satisfied: joblib in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from nltk>=3.9.1->azure-ai-evaluation) (1.5.1)\n", + "Requirement already satisfied: regex>=2021.8.3 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from nltk>=3.9.1->azure-ai-evaluation) (2024.11.6)\n", + "Requirement already satisfied: tqdm in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from 
nltk>=3.9.1->azure-ai-evaluation) (4.67.1)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from openai>=1.78.0->azure-ai-evaluation) (1.9.0)\n", + "Requirement already satisfied: jiter<1,>=0.4.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from openai>=1.78.0->azure-ai-evaluation) (0.10.0)\n", + "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from openai>=1.78.0->azure-ai-evaluation) (2.11.5)\n", + "Requirement already satisfied: sniffio in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from openai>=1.78.0->azure-ai-evaluation) (1.3.1)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from pydantic<3,>=1.9.0->openai>=1.78.0->azure-ai-evaluation) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.33.2 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from pydantic<3,>=1.9.0->openai>=1.78.0->azure-ai-evaluation) (2.33.2)\n", + "Requirement already satisfied: typing-inspection>=0.4.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from pydantic<3,>=1.9.0->openai>=1.78.0->azure-ai-evaluation) (0.4.1)\n", + "Requirement already satisfied: docstring_parser in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-core>=1.17.1->azure-ai-evaluation) (0.16)\n", + "Requirement already satisfied: fastapi<1.0.0,>=0.109.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-core>=1.17.1->azure-ai-evaluation) (0.115.12)\n", + "Requirement already satisfied: filetype>=1.2.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from 
promptflow-core>=1.17.1->azure-ai-evaluation) (1.2.0)\n", + "Requirement already satisfied: flask<4.0.0,>=2.2.3 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-core>=1.17.1->azure-ai-evaluation) (3.1.1)\n", + "Requirement already satisfied: jsonschema<5.0.0,>=4.0.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-core>=1.17.1->azure-ai-evaluation) (4.24.0)\n", + "Requirement already satisfied: promptflow-tracing==1.18.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-core>=1.17.1->azure-ai-evaluation) (1.18.0)\n", + "Requirement already satisfied: psutil in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-core>=1.17.1->azure-ai-evaluation) (7.0.0)\n", + "Requirement already satisfied: opentelemetry-sdk<2.0.0,>=1.22.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-tracing==1.18.0->promptflow-core>=1.17.1->azure-ai-evaluation) (1.33.1)\n", + "Requirement already satisfied: tiktoken>=0.4.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-tracing==1.18.0->promptflow-core>=1.17.1->azure-ai-evaluation) (0.9.0)\n", + "Requirement already satisfied: starlette<0.47.0,>=0.40.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from fastapi<1.0.0,>=0.109.0->promptflow-core>=1.17.1->azure-ai-evaluation) (0.46.2)\n", + "Requirement already satisfied: blinker>=1.9.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from flask<4.0.0,>=2.2.3->promptflow-core>=1.17.1->azure-ai-evaluation) (1.9.0)\n", + "Requirement already satisfied: itsdangerous>=2.2.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from flask<4.0.0,>=2.2.3->promptflow-core>=1.17.1->azure-ai-evaluation) 
(2.2.0)\n", + "Requirement already satisfied: werkzeug>=3.1.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from flask<4.0.0,>=2.2.3->promptflow-core>=1.17.1->azure-ai-evaluation) (3.1.3)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from jsonschema<5.0.0,>=4.0.0->promptflow-core>=1.17.1->azure-ai-evaluation) (2025.4.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from jsonschema<5.0.0,>=4.0.0->promptflow-core>=1.17.1->azure-ai-evaluation) (0.36.2)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from jsonschema<5.0.0,>=4.0.0->promptflow-core>=1.17.1->azure-ai-evaluation) (0.25.1)\n", + "Requirement already satisfied: opentelemetry-api==1.33.1 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from opentelemetry-sdk<2.0.0,>=1.22.0->promptflow-tracing==1.18.0->promptflow-core>=1.17.1->azure-ai-evaluation) (1.33.1)\n", + "Requirement already satisfied: opentelemetry-semantic-conventions==0.54b1 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from opentelemetry-sdk<2.0.0,>=1.22.0->promptflow-tracing==1.18.0->promptflow-core>=1.17.1->azure-ai-evaluation) (0.54b1)\n", + "Requirement already satisfied: deprecated>=1.2.6 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from opentelemetry-api==1.33.1->opentelemetry-sdk<2.0.0,>=1.22.0->promptflow-tracing==1.18.0->promptflow-core>=1.17.1->azure-ai-evaluation) (1.2.18)\n", + "Requirement already satisfied: importlib-metadata<8.7.0,>=6.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from 
opentelemetry-api==1.33.1->opentelemetry-sdk<2.0.0,>=1.22.0->promptflow-tracing==1.18.0->promptflow-core>=1.17.1->azure-ai-evaluation) (8.6.1)\n", + "Requirement already satisfied: zipp>=3.20 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from importlib-metadata<8.7.0,>=6.0->opentelemetry-api==1.33.1->opentelemetry-sdk<2.0.0,>=1.22.0->promptflow-tracing==1.18.0->promptflow-core>=1.17.1->azure-ai-evaluation) (3.22.0)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from deprecated>=1.2.6->opentelemetry-api==1.33.1->opentelemetry-sdk<2.0.0,>=1.22.0->promptflow-tracing==1.18.0->promptflow-core>=1.17.1->azure-ai-evaluation) (1.17.2)\n", + "Requirement already satisfied: argcomplete>=3.2.3 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-devkit>=1.17.1->azure-ai-evaluation) (3.6.2)\n", + "Requirement already satisfied: azure-monitor-opentelemetry-exporter<2.0.0,>=1.0.0b21 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-devkit>=1.17.1->azure-ai-evaluation) (1.0.0b37)\n", + "Requirement already satisfied: colorama<0.5.0,>=0.4.6 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-devkit>=1.17.1->azure-ai-evaluation) (0.4.6)\n", + "Requirement already satisfied: filelock<4.0.0,>=3.4.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-devkit>=1.17.1->azure-ai-evaluation) (3.18.0)\n", + "Requirement already satisfied: flask-cors<6.0.0,>=5.0.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-devkit>=1.17.1->azure-ai-evaluation) (5.0.1)\n", + "Requirement already satisfied: flask-restx<2.0.0,>=1.2.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from 
promptflow-devkit>=1.17.1->azure-ai-evaluation) (1.3.0)\n", + "Requirement already satisfied: gitpython<4.0.0,>=3.1.24 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-devkit>=1.17.1->azure-ai-evaluation) (3.1.44)\n", + "Requirement already satisfied: keyring<25.0.0,>=24.2.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-devkit>=1.17.1->azure-ai-evaluation) (24.3.1)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.5 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-devkit>=1.17.1->azure-ai-evaluation) (3.26.1)\n", + "Requirement already satisfied: opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-devkit>=1.17.1->azure-ai-evaluation) (1.33.1)\n", + "Requirement already satisfied: pillow<11.1.0,>=10.1.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-devkit>=1.17.1->azure-ai-evaluation) (11.0.0)\n", + "Requirement already satisfied: pydash<8.0.0,>=6.0.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-devkit>=1.17.1->azure-ai-evaluation) (7.0.7)\n", + "Requirement already satisfied: sqlalchemy<3.0.0,>=1.4.48 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-devkit>=1.17.1->azure-ai-evaluation) (2.0.41)\n", + "Requirement already satisfied: strictyaml<2.0.0,>=1.5.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-devkit>=1.17.1->azure-ai-evaluation) (1.7.3)\n", + "Requirement already satisfied: tabulate<1.0.0,>=0.9.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-devkit>=1.17.1->azure-ai-evaluation) (0.9.0)\n", + "Requirement already satisfied: 
waitress<4.0.0,>=3.0.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from promptflow-devkit>=1.17.1->azure-ai-evaluation) (3.0.2)\n", + "Requirement already satisfied: fixedint==0.1.6 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from azure-monitor-opentelemetry-exporter<2.0.0,>=1.0.0b21->promptflow-devkit>=1.17.1->azure-ai-evaluation) (0.1.6)\n", + "Requirement already satisfied: aniso8601>=0.82 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from flask-restx<2.0.0,>=1.2.0->promptflow-devkit>=1.17.1->azure-ai-evaluation) (10.0.1)\n", + "Requirement already satisfied: importlib-resources in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from flask-restx<2.0.0,>=1.2.0->promptflow-devkit>=1.17.1->azure-ai-evaluation) (6.5.2)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from gitpython<4.0.0,>=3.1.24->promptflow-devkit>=1.17.1->azure-ai-evaluation) (4.0.12)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from gitdb<5,>=4.0.1->gitpython<4.0.0,>=3.1.24->promptflow-devkit>=1.17.1->azure-ai-evaluation) (5.0.2)\n", + "Requirement already satisfied: jaraco.classes in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from keyring<25.0.0,>=24.2.0->promptflow-devkit>=1.17.1->azure-ai-evaluation) (3.4.0)\n", + "Requirement already satisfied: packaging>=17.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from marshmallow<4.0.0,>=3.5->promptflow-devkit>=1.17.1->azure-ai-evaluation) (25.0)\n", + "Requirement already satisfied: googleapis-common-protos~=1.52 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from 
opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->promptflow-devkit>=1.17.1->azure-ai-evaluation) (1.70.0)\n", + "Requirement already satisfied: opentelemetry-exporter-otlp-proto-common==1.33.1 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->promptflow-devkit>=1.17.1->azure-ai-evaluation) (1.33.1)\n", + "Requirement already satisfied: opentelemetry-proto==1.33.1 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->promptflow-devkit>=1.17.1->azure-ai-evaluation) (1.33.1)\n", + "Requirement already satisfied: protobuf<6.0,>=5.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from opentelemetry-proto==1.33.1->opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->promptflow-devkit>=1.17.1->azure-ai-evaluation) (5.29.5)\n", + "Requirement already satisfied: oauthlib>=3.0.0 in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from requests-oauthlib>=0.5.0->msrest>=0.6.21->azure-ai-evaluation) (3.2.2)\n", + "Requirement already satisfied: more-itertools in /Users/cv/Documents/Projects/azureai-samples/.venv/lib/python3.13/site-packages (from jaraco.classes->keyring<25.0.0,>=24.2.0->promptflow-devkit>=1.17.1->azure-ai-evaluation) (10.7.0)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install azure-ai-projects azure-identity azure-ai-evaluation python-dotenv" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from dotenv import load_dotenv, find_dotenv\n", + "load_dotenv(verbose=True, dotenv_path=find_dotenv(\".env.1rp.swedencentral\"))" + ] + }, { "cell_type": 
"markdown", "metadata": {}, @@ -66,21 +214,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import os\n", "from azure.ai.projects import AIProjectClient\n", "from azure.identity import DefaultAzureCredential\n", - "from azure.ai.projects.models import FunctionTool, ToolSet\n", + "from azure.ai.agents.models import FunctionTool, ToolSet, ListSortOrder\n", "\n", "# Import your custom functions to be used as Tools for the Agent\n", "from user_functions import user_functions\n", "\n", - "project_client = AIProjectClient.from_connection_string(\n", + "project_client = AIProjectClient(\n", " credential=DefaultAzureCredential(),\n", - " conn_str=os.environ[\"PROJECT_CONNECTION_STRING\"],\n", + " endpoint=os.environ[\"AZURE_AI_PROJECT\"],\n", ")\n", "\n", "AGENT_NAME = \"Seattle Tourist Assistant\"\n", @@ -92,7 +240,7 @@ "toolset.add(functions)\n", "\n", "# To enable tool calls executed automatically\n", - "project_client.agents.enable_auto_function_calls(toolset=toolset)" + "project_client.agents.enable_auto_function_calls(toolset)" ] }, { @@ -104,9 +252,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Created agent, ID: asst_4plunuKy4SZ0qOUdtUgiBn0e\n" + ] + } + ], "source": [ "agent = project_client.agents.create_agent(\n", " model=os.environ[\"AGENT_MODEL_DEPLOYMENT_NAME\"],\n", @@ -127,11 +283,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Created thread, ID: thread_WXoBU8sMCJqpH5OXwOKIV3Ac\n" + ] + } + ], "source": [ - "thread = project_client.agents.create_thread()\n", + "thread = project_client.agents.threads.create()\n", "print(f\"Created thread, ID: {thread.id}\")" ] }, @@ -154,19 +318,23 @@ }, 
{ "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Created message, ID: msg_sCqtNa24xkcxjVRRs1lG3fvD\n" + ] + } + ], "source": [ "# Create message to thread\n", "\n", "MESSAGE = \"Can you email me weather info for Seattle ?\"\n", "\n", - "message = project_client.agents.create_message(\n", - " thread_id=thread.id,\n", - " role=\"user\",\n", - " content=MESSAGE,\n", - ")\n", + "message = project_client.agents.messages.create(thread_id=thread.id, role=\"user\", content=MESSAGE)\n", "print(f\"Created message, ID: {message.id}\")" ] }, @@ -179,11 +347,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Run finished with status: RunStatus.COMPLETED\n", + "Run ID: run_YxXKo0irHJe8pa9xZ3RJrQNe\n" + ] + } + ], "source": [ - "run = project_client.agents.create_and_process_run(thread_id=thread.id, agent_id=agent.id)\n", + "run = project_client.agents.runs.create_and_process(thread_id=thread.id, agent_id=agent.id)\n", "\n", "print(f\"Run finished with status: {run.status}\")\n", "\n", @@ -202,11 +379,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Role: MessageRole.USER\n", + "Content: Can you email me weather info for Seattle ?\n", + "----------------------------------------\n", + "Role: MessageRole.AGENT\n", + "Content: The current weather in Seattle is **Rainy, 14°C**. 
Could you please provide your email address so I can send this information to you?\n", + "----------------------------------------\n" + ] + } + ], "source": [ - "for message in project_client.agents.list_messages(thread.id, order=\"asc\").data:\n", + "for message in project_client.agents.messages.list(thread_id=thread.id, order=ListSortOrder.ASCENDING):\n", " print(f\"Role: {message.role}\")\n", " print(f\"Content: {message.content[0].text.value}\")\n", " print(\"-\" * 40)" @@ -228,9 +418,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Class AIAgentConverter: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n", + "Class FDPAgentDataRetriever: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n", + "Class AIAgentDataRetriever: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n" + ] + } + ], "source": [ "from azure.ai.evaluation import AIAgentConverter\n", "\n", @@ -265,9 +465,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Class IntentResolutionEvaluator: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n", + "Class ToolCallAccuracyEvaluator: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.\n", + "Class TaskAdherenceEvaluator: This is an experimental class, and may change at any time. 
Please see https://aka.ms/azuremlexperimental for more information.\n" + ] + } + ], "source": [ "from azure.ai.evaluation import (\n", " ToolCallAccuracyEvaluator,\n", @@ -306,9 +516,125 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2025-06-04 14:16:46 -0700][promptflow._core.entry_meta_generator][WARNING] - Generate meta in current process and timeout won't take effect. Please handle timeout manually outside current process.\n", + "[2025-06-04 14:16:46 -0700][promptflow._core.entry_meta_generator][WARNING] - Generate meta in current process and timeout won't take effect. Please handle timeout manually outside current process.\n", + "[2025-06-04 14:16:46 -0700][promptflow._core.entry_meta_generator][WARNING] - Generate meta in current process and timeout won't take effect. Please handle timeout manually outside current process.\n", + "[2025-06-04 14:16:46 -0700][promptflow._sdk._orchestrator.run_submitter][INFO] - Submitting run azure_ai_evaluation_evaluators_task_adherence_20250604_141645_529728, log path: /Users/cv/.promptflow/.runs/azure_ai_evaluation_evaluators_task_adherence_20250604_141645_529728/logs.txt\n", + "[2025-06-04 14:16:46 -0700][promptflow._sdk._orchestrator.run_submitter][INFO] - Submitting run azure_ai_evaluation_evaluators_intent_resolution_20250604_141645_529535, log path: /Users/cv/.promptflow/.runs/azure_ai_evaluation_evaluators_intent_resolution_20250604_141645_529535/logs.txt\n", + "[2025-06-04 14:16:46 -0700][promptflow._sdk._orchestrator.run_submitter][INFO] - Submitting run azure_ai_evaluation_evaluators_tool_call_accuracy_20250604_141645_529346, log path: /Users/cv/.promptflow/.runs/azure_ai_evaluation_evaluators_tool_call_accuracy_20250604_141645_529346/logs.txt\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2025-06-04 14:16:46 -0700 77407 execution.bulk 
INFO Current thread is not main thread, skip signal handler registration in BatchEngine.\n", + "2025-06-04 14:16:46 -0700 77407 execution.bulk INFO Finished 3 / 3 lines.\n", + "2025-06-04 14:16:46 -0700 77407 execution.bulk INFO Average execution time for completed lines: 0.01 seconds. Estimated time for incomplete lines: 0.0 seconds.\n", + "======= Run Summary =======\n", + "\n", + "Run name: \"azure_ai_evaluation_evaluators_tool_call_accuracy_20250604_141645_529346\"\n", + "Run status: \"Completed\"\n", + "Start time: \"2025-06-04 14:16:45.540568-07:00\"\n", + "Duration: \"0:00:01.776557\"\n", + "Output path: \"/Users/cv/.promptflow/.runs/azure_ai_evaluation_evaluators_tool_call_accuracy_20250604_141645_529346\"\n", + "\n", + "2025-06-04 14:16:49 -0700 77407 execution.bulk INFO Finished 1 / 3 lines.\n", + "2025-06-04 14:16:49 -0700 77407 execution.bulk INFO Average execution time for completed lines: 2.91 seconds. Estimated time for incomplete lines: 5.82 seconds.\n", + "2025-06-04 14:16:49 -0700 77407 execution.bulk INFO Finished 2 / 3 lines.\n", + "2025-06-04 14:16:49 -0700 77407 execution.bulk INFO Average execution time for completed lines: 1.53 seconds. Estimated time for incomplete lines: 1.53 seconds.\n", + "2025-06-04 14:16:49 -0700 77407 execution.bulk INFO Finished 1 / 3 lines.\n", + "2025-06-04 14:16:49 -0700 77407 execution.bulk INFO Average execution time for completed lines: 3.09 seconds. Estimated time for incomplete lines: 6.18 seconds.\n", + "2025-06-04 14:16:50 -0700 77407 execution.bulk INFO Finished 2 / 3 lines.\n", + "2025-06-04 14:16:50 -0700 77407 execution.bulk INFO Average execution time for completed lines: 2.02 seconds. Estimated time for incomplete lines: 2.02 seconds.\n", + "2025-06-04 14:16:50 -0700 77407 execution.bulk INFO Finished 3 / 3 lines.\n", + "2025-06-04 14:16:50 -0700 77407 execution.bulk INFO Average execution time for completed lines: 1.35 seconds. 
Estimated time for incomplete lines: 0.0 seconds.\n", + "2025-06-04 14:16:50 -0700 77407 execution.bulk INFO Finished 3 / 3 lines.\n", + "2025-06-04 14:16:50 -0700 77407 execution.bulk INFO Average execution time for completed lines: 1.49 seconds. Estimated time for incomplete lines: 0.0 seconds.\n", + "2025-06-04 14:16:46 -0700 77407 execution.bulk INFO Current thread is not main thread, skip signal handler registration in BatchEngine.\n", + "2025-06-04 14:16:49 -0700 77407 execution.bulk INFO Finished 1 / 3 lines.\n", + "2025-06-04 14:16:49 -0700 77407 execution.bulk INFO Average execution time for completed lines: 2.91 seconds. Estimated time for incomplete lines: 5.82 seconds.\n", + "2025-06-04 14:16:49 -0700 77407 execution.bulk INFO Finished 2 / 3 lines.\n", + "2025-06-04 14:16:49 -0700 77407 execution.bulk INFO Average execution time for completed lines: 1.53 seconds. Estimated time for incomplete lines: 1.53 seconds.\n", + "2025-06-04 14:16:50 -0700 77407 execution.bulk INFO Finished 3 / 3 lines.\n", + "2025-06-04 14:16:50 -0700 77407 execution.bulk INFO Average execution time for completed lines: 1.49 seconds. Estimated time for incomplete lines: 0.0 seconds.\n", + "======= Run Summary =======\n", + "\n", + "Run name: \"azure_ai_evaluation_evaluators_intent_resolution_20250604_141645_529535\"\n", + "Run status: \"Completed\"\n", + "Start time: \"2025-06-04 14:16:45.540524-07:00\"\n", + "Duration: \"0:00:05.750176\"\n", + "Output path: \"/Users/cv/.promptflow/.runs/azure_ai_evaluation_evaluators_intent_resolution_20250604_141645_529535\"\n", + "\n", + "2025-06-04 14:16:46 -0700 77407 execution.bulk INFO Current thread is not main thread, skip signal handler registration in BatchEngine.\n", + "2025-06-04 14:16:49 -0700 77407 execution.bulk INFO Finished 1 / 3 lines.\n", + "2025-06-04 14:16:49 -0700 77407 execution.bulk INFO Average execution time for completed lines: 3.09 seconds. 
Estimated time for incomplete lines: 6.18 seconds.\n", + "2025-06-04 14:16:50 -0700 77407 execution.bulk INFO Finished 2 / 3 lines.\n", + "2025-06-04 14:16:50 -0700 77407 execution.bulk INFO Average execution time for completed lines: 2.02 seconds. Estimated time for incomplete lines: 2.02 seconds.\n", + "2025-06-04 14:16:50 -0700 77407 execution.bulk INFO Finished 3 / 3 lines.\n", + "2025-06-04 14:16:50 -0700 77407 execution.bulk INFO Average execution time for completed lines: 1.35 seconds. Estimated time for incomplete lines: 0.0 seconds.\n", + "======= Run Summary =======\n", + "\n", + "Run name: \"azure_ai_evaluation_evaluators_task_adherence_20250604_141645_529728\"\n", + "Run status: \"Completed\"\n", + "Start time: \"2025-06-04 14:16:45.540607-07:00\"\n", + "Duration: \"0:00:05.768197\"\n", + "Output path: \"/Users/cv/.promptflow/.runs/azure_ai_evaluation_evaluators_task_adherence_20250604_141645_529728\"\n", + "\n", + "======= Combined Run Summary (Per Evaluator) =======\n", + "\n", + "{\n", + " \"tool_call_accuracy\": {\n", + " \"status\": \"Completed\",\n", + " \"duration\": \"0:00:01.776557\",\n", + " \"completed_lines\": 3,\n", + " \"failed_lines\": 0,\n", + " \"log_path\": \"/Users/cv/.promptflow/.runs/azure_ai_evaluation_evaluators_tool_call_accuracy_20250604_141645_529346\"\n", + " },\n", + " \"intent_resolution\": {\n", + " \"status\": \"Completed\",\n", + " \"duration\": \"0:00:05.750176\",\n", + " \"completed_lines\": 3,\n", + " \"failed_lines\": 0,\n", + " \"log_path\": \"/Users/cv/.promptflow/.runs/azure_ai_evaluation_evaluators_intent_resolution_20250604_141645_529535\"\n", + " },\n", + " \"task_adherence\": {\n", + " \"status\": \"Completed\",\n", + " \"duration\": \"0:00:05.768197\",\n", + " \"completed_lines\": 3,\n", + " \"failed_lines\": 0,\n", + " \"log_path\": \"/Users/cv/.promptflow/.runs/azure_ai_evaluation_evaluators_task_adherence_20250604_141645_529728\"\n", + " }\n", + "}\n", + "\n", + 
"====================================================\n", + "\n", + "('AI Foundary URL: '\n", + " 'https://ai.azure.com/build/evaluation/45e8b927-ff7a-47b2-b37b-66776d2d38eb?wsid=/subscriptions/6415ebd4-1dd7-430f-bd4d-2f5e9419c1cd/resourceGroups/rg-cvi-ai-eng-fair-hub-eastus2/providers/Microsoft.MachineLearningServices/workspaces/cvi-ai-eng-fair-hub-eastus2')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'intent_resolution.binary_aggregate': 1.0,\n", + " 'intent_resolution.intent_resolution': 5.0,\n", + " 'intent_resolution.intent_resolution_threshold': 3.0,\n", + " 'task_adherence.binary_aggregate': 1.0,\n", + " 'task_adherence.task_adherence': 5.0,\n", + " 'task_adherence.task_adherence_threshold': 3.0,\n", + " 'tool_call_accuracy.binary_aggregate': 0.0,\n", + " 'tool_call_accuracy.tool_call_accuracy_threshold': 0.8000000000000002}\n" + ] + } + ], "source": [ "from azure.ai.evaluation import evaluate\n", "\n", @@ -320,9 +646,9 @@ " \"task_adherence\": task_adherence,\n", " },\n", " azure_ai_project={\n", - " \"subscription_id\": os.environ[\"AZURE_SUBSCRIPTION_ID\"],\n", - " \"project_name\": os.environ[\"PROJECT_NAME\"],\n", - " \"resource_group_name\": os.environ[\"RESOURCE_GROUP_NAME\"],\n", + " \"subscription_id\": os.environ[\"REPORT_AZURE_SUBSCRIPTION_ID\"],\n", + " \"project_name\": os.environ[\"REPORT_PROJECT_NAME\"],\n", + " \"resource_group_name\": os.environ[\"REPORT_RESOURCE_GROUP_NAME\"],\n", " },\n", ")\n", "pprint(f'AI Foundary URL: {response.get(\"studio_url\")}')" @@ -339,7 +665,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -352,7 +678,7 @@ ], "metadata": { "kernelspec": { - "display_name": "evaluate-agents-test", + "display_name": ".venv", "language": "python", "name": "python3" }, @@ -365,7 +691,8 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" + "pygments_lexer": 
"ipython3", + "version": "3.13.3" } }, "nbformat": 4, diff --git a/scenarios/evaluate/Supported_Evaluation_Metrics/README.md b/scenarios/evaluate/Supported_Evaluation_Metrics/README.md index 8b9e3dd4..c3415453 100644 --- a/scenarios/evaluate/Supported_Evaluation_Metrics/README.md +++ b/scenarios/evaluate/Supported_Evaluation_Metrics/README.md @@ -27,3 +27,25 @@ Currently, Azure AI Evaluation SDK supports three types of evaluators: You can run evaluators locally or [remotely](../Supported_Evaluation_Targets/Evaluate_On_Cloud/Evaluate_On_Cloud.ipynb), log results in the cloud using the evaluation SDK, or integrate them into automated evaluations within the Azure AI Studio UI. + +## Environment Variables +The following environment variables should be set in a `.env` file at the root of the project: + +### To run the evaluations: +- `AZURE_OPENAI_ENDPOINT`: The endpoint URL for Azure OpenAI. +- `AZURE_OPENAI_DEPLOYMENT`: The deployment name for the Azure OpenAI model (e.g., `gpt-4o`). +- `MODEL_DEPLOYMENT_NAME`: The deployment name for the model used in evaluations (e.g., `gpt-4o`). +- `AGENT_MODEL_DEPLOYMENT_NAME`: The deployment name for the agent model (e.g., `gpt-4o`). +- `AZURE_OPENAI_API_VERSION`: The API version for Azure OpenAI. +- `AZURE_SUBSCRIPTION_ID`: The Azure subscription ID. +- `PROJECT_NAME`: The name of the Azure project. +- `RESOURCE_GROUP_NAME`: The name of the Azure resource group. +- `AZURE_AI_PROJECT`: The Azure AI project identifier. +- `AZURE_OPENAI_API_KEY`: The API key for Azure OpenAI. + +### To upload reports to Azure AI Foundry: +- `REPORT_AZURE_SUBSCRIPTION_ID`: The Azure subscription ID for report uploads. +- `REPORT_PROJECT_NAME`: The project name for report uploads. +- `REPORT_RESOURCE_GROUP_NAME`: The resource group name for report uploads. + +Ensure all these variables are properly configured in your `.env` file before running the application.