From 2d6a728692223db0369fb59743ba5b090021f117 Mon Sep 17 00:00:00 2001 From: Xiaochao Dong Date: Mon, 28 Apr 2025 15:17:46 +0800 Subject: [PATCH 01/25] Remove unused argument from the MultiStepAgent documentation (#1250) --- src/smolagents/agents.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py index 8ff8eb230..ec32101aa 100644 --- a/src/smolagents/agents.py +++ b/src/smolagents/agents.py @@ -177,7 +177,6 @@ class MultiStepAgent(ABC): model (`Callable[[list[dict[str, str]]], ChatMessage]`): Model that will generate the agent's actions. prompt_templates ([`~agents.PromptTemplates`], *optional*): Prompt templates. max_steps (`int`, default `20`): Maximum number of steps the agent can take to solve the task. - tool_parser (`Callable`, *optional*): Function used to parse the tool calls from the LLM output. add_base_tools (`bool`, default `False`): Whether to add the base tools to the agent's tools. verbosity_level (`LogLevel`, default `LogLevel.INFO`): Level of verbosity of the agent's logs. grammar (`dict[str, str]`, *optional*): Grammar used to parse the LLM output. 
From 1e2cdcdd9ff1cf3d448eab06d44a34fb346cbfcf Mon Sep 17 00:00:00 2001 From: Merve Noyan Date: Mon, 28 Apr 2025 10:38:02 +0200 Subject: [PATCH 02/25] Add billing for inference providers model (#1260) --- pyproject.toml | 2 +- src/smolagents/models.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0db6ab2b8..78f811a86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ authors = [ readme = "README.md" requires-python = ">=3.10" dependencies = [ - "huggingface-hub>=0.28.0", + "huggingface-hub>=0.30.0", "requests>=2.32.3", "rich>=13.9.4", "jinja2>=3.1.4", diff --git a/src/smolagents/models.py b/src/smolagents/models.py index 433f8fbe5..0eb9c99a5 100644 --- a/src/smolagents/models.py +++ b/src/smolagents/models.py @@ -1193,6 +1193,9 @@ class InferenceClientModel(ApiModel): api_key (`str`, *optional*): Token to use for authentication. This is a duplicated argument from `token` to make [`InferenceClientModel`] follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None. + bill_to (`str`, *optional*): + The billing account to use for the requests. By default the requests are billed on the user’s account. Requests can only be billed to + an organization the user is a member of, and which has subscribed to Enterprise Hub. **kwargs: Additional keyword arguments to pass to the Hugging Face API. 
@@ -1224,6 +1227,7 @@ def __init__( client_kwargs: dict[str, Any] | None = None, custom_role_conversions: dict[str, str] | None = None, api_key: str | None = None, + bill_to: str | None = None, **kwargs, ): if token is not None and api_key is not None: @@ -1241,6 +1245,7 @@ def __init__( "provider": provider, "token": token, "timeout": timeout, + "bill_to": bill_to, } super().__init__(model_id=model_id, custom_role_conversions=custom_role_conversions, **kwargs) From 8758ef3b680c20d3cabcab043364925f9b966f70 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 28 Apr 2025 16:16:57 +0200 Subject: [PATCH 03/25] Fix return type hint for generate_stream and rename CompletionDelta to ChatMessageStreamDelta (#1247) --- src/smolagents/models.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/smolagents/models.py b/src/smolagents/models.py index 0eb9c99a5..ae930ae5d 100644 --- a/src/smolagents/models.py +++ b/src/smolagents/models.py @@ -139,7 +139,7 @@ def parse_json_if_needed(arguments: str | dict) -> str | dict: @dataclass -class CompletionDelta: +class ChatMessageStreamDelta: content: str | None = None tool_calls: list[ChatMessageToolCall] | None = None @@ -892,7 +892,7 @@ def generate_stream( grammar: str | None = None, tools_to_call_from: list[Tool] | None = None, **kwargs, - ) -> Generator: + ) -> Generator[ChatMessageStreamDelta]: generation_kwargs = self._prepare_completion_args( messages=messages, stop_sequences=stop_sequences, @@ -909,7 +909,7 @@ def generate_stream( # Generate with streaming for new_text in self.streamer: - yield CompletionDelta(content=new_text, tool_calls=None) + yield ChatMessageStreamDelta(content=new_text, tool_calls=None) self.last_output_token_count += 1 self.last_input_token_count = count_prompt_tokens @@ -1044,7 +1044,7 @@ def generate_stream( grammar: str | None = None, tools_to_call_from: list[Tool] | None = None, **kwargs, - ) 
-> Generator: + ) -> Generator[ChatMessageStreamDelta]: if tools_to_call_from: raise NotImplementedError("Streaming is not yet supported for tool calling") completion_kwargs = self._prepare_completion_kwargs( @@ -1063,7 +1063,7 @@ def generate_stream( if not getattr(event.choices[0], "finish_reason", None): raise ValueError(f"No content or tool calls in event: {event}") else: - yield CompletionDelta( + yield ChatMessageStreamDelta( content=event.choices[0].delta.content, ) if getattr(event, "usage", None): @@ -1285,7 +1285,7 @@ def generate_stream( grammar: str | None = None, tools_to_call_from: list[Tool] | None = None, **kwargs, - ) -> Generator: + ) -> Generator[ChatMessageStreamDelta]: if tools_to_call_from: raise NotImplementedError("Streaming is not yet supported for tool calling") completion_kwargs = self._prepare_completion_kwargs( @@ -1306,7 +1306,7 @@ def generate_stream( if not getattr(event.choices[0], "finish_reason", None): raise ValueError(f"No content or tool calls in event: {event}") else: - yield CompletionDelta( + yield ChatMessageStreamDelta( content=event.choices[0].delta.content, ) if getattr(event, "usage", None): @@ -1391,7 +1391,7 @@ def generate_stream( grammar: str | None = None, tools_to_call_from: list[Tool] | None = None, **kwargs, - ) -> Generator: + ) -> Generator[ChatMessageStreamDelta]: if tools_to_call_from: raise NotImplementedError("Streaming is not yet supported for tool calling") completion_kwargs = self._prepare_completion_kwargs( @@ -1412,7 +1412,7 @@ def generate_stream( if not getattr(event.choices[0], "finish_reason", None): raise ValueError(f"No content or tool calls in event: {event}") else: - yield CompletionDelta( + yield ChatMessageStreamDelta( content=event.choices[0].delta.content, ) if getattr(event, "usage", None): From 08319f2f771343fb40c8be43719cc3bed7716828 Mon Sep 17 00:00:00 2001 From: Aymeric Roucher <69208727+aymeric-roucher@users.noreply.github.com> Date: Wed, 30 Apr 2025 15:59:55 +0200 Subject: [PATCH 
04/25] Fix Loading GAIA dataset (#1266) Co-authored-by: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> --- examples/open_deep_research/README.md | 12 ++- examples/open_deep_research/analysis.ipynb | 35 +----- examples/open_deep_research/run_gaia.py | 100 +++++++++++------- .../open_deep_research/scripts/run_agents.py | 3 +- 4 files changed, 81 insertions(+), 69 deletions(-) diff --git a/examples/open_deep_research/README.md b/examples/open_deep_research/README.md index c2c799616..bfd372faf 100644 --- a/examples/open_deep_research/README.md +++ b/examples/open_deep_research/README.md @@ -51,4 +51,14 @@ For example, to use the default `o1` model, you need to set the `OPENAI_API_KEY` Then you're good to go! Run the run.py script, as in: ```bash python run.py --model-id "o1" "Your question here!" -``` \ No newline at end of file +``` + +## Full reproducibility of results + +The data used in our submissions to GAIA was augmented in this way: + - For each single-page .pdf or .xls file, it was opened in a file reader (MacOS Sonoma Numbers or Preview), and a ".png" screenshot was taken and added to the folder. +- Then for any file used in a question, the file loading system checks if there is a ".png" extension version of the file, and loads it instead of the original if it exists. + +This process was done manually but could be automatized. + +After processing, the annotated was uploaded to a [new dataset](https://huggingface.co/datasets/smolagents/GAIA-annotated). You need to request access (granted instantly). 
\ No newline at end of file diff --git a/examples/open_deep_research/analysis.ipynb b/examples/open_deep_research/analysis.ipynb index ccb6a1d54..53e7bd430 100644 --- a/examples/open_deep_research/analysis.ipynb +++ b/examples/open_deep_research/analysis.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "# !pip install plotly kaleido datasets nbformat -U -q" + "!pip install plotly kaleido datasets nbformat -U -q" ] }, { @@ -28,7 +28,7 @@ "\n", "pd.set_option(\"max_colwidth\", None)\n", "\n", - "OUTPUT_DIR = \"../../output\"" + "OUTPUT_DIR = \"output\"" ] }, { @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 88, "metadata": {}, "outputs": [], "source": [ @@ -134,31 +134,6 @@ "# result_df[\"tool_calls\"] = result_df[\"intermediate_steps\"].apply(sum_tool_calls)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def get_thoughts(x):\n", - " try:\n", - " output = x[0][\"task\"]\n", - " for y in x[1:]:\n", - " try:\n", - " if \"observation\" in y:\n", - " output += y[\"llm_output\"] + \"\\nObservation:\" + y[\"observation\"]\n", - " else:\n", - " output += y[\"llm_output\"] + r\"\\Error:\" + str(y[\"error\"])\n", - " except Exception:\n", - " pass\n", - " return output\n", - " except Exception:\n", - " return None\n", - "\n", - "\n", - "result_df[\"thoughts\"] = result_df[\"intermediate_steps\"].apply(lambda x: get_thoughts(x))" - ] - }, { "cell_type": "code", "execution_count": null, @@ -460,9 +435,9 @@ ], "metadata": { "kernelspec": { - "display_name": "test", + "display_name": "agents", "language": "python", - "name": "test" + "name": "python3" }, "language_info": { "codemirror_mode": { diff --git a/examples/open_deep_research/run_gaia.py b/examples/open_deep_research/run_gaia.py index 192081787..9c7bacd4e 100644 --- a/examples/open_deep_research/run_gaia.py +++ b/examples/open_deep_research/run_gaia.py @@ -1,4 +1,4 @@ -# EXAMPLE COMMAND: python 
examples/open_deep_research/run_gaia.py --concurrency 32 --run-name generate-traces-03-apr-noplanning --model-id gpt-4o +# EXAMPLE COMMAND: from folder examples/open_deep_research, run: python run_gaia.py --concurrency 32 --run-name generate-traces-03-apr-noplanning --model-id gpt-4o import argparse import json import os @@ -6,11 +6,12 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime from pathlib import Path +from typing import Any import datasets import pandas as pd from dotenv import load_dotenv -from huggingface_hub import login +from huggingface_hub import login, snapshot_download from scripts.reformulator import prepare_response from scripts.run_agents import ( get_single_file_description, @@ -49,35 +50,18 @@ def parse_args(): parser.add_argument("--concurrency", type=int, default=8) parser.add_argument("--model-id", type=str, default="o1") parser.add_argument("--run-name", type=str, required=True) + parser.add_argument("--set-to-run", type=str, default="validation") + parser.add_argument("--use-open-models", type=bool, default=False) + parser.add_argument("--use-raw-dataset", action="store_true") return parser.parse_args() ### IMPORTANT: EVALUATION SWITCHES -print("Make sure you deactivated Tailscale VPN, else some URLs will be blocked!") - -USE_OPEN_MODELS = False - -SET = "validation" +print("Make sure you deactivated any VPN like Tailscale, else some URLs will be blocked!") custom_role_conversions = {"tool-call": "assistant", "tool-response": "user"} -### LOAD EVALUATION DATASET - -eval_ds = datasets.load_dataset("gaia-benchmark/GAIA", "2023_all")[SET] -eval_ds = eval_ds.rename_columns({"Question": "question", "Final answer": "true_answer", "Level": "task"}) - - -def preprocess_file_paths(row): - if len(row["file_name"]) > 0: - row["file_name"] = f"data/gaia/{SET}/" + row["file_name"] - return row - - -eval_ds = eval_ds.map(preprocess_file_paths) -eval_df = pd.DataFrame(eval_ds) -print("Loaded evaluation 
dataset:") -print(eval_df["task"].value_counts()) user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0" @@ -142,17 +126,54 @@ def create_agent_team(model: Model): return manager_agent +def load_gaia_dataset(use_raw_dataset: bool, set_to_run: str) -> datasets.Dataset: + if not os.path.exists("data/gaia"): + if use_raw_dataset: + snapshot_download( + repo_id="gaia-benchmark/GAIA", + repo_type="dataset", + local_dir="data/gaia", + ignore_patterns=[".gitattributes", "README.md"], + ) + else: + # WARNING: this dataset is gated: make sure you visit the repo to require access. + snapshot_download( + repo_id="smolagents/GAIA-annotated", + repo_type="dataset", + local_dir="data/gaia", + ignore_patterns=[".gitattributes", "README.md"], + ) + + def preprocess_file_paths(row): + if len(row["file_name"]) > 0: + row["file_name"] = f"data/gaia/{set_to_run}/" + row["file_name"] + return row + + eval_ds = datasets.load_dataset( + "data/gaia/GAIA.py", + name="2023_all", + split=set_to_run, + # data_files={"validation": "validation/metadata.jsonl", "test": "test/metadata.jsonl"}, + ) + + eval_ds = eval_ds.rename_columns({"Question": "question", "Final answer": "true_answer", "Level": "task"}) + eval_ds = eval_ds.map(preprocess_file_paths) + return eval_ds + + def append_answer(entry: dict, jsonl_file: str) -> None: - jsonl_file = Path(jsonl_file) - jsonl_file.parent.mkdir(parents=True, exist_ok=True) + jsonl_path = Path(jsonl_file) + jsonl_path.parent.mkdir(parents=True, exist_ok=True) with append_answer_lock, open(jsonl_file, "a", encoding="utf-8") as fp: fp.write(json.dumps(entry) + "\n") - assert os.path.exists(jsonl_file), "File not found!" - print("Answer exported to file:", jsonl_file.resolve()) + assert jsonl_path.exists(), "File not found!" 
+ print("Answer exported to file:", jsonl_path.resolve()) -def answer_single_question(example, model_id, answers_file, visual_inspection_tool): - model_params = { +def answer_single_question( + example: dict, model_id: str, answers_file: str, visual_inspection_tool: TextInspectorTool +) -> None: + model_params: dict[str, Any] = { "model_id": model_id, "custom_role_conversions": custom_role_conversions, } @@ -162,15 +183,16 @@ def answer_single_question(example, model_id, answers_file, visual_inspection_to else: model_params["max_tokens"] = 4096 model = LiteLLMModel(**model_params) - # model = InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct", provider="together", max_tokens=4096) + # model = InferenceClientModel(model_id="Qwen/Qwen3-32B", provider="novita", max_tokens=4096) document_inspection_tool = TextInspectorTool(model, 100000) agent = create_agent_team(model) augmented_question = """You have one question to answer. It is paramount that you provide a correct answer. -Give it all you can: I know for a fact that you have access to all the relevant tools to solve it and find the correct answer (the answer does exist). Failure or 'I cannot answer' or 'None found' will not be tolerated, success will be rewarded. -Run verification steps if that's needed, you must make sure you find the correct answer! -Here is the task: +Give it all you can: I know for a fact that you have access to all the relevant tools to solve it and find the correct answer (the answer does exist). +Failure or 'I cannot answer' or 'None found' will not be tolerated, success will be rewarded. +Run verification steps if that's needed, you must make sure you find the correct answer! 
Here is the task: + """ + example["question"] if example["file_name"]: @@ -180,7 +202,7 @@ def answer_single_question(example, model_id, answers_file, visual_inspection_to example["file_name"], example["question"], visual_inspection_tool, document_inspection_tool ) else: - prompt_use_files = "\n\nTo solve the task above, you will have to use this attached file:" + prompt_use_files = "\n\nTo solve the task above, you will have to use this attached file:\n" prompt_use_files += get_single_file_description( example["file_name"], example["question"], visual_inspection_tool, document_inspection_tool ) @@ -241,7 +263,7 @@ def answer_single_question(example, model_id, answers_file, visual_inspection_to append_answer(annotated_example, answers_file) -def get_examples_to_answer(answers_file, eval_ds) -> list[dict]: +def get_examples_to_answer(answers_file: str, eval_ds: datasets.Dataset) -> list[dict]: print(f"Loading answers from {answers_file}...") try: done_questions = pd.read_json(answers_file, lines=True)["question"].tolist() @@ -250,14 +272,18 @@ def get_examples_to_answer(answers_file, eval_ds) -> list[dict]: print("Error when loading records: ", e) print("No usable records! 
▶️ Starting new.") done_questions = [] - return [line for line in eval_ds.to_list() if line["question"] not in done_questions] + return [line for line in eval_ds.to_list() if line["question"] not in done_questions and line["file_name"]] def main(): args = parse_args() print(f"Starting run with arguments: {args}") - answers_file = f"output/{SET}/{args.run_name}.jsonl" + eval_ds = load_gaia_dataset(args.use_raw_dataset, args.set_to_run) + print("Loaded evaluation dataset:") + print(pd.DataFrame(eval_ds)["task"].value_counts()) + + answers_file = f"output/{args.set_to_run}/{args.run_name}.jsonl" tasks_to_run = get_examples_to_answer(answers_file, eval_ds) with ThreadPoolExecutor(max_workers=args.concurrency) as exe: diff --git a/examples/open_deep_research/scripts/run_agents.py b/examples/open_deep_research/scripts/run_agents.py index 37da8a40e..e2e020cb3 100644 --- a/examples/open_deep_research/scripts/run_agents.py +++ b/examples/open_deep_research/scripts/run_agents.py @@ -38,12 +38,13 @@ def get_single_file_description(file_path: str, question: str, visual_inspection ) return file_description elif file_extension in ["pdf", "xls", "xlsx", "docx", "doc", "xml"]: - file_description = f" - Attached document: {file_path}" image_path = file_path.split(".")[0] + ".png" if os.path.exists(image_path): description = get_image_description(image_path, question, visual_inspection_tool) + file_path = image_path else: description = get_document_description(file_path, question, document_inspection_tool) + file_description = f" - Attached document: {file_path}" file_description += f"\n -> File description: {description}" return file_description elif file_extension in ["mp3", "m4a", "wav"]: From d02f0cc6a98383662a202f34053deca5c5942ee1 Mon Sep 17 00:00:00 2001 From: Aymeric Roucher <69208727+aymeric-roucher@users.noreply.github.com> Date: Wed, 30 Apr 2025 16:15:51 +0200 Subject: [PATCH 05/25] Working streaming Gradio chatbot outputs (#1246) Co-authored-by: Albert Villanova del 
Moral <8515462+albertvillanova@users.noreply.github.com> --- examples/agent_from_any_llm.py | 2 +- examples/gradio_ui.py | 16 ++---- examples/multi_llm_agent.py | 7 +-- examples/multiple_tools.py | 3 +- examples/rag.py | 3 +- src/smolagents/agents.py | 99 ++++++++++++++++++++++------------ src/smolagents/gradio_ui.py | 99 +++++++++++++++++++++++----------- tests/test_agents.py | 15 +++--- tests/test_monitoring.py | 6 +-- 9 files changed, 153 insertions(+), 97 deletions(-) diff --git a/examples/agent_from_any_llm.py b/examples/agent_from_any_llm.py index bc421274c..d5e33f0a1 100644 --- a/examples/agent_from_any_llm.py +++ b/examples/agent_from_any_llm.py @@ -52,6 +52,6 @@ def get_weather(location: str, celsius: bool | None = False) -> str: print("ToolCallingAgent:", agent.run("What's the weather like in Paris?")) -agent = CodeAgent(tools=[get_weather], model=model, verbosity_level=2) +agent = CodeAgent(tools=[get_weather], model=model, verbosity_level=2, stream_outputs=True) print("CodeAgent:", agent.run("What's the weather like in Paris?")) diff --git a/examples/gradio_ui.py b/examples/gradio_ui.py index 81c56a1f2..87f532689 100644 --- a/examples/gradio_ui.py +++ b/examples/gradio_ui.py @@ -1,25 +1,15 @@ -from io import BytesIO - -import requests -from PIL import Image - from smolagents import CodeAgent, GradioUI, InferenceClientModel -def add_agent_image(memory_step, agent): - url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/smolagents/smolagents.png" - response = requests.get(url) - memory_step.observations_images = [Image.open(BytesIO(response.content))] - - agent = CodeAgent( tools=[], model=InferenceClientModel(), verbosity_level=1, planning_interval=3, name="example_agent", - description="This is an example agent that has not tool but will always see an agent at the end of its step.", - step_callbacks=[add_agent_image], + description="This is an example agent.", + step_callbacks=[], + stream_outputs=False, ) GradioUI(agent, 
file_upload_folder="./data").launch() diff --git a/examples/multi_llm_agent.py b/examples/multi_llm_agent.py index 186fa06f8..e46354e03 100644 --- a/examples/multi_llm_agent.py +++ b/examples/multi_llm_agent.py @@ -3,10 +3,7 @@ from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMRouterModel -os.environ["OPENAI_API_KEY"] = "" -os.environ["AWS_ACCESS_KEY_ID"] = "" -os.environ["AWS_SECRET_ACCESS_KEY"] = "" -os.environ["AWS_REGION"] = "" +# Make sure to setup the necessary environment variables! llm_loadbalancer_model_list = [ { @@ -42,6 +39,6 @@ model_list=llm_loadbalancer_model_list, client_kwargs={"routing_strategy": "simple-shuffle"}, ) -agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model) +agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model, stream_outputs=True) agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?") diff --git a/examples/multiple_tools.py b/examples/multiple_tools.py index a2685541f..1a56e3519 100644 --- a/examples/multiple_tools.py +++ b/examples/multiple_tools.py @@ -243,11 +243,12 @@ def search_wikipedia(query: str) -> str: search_wikipedia, ], model=model, + stream_outputs=True, ) # Uncomment the line below to run the agent with a specific query -agent.run("5000 dollars to Euros") +agent.run("Convert 5000 dollars to Euros") # agent.run("What is the weather in New York?") # agent.run("Give me the top news headlines") # agent.run("Tell me a joke") diff --git a/examples/rag.py b/examples/rag.py index 3ff572fb3..6f378d11e 100644 --- a/examples/rag.py +++ b/examples/rag.py @@ -58,9 +58,10 @@ def forward(self, query: str) -> str: retriever_tool = RetrieverTool(docs_processed) agent = CodeAgent( tools=[retriever_tool], - model=InferenceClientModel(model_id="meta-llama/Llama-3.3-70B-Instruct"), + model=InferenceClientModel(model_id="Qwen/Qwen2.5-Coder-32B-Instruct"), max_steps=4, verbosity_level=2, + stream_outputs=True, ) agent_output = agent.run("For a transformers model 
training, which is slower, the forward or the backward pass?") diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py index ec32101aa..d052f144a 100644 --- a/src/smolagents/agents.py +++ b/src/smolagents/agents.py @@ -23,7 +23,6 @@ import textwrap import time from abc import ABC, abstractmethod -from collections import deque from collections.abc import Callable, Generator from logging import getLogger from pathlib import Path @@ -57,7 +56,7 @@ TaskStep, ToolCall, ) -from .models import ChatMessage, MessageRole, Model, parse_json_if_needed +from .models import ChatMessage, ChatMessageStreamDelta, MessageRole, Model, parse_json_if_needed from .monitoring import ( YELLOW_HEX, AgentLogger, @@ -248,6 +247,7 @@ def __init__( self.monitor = Monitor(self.model, self.logger) self.step_callbacks = step_callbacks if step_callbacks is not None else [] self.step_callbacks.append(self.monitor.update_metrics) + self.stream_outputs = False def _validate_name(self, name: str | None) -> str | None: if name is not None and not is_valid_name(name): @@ -346,11 +346,11 @@ def run( if stream: # The steps are returned as they are executed through a generator to iterate on. - return self._run(task=self.task, max_steps=max_steps, images=images) + return self._run_stream(task=self.task, max_steps=max_steps, images=images) # Outputs are returned only at the end. We only look at the last step. 
- return deque(self._run(task=self.task, max_steps=max_steps, images=images), maxlen=1)[0].final_answer + return list(self._run_stream(task=self.task, max_steps=max_steps, images=images))[-1].final_answer - def _run( + def _run_stream( self, task: str, max_steps: int, images: list["PIL.Image.Image"] | None = None ) -> Generator[ActionStep | PlanningStep | FinalAnswerStep]: final_answer = None @@ -362,16 +362,18 @@ def _run( if self.planning_interval is not None and ( self.step_number == 1 or (self.step_number - 1) % self.planning_interval == 0 ): - planning_step = self._generate_planning_step( + for element in self._generate_planning_step( task, is_first_step=(self.step_number == 1), step=self.step_number - ) - self.memory.steps.append(planning_step) - yield planning_step + ): + yield element + self.memory.steps.append(element) action_step = ActionStep( step_number=self.step_number, start_time=step_start_time, observations_images=images ) try: - final_answer = self._execute_step(task, action_step) + for el in self._execute_step(action_step): + yield el + final_answer = el except AgentGenerationError as e: # Agent generation errors are not caused by a Model error but an implementation error: so we should raise them and exit. 
raise e @@ -389,12 +391,15 @@ def _run( yield action_step yield FinalAnswerStep(handle_agent_output_types(final_answer)) - def _execute_step(self, task: str, memory_step: ActionStep) -> None | Any: + def _execute_step(self, memory_step: ActionStep) -> Generator[Any]: self.logger.log_rule(f"Step {self.step_number}", level=LogLevel.INFO) - final_answer = self.step(memory_step) + final_answer = None + for el in self._step_stream(memory_step): + final_answer = el + yield el if final_answer is not None and self.final_answer_checks: self._validate_final_answer(final_answer) - return final_answer + yield final_answer def _validate_final_answer(self, final_answer: Any): for check_function in self.final_answer_checks: @@ -427,7 +432,9 @@ def _handle_max_steps_reached(self, task: str, images: list["PIL.Image.Image"], ) return final_answer - def _generate_planning_step(self, task, is_first_step: bool, step: int) -> PlanningStep: + def _generate_planning_step( + self, task, is_first_step: bool, step: int + ) -> Generator[ChatMessageStreamDelta, PlanningStep]: if is_first_step: input_messages = [ { @@ -443,9 +450,15 @@ def _generate_planning_step(self, task, is_first_step: bool, step: int) -> Plann ], } ] - plan_message = self.model(input_messages, stop_sequences=[""]) + if self.stream_outputs and hasattr(self.model, "generate_stream"): + plan_message_content = "" + for completion_delta in self.model.generate_stream(input_messages, stop_sequences=[""]): # type: ignore + plan_message_content += completion_delta.content + yield completion_delta + else: + plan_message_content = self.model.generate(input_messages, stop_sequences=[""]).content plan = textwrap.dedent( - f"""Here are the facts I know and the plan of action that I will follow to solve the task:\n```\n{plan_message.content}\n```""" + f"""Here are the facts I know and the plan of action that I will follow to solve the task:\n```\n{plan_message_content}\n```""" ) else: # Summary mode removes the system prompt and previous 
planning messages output by the model. @@ -480,16 +493,22 @@ def _generate_planning_step(self, task, is_first_step: bool, step: int) -> Plann ], } input_messages = [plan_update_pre] + memory_messages + [plan_update_post] - plan_message = self.model(input_messages, stop_sequences=[""]) + if self.stream_outputs and hasattr(self.model, "generate_stream"): + plan_message_content = "" + for completion_delta in self.model.generate_stream(input_messages, stop_sequences=[""]): # type: ignore + plan_message_content += completion_delta.content + yield completion_delta + else: + plan_message_content = self.model.generate(input_messages, stop_sequences=[""]).content plan = textwrap.dedent( - f"""I still need to solve the task I was given:\n```\n{self.task}\n```\n\nHere are the facts I know and my new/updated plan of action to solve the task:\n```\n{plan_message.content}\n```""" + f"""I still need to solve the task I was given:\n```\n{self.task}\n```\n\nHere are the facts I know and my new/updated plan of action to solve the task:\n```\n{plan_message_content}\n```""" ) log_headline = "Initial plan" if is_first_step else "Updated plan" self.logger.log(Rule(f"[bold]{log_headline}", style="orange"), Text(plan), level=LogLevel.INFO) - return PlanningStep( + yield PlanningStep( model_input_messages=input_messages, plan=plan, - model_output_message=plan_message, + model_output_message=ChatMessage(role=MessageRole.ASSISTANT, content=plan_message_content), ) @property @@ -522,9 +541,19 @@ def write_memory_to_messages( messages.extend(memory_step.to_messages(summary_mode=summary_mode)) return messages - def visualize(self): - """Creates a rich tree visualization of the agent's structure.""" - self.logger.visualize_agent_tree(self) + def _step_stream(self, memory_step: ActionStep) -> Generator[Any]: + """ + Perform one step in the ReAct framework: the agent thinks, acts, and observes the result. + Yields either None if the step is not final, or the final answer. 
+ """ + raise NotImplementedError("This method should be implemented in child classes") + + def step(self, memory_step: ActionStep) -> Any: + """ + Perform one step in the ReAct framework: the agent thinks, acts, and observes the result. + Returns either None if the step is not final, or the final answer. + """ + return list(self._step_stream(memory_step))[-1] def extract_action(self, model_output: str, split_token: str) -> tuple[str, str]: """ @@ -591,10 +620,9 @@ def provide_final_answer(self, task: str, images: list["PIL.Image.Image"] | None except Exception as e: return f"Error in generating final LLM output:\n{e}" - @abstractmethod - def step(self, memory_step: ActionStep) -> None | Any: - """To be implemented in children classes. Should return either None if the step is not final.""" - pass + def visualize(self): + """Creates a rich tree visualization of the agent's structure.""" + self.logger.visualize_agent_tree(self) def replay(self, detailed: bool = False): """Prints a pretty replay of the agent's steps. @@ -1007,10 +1035,10 @@ def initialize_system_prompt(self) -> str: ) return system_prompt - def step(self, memory_step: ActionStep) -> None | Any: + def _step_stream(self, memory_step: ActionStep) -> Generator[Any]: """ Perform one step in the ReAct framework: the agent thinks, acts, and observes the result. - Returns None if the step is not final. + Yields either None if the step is not final, or the final answer. 
""" memory_messages = self.write_memory_to_messages() @@ -1081,7 +1109,7 @@ def step(self, memory_step: ActionStep) -> None | Any: ) memory_step.action_output = final_answer - return final_answer + yield final_answer else: if tool_arguments is None: tool_arguments = {} @@ -1103,7 +1131,7 @@ def step(self, memory_step: ActionStep) -> None | Any: level=LogLevel.INFO, ) memory_step.observations = updated_information - return None + yield None def _substitute_state_variables(self, arguments: dict[str, str] | str) -> dict[str, Any] | str: """Replace string values in arguments with their corresponding state values if they exist.""" @@ -1271,10 +1299,10 @@ def initialize_system_prompt(self) -> str: ) return system_prompt - def step(self, memory_step: ActionStep) -> None | Any: + def _step_stream(self, memory_step: ActionStep) -> Generator[Any]: """ Perform one step in the ReAct framework: the agent thinks, acts, and observes the result. - Returns None if the step is not final. + Yields either None if the step is not final, or the final answer. 
""" memory_messages = self.write_memory_to_messages() @@ -1295,13 +1323,14 @@ def step(self, memory_step: ActionStep) -> None | Any: if event.content is not None: output_text += event.content live.update(Markdown(output_text)) + yield event model_output = output_text chat_message = ChatMessage(role="assistant", content=model_output) memory_step.model_output_message = chat_message model_output = chat_message.content else: - chat_message: ChatMessage = self.model( + chat_message: ChatMessage = self.model.generate( input_messages, stop_sequences=["", "Observation:", "Calling tools:"], **additional_args, @@ -1381,7 +1410,7 @@ def step(self, memory_step: ActionStep) -> None | Any: ] self.logger.log(Group(*execution_outputs_console), level=LogLevel.INFO) memory_step.action_output = output - return output if is_final_answer else None + yield output if is_final_answer else None def to_dict(self) -> dict[str, Any]: """Convert the agent to a dictionary representation. diff --git a/src/smolagents/gradio_ui.py b/src/smolagents/gradio_ui.py index 83fbaff3d..ec089b397 100644 --- a/src/smolagents/gradio_ui.py +++ b/src/smolagents/gradio_ui.py @@ -16,10 +16,12 @@ import os import re import shutil +from pathlib import Path from smolagents.agent_types import AgentAudio, AgentImage, AgentText from smolagents.agents import MultiStepAgent, PlanningStep from smolagents.memory import ActionStep, FinalAnswerStep, MemoryStep +from smolagents.models import ChatMessageStreamDelta from smolagents.utils import _is_package_available @@ -36,10 +38,14 @@ def get_step_footnote_content(step_log: MemoryStep, step_name: str) -> str: return step_footnote_content -def pull_messages_from_step( - step_log: MemoryStep, -): - """Extract ChatMessage objects from agent steps with proper nesting""" +def pull_messages_from_step(step_log: MemoryStep, skip_model_outputs: bool = False): + """Extract ChatMessage objects from agent steps with proper nesting. 
+ + Args: + step_log: The step log to display as gr.ChatMessage objects. + skip_model_outputs: If True, skip the model outputs when creating the gr.ChatMessage objects: + This is used for instance when streaming model outputs have already been displayed. + """ if not _is_package_available("gradio"): raise ModuleNotFoundError( "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`" @@ -49,24 +55,23 @@ def pull_messages_from_step( if isinstance(step_log, ActionStep): # Output the step number step_number = f"Step {step_log.step_number}" if step_log.step_number is not None else "Step" - yield gr.ChatMessage(role="assistant", content=f"**{step_number}**") # First yield the thought/reasoning from the LLM - if hasattr(step_log, "model_output") and step_log.model_output is not None: - # Clean up the LLM output + if not skip_model_outputs: + yield gr.ChatMessage(role="assistant", content=f"**{step_number}**", metadata={"status": "done"}) + elif skip_model_outputs and hasattr(step_log, "model_output") and step_log.model_output is not None: model_output = step_log.model_output.strip() # Remove any trailing and extra backticks, handling multiple possible formats model_output = re.sub(r"```\s*", "```", model_output) # handles ``` model_output = re.sub(r"\s*```", "```", model_output) # handles ``` model_output = re.sub(r"```\s*\n\s*", "```", model_output) # handles ```\n model_output = model_output.strip() - yield gr.ChatMessage(role="assistant", content=model_output) + yield gr.ChatMessage(role="assistant", content=model_output, metadata={"status": "done"}) # For tool calls, create a parent message if hasattr(step_log, "tool_calls") and step_log.tool_calls is not None: first_tool_call = step_log.tool_calls[0] used_code = first_tool_call.name == "python_interpreter" - parent_id = f"call_{len(step_log.tool_calls)}" # Tool call becomes the parent message with timing info # First we will handle arguments based on type @@ -89,7 +94,6 @@ def 
pull_messages_from_step( content=content, metadata={ "title": f"🛠️ Used tool {first_tool_call.name}", - "id": parent_id, "status": "done", }, ) @@ -128,15 +132,21 @@ def pull_messages_from_step( # Handle standalone errors but not from tool calls if hasattr(step_log, "error") and step_log.error is not None: - yield gr.ChatMessage(role="assistant", content=str(step_log.error), metadata={"title": "💥 Error"}) + yield gr.ChatMessage( + role="assistant", content=str(step_log.error), metadata={"title": "💥 Error", "status": "done"} + ) - yield gr.ChatMessage(role="assistant", content=get_step_footnote_content(step_log, step_number)) + yield gr.ChatMessage( + role="assistant", content=get_step_footnote_content(step_log, step_number), metadata={"status": "done"} + ) yield gr.ChatMessage(role="assistant", content="-----", metadata={"status": "done"}) elif isinstance(step_log, PlanningStep): - yield gr.ChatMessage(role="assistant", content="**Planning step**") - yield gr.ChatMessage(role="assistant", content=step_log.plan) - yield gr.ChatMessage(role="assistant", content=get_step_footnote_content(step_log, "Planning step")) + yield gr.ChatMessage(role="assistant", content="**Planning step**", metadata={"status": "done"}) + yield gr.ChatMessage(role="assistant", content=step_log.plan, metadata={"status": "done"}) + yield gr.ChatMessage( + role="assistant", content=get_step_footnote_content(step_log, "Planning step"), metadata={"status": "done"} + ) yield gr.ChatMessage(role="assistant", content="-----", metadata={"status": "done"}) elif isinstance(step_log, FinalAnswerStep): @@ -145,19 +155,24 @@ def pull_messages_from_step( yield gr.ChatMessage( role="assistant", content=f"**Final answer:**\n{final_answer.to_string()}\n", + metadata={"status": "done"}, ) elif isinstance(final_answer, AgentImage): yield gr.ChatMessage( role="assistant", content={"path": final_answer.to_string(), "mime_type": "image/png"}, + metadata={"status": "done"}, ) elif isinstance(final_answer, 
AgentAudio): yield gr.ChatMessage( role="assistant", content={"path": final_answer.to_string(), "mime_type": "audio/wav"}, + metadata={"status": "done"}, ) else: - yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}") + yield gr.ChatMessage( + role="assistant", content=f"**Final answer:** {str(final_answer)}", metadata={"status": "done"} + ) else: raise ValueError(f"Unsupported step type: {type(step_log)}") @@ -174,6 +189,13 @@ def stream_to_gradio( total_input_tokens = 0 total_output_tokens = 0 + if not _is_package_available("gradio"): + raise ModuleNotFoundError( + "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`" + ) + + intermediate_text = "" + for step_log in agent.run( task, images=task_images, stream=True, reset=reset_agent_memory, additional_args=additional_args ): @@ -185,10 +207,17 @@ def stream_to_gradio( step_log.input_token_count = agent.model.last_input_token_count step_log.output_token_count = agent.model.last_output_token_count - for message in pull_messages_from_step( - step_log, - ): - yield message + if isinstance(step_log, MemoryStep): + intermediate_text = "" + for message in pull_messages_from_step( + step_log, + # If we're streaming model outputs, no need to display them twice + skip_model_outputs=getattr(agent, "stream_outputs", False), + ): + yield message + elif isinstance(step_log, ChatMessageStreamDelta): + intermediate_text += step_log.content or "" + yield intermediate_text class GradioUI: @@ -200,12 +229,12 @@ def __init__(self, agent: MultiStepAgent, file_upload_folder: str | None = None) "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`" ) self.agent = agent - self.file_upload_folder = file_upload_folder + self.file_upload_folder = Path(file_upload_folder) if file_upload_folder is not None else None self.name = getattr(agent, "name") or "Agent interface" self.description = getattr(agent, "description", None) if 
self.file_upload_folder is not None: - if not os.path.exists(file_upload_folder): - os.mkdir(file_upload_folder) + if not self.file_upload_folder.exists(): + self.file_upload_folder.mkdir(parents=True, exist_ok=True) def interact_with_agent(self, prompt, messages, session_state): import gradio as gr @@ -215,11 +244,22 @@ def interact_with_agent(self, prompt, messages, session_state): session_state["agent"] = self.agent try: - messages.append(gr.ChatMessage(role="user", content=prompt)) + messages.append(gr.ChatMessage(role="user", content=prompt, metadata={"status": "done"})) yield messages for msg in stream_to_gradio(session_state["agent"], task=prompt, reset_agent_memory=False): - messages.append(msg) + if isinstance(msg, gr.ChatMessage): + messages.append(msg) + elif isinstance(msg, str): # Then it's only a completion delta + try: + if messages[-1].metadata["status"] == "pending": + messages[-1].content = msg + else: + messages.append( + gr.ChatMessage(role="assistant", content=msg, metadata={"status": "pending"}) + ) + except Exception as e: + raise e yield messages yield messages @@ -309,12 +349,9 @@ def create_app(self): [upload_status, file_uploads_log], ) - gr.HTML("

Powered by:

") - with gr.Row(): - gr.HTML("""""") + gr.HTML( + "

Powered by smolagents

" + ) # Main chat interface chatbot = gr.Chatbot( diff --git a/tests/test_agents.py b/tests/test_agents.py index 826c2f2e4..31e188a64 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -17,9 +17,9 @@ import tempfile import unittest import uuid +from collections.abc import Generator from contextlib import nullcontext as does_not_raise from pathlib import Path -from typing import Any from unittest.mock import MagicMock, patch import pytest @@ -620,8 +620,8 @@ def __init__(self, name, tools, description="Mock agent description"): class DummyMultiStepAgent(MultiStepAgent): - def step(self, memory_step: ActionStep) -> None | Any: - return super().step(memory_step) + def step(self, memory_step: ActionStep) -> Generator[None]: + yield None def initialize_system_prompt(self): pass @@ -682,7 +682,7 @@ def test_step_number(self): fake_model.last_input_token_count = 10 fake_model.last_output_token_count = 20 max_steps = 2 - agent = DummyMultiStepAgent(tools=[], model=fake_model, max_steps=max_steps) + agent = CodeAgent(tools=[], model=fake_model, max_steps=max_steps) assert hasattr(agent, "step_number"), "step_number attribute should be defined" assert agent.step_number == 0, "step_number should be initialized to 0" agent.run("Test task") @@ -719,7 +719,8 @@ def test_planning_step(self, step, expected_messages_list): model=fake_model, ) task = "Test task" - planning_step = agent._generate_planning_step(task, is_first_step=(step == 1), step=step) + + planning_step = list(agent._generate_planning_step(task, is_first_step=(step == 1), step=step))[-1] expected_message_texts = { "INITIAL_PLAN_USER_PROMPT": populate_template( agent.prompt_templates["planning"]["initial_plan"], @@ -764,8 +765,8 @@ def test_planning_step(self, step, expected_messages_list): for content, expected_content in zip(message["content"], expected_message["content"]): assert content == expected_content # Test calls to model - assert len(fake_model.call_args_list) == 1 - for call_args, 
expected_messages in zip(fake_model.call_args_list, expected_messages_list): + assert len(fake_model.generate.call_args_list) == 1 + for call_args, expected_messages in zip(fake_model.generate.call_args_list, expected_messages_list): assert len(call_args.args) == 1 messages = call_args.args[0] assert isinstance(messages, list) diff --git a/tests/test_monitoring.py b/tests/test_monitoring.py index 41bbc8b8e..8d2f50c0a 100644 --- a/tests/test_monitoring.py +++ b/tests/test_monitoring.py @@ -137,7 +137,7 @@ def test_streaming_agent_text_output(self): # Use stream_to_gradio to capture the output outputs = list(stream_to_gradio(agent, task="Test task")) - self.assertEqual(len(outputs), 11) + self.assertEqual(len(outputs), 10) plan_message = outputs[1] self.assertEqual(plan_message.role, "assistant") self.assertIn("Code:", plan_message.content) @@ -161,7 +161,7 @@ def test_streaming_agent_image_output(self): ) ) - self.assertEqual(len(outputs), 6) + self.assertEqual(len(outputs), 5) final_message = outputs[-1] self.assertEqual(final_message.role, "assistant") self.assertIsInstance(final_message.content, dict) @@ -182,7 +182,7 @@ def generate(self, prompt, **kwargs): # Use stream_to_gradio to capture the output outputs = list(stream_to_gradio(agent, task="Test task")) - self.assertEqual(len(outputs), 13) + self.assertEqual(len(outputs), 11) final_message = outputs[-1] self.assertEqual(final_message.role, "assistant") self.assertIn("Malformed call", final_message.content) From e645646b869ed6e261f1c5e72f0d86678d8a25e5 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Wed, 30 Apr 2025 16:20:50 +0200 Subject: [PATCH 06/25] Update Open DeepResearch requirements (#1265) --- examples/open_deep_research/requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/open_deep_research/requirements.txt b/examples/open_deep_research/requirements.txt index 4fe0e0e2a..fe6c98ef2 100644 --- 
a/examples/open_deep_research/requirements.txt +++ b/examples/open_deep_research/requirements.txt @@ -20,7 +20,6 @@ pypdf>=5.1.0 python-dotenv>=1.0.1 python_pptx>=1.0.2 Requests>=2.32.3 -serpapi>=0.1.5 tqdm>=4.66.4 torch>=2.2.2 torchvision>=0.17.2 @@ -37,4 +36,4 @@ PyPDF2 python-pptx torch xlrd -SpeechRecognition \ No newline at end of file +SpeechRecognition From 5bab68a5146f7a4dcb3cc87eedcd0f11bce2465c Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Wed, 30 Apr 2025 16:22:28 +0200 Subject: [PATCH 07/25] Lower Pillow dependency minimum version to 10.0.1 (#1270) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 78f811a86..e1f649e38 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ dependencies = [ "requests>=2.32.3", "rich>=13.9.4", "jinja2>=3.1.4", - "pillow>=11.0.0", + "pillow>=10.0.1", # Security fix for CVE-2023-4863: https://pillow.readthedocs.io/en/stable/releasenotes/10.0.1.html "markdownify>=0.14.1", "duckduckgo-search>=6.3.7", "python-dotenv" From c9513b913327d058e5538f280a56d65befdb79cd Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Wed, 30 Apr 2025 16:35:05 +0200 Subject: [PATCH 08/25] Fix remote executor send_tools to skip pip install on empty package list (#1273) --- src/smolagents/remote_executors.py | 28 ++++++++++++++++------------ tests/test_remote_executors.py | 12 +++++++++++- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/smolagents/remote_executors.py b/src/smolagents/remote_executors.py index acfe70020..db70954f5 100644 --- a/src/smolagents/remote_executors.py +++ b/src/smolagents/remote_executors.py @@ -53,19 +53,23 @@ def run_code_raise_errors(self, code: str, return_final_answer: bool = False) -> raise NotImplementedError def send_tools(self, tools: dict[str, Tool]): + code = "" + # Install tool 
packages + packages_to_install = { + pkg + for tool in tools.values() + for pkg in tool.to_dict()["requirements"] + if pkg not in self.installed_packages + } + if packages_to_install: + self.installed_packages.extend(packages_to_install) + code += f"!pip install {' '.join(packages_to_install)}\n" + # Get tool definitions tool_definition_code = get_tools_definition_code(tools) - - packages_to_install = set() - for tool in tools.values(): - for package in tool.to_dict()["requirements"]: - if package not in self.installed_packages: - packages_to_install.add(package) - self.installed_packages.append(package) - - execution = self.run_code_raise_errors( - f"!pip install {' '.join(packages_to_install)}\n" + tool_definition_code - ) - self.logger.log(execution[1]) + code += tool_definition_code + if code: + execution = self.run_code_raise_errors(code) + self.logger.log(execution[1]) def send_variables(self, variables: dict): """ diff --git a/tests/test_remote_executors.py b/tests/test_remote_executors.py index f7fe05ed2..f70a6eabe 100644 --- a/tests/test_remote_executors.py +++ b/tests/test_remote_executors.py @@ -8,12 +8,22 @@ from rich.console import Console from smolagents.monitoring import AgentLogger, LogLevel -from smolagents.remote_executors import DockerExecutor, E2BExecutor +from smolagents.remote_executors import DockerExecutor, E2BExecutor, RemotePythonExecutor from smolagents.utils import AgentError from .utils.markers import require_run_all +class TestRemotePythonExecutor: + def test_send_tools_empty_tools(self, monkeypatch): + executor = RemotePythonExecutor(additional_imports=[], logger=MagicMock()) + executor.run_code_raise_errors = MagicMock() + executor.send_tools({}) + assert executor.run_code_raise_errors.call_count == 1 + # No new packages should be installed + assert "!pip install" not in executor.run_code_raise_errors.call_args.args[0] + + class TestE2BExecutorMock: def test_e2b_executor_instantiation(self): logger = MagicMock() From 
b2f1232ad52917d759cfe7e4417f3330322ee6cb Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Wed, 30 Apr 2025 16:43:52 +0200 Subject: [PATCH 09/25] Remove smolagents installation from remote executors (#1274) --- src/smolagents/remote_executors.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/smolagents/remote_executors.py b/src/smolagents/remote_executors.py index db70954f5..e47f22015 100644 --- a/src/smolagents/remote_executors.py +++ b/src/smolagents/remote_executors.py @@ -59,7 +59,7 @@ def send_tools(self, tools: dict[str, Tool]): pkg for tool in tools.values() for pkg in tool.to_dict()["requirements"] - if pkg not in self.installed_packages + if pkg not in self.installed_packages + ["smolagents"] } if packages_to_install: self.installed_packages.extend(packages_to_install) @@ -90,9 +90,9 @@ def __call__(self, code_action: str) -> tuple[Any, str, bool]: return output[0], output[1], is_final_answer def install_packages(self, additional_imports: list[str]): - additional_imports = additional_imports + ["smolagents"] - _, execution_logs = self.run_code_raise_errors(f"!pip install {' '.join(additional_imports)}") - self.logger.log(execution_logs) + if additional_imports: + _, execution_logs = self.run_code_raise_errors(f"!pip install {' '.join(additional_imports)}") + self.logger.log(execution_logs) return additional_imports From c6bec40910b3eaa0d86016cc8ddd6ee3f90500b9 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Fri, 2 May 2025 14:57:53 +0200 Subject: [PATCH 10/25] Hotfix CI: pin mcp < 1.7.0 (#1285) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e1f649e38..057d4a3bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,7 @@ litellm = [ ] mcp = [ "mcpadapt>=0.0.19", # Security fix - "mcp", + "mcp<1.7.0", # Hotfix for 
GH-1284 ] mlx-lm = [ "mlx-lm" From 0dfac36bf0bcd0fd296cd510fcf3d428d98c1004 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Fri, 2 May 2025 16:03:03 +0200 Subject: [PATCH 11/25] Remove duckduckgo-search as required dependency (#1271) --- README.md | 4 ++-- docs/source/en/examples/multiagents.mdx | 2 +- docs/source/en/guided_tour.mdx | 3 ++- docs/source/en/tutorials/inspect_runs.mdx | 2 +- examples/open_deep_research/visual_vs_text_browser.ipynb | 2 +- pyproject.toml | 6 ++++-- 6 files changed, 11 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 9d90637fb..8bbca4734 100644 --- a/README.md +++ b/README.md @@ -51,9 +51,9 @@ Full documentation can be found [here](https://huggingface.co/docs/smolagents/in ## Quick demo -First install the package. +First install the package with a default set of tools: ```bash -pip install smolagents +pip install smolagents[toolkit] ``` Then define your agent, give it the tools it needs and run it! ```py diff --git a/docs/source/en/examples/multiagents.mdx b/docs/source/en/examples/multiagents.mdx index 4e43f99f5..f161a08e6 100644 --- a/docs/source/en/examples/multiagents.mdx +++ b/docs/source/en/examples/multiagents.mdx @@ -25,7 +25,7 @@ Let's set up this system. Run the line below to install the required dependencies: ```py -! 
pip install markdownify duckduckgo-search smolagents --upgrade -q +!pip install markdownify smolagents[toolkit] --upgrade -q ``` Let's login to HF in order to call Inference Providers: diff --git a/docs/source/en/guided_tour.mdx b/docs/source/en/guided_tour.mdx index 01e247357..d63700d06 100644 --- a/docs/source/en/guided_tour.mdx +++ b/docs/source/en/guided_tour.mdx @@ -281,7 +281,7 @@ When the agent is initialized, the tool attributes are used to generate a tool d ### Default toolbox -`smolagents` comes with a default toolbox for empowering agents, that you can add to your agent upon initialization with argument `add_base_tools=True`: +If you install `smolagents` with the "toolkit" extra, it comes with a default toolbox for empowering agents, that you can add to your agent upon initialization with argument `add_base_tools=True`: - **DuckDuckGo web search***: performs a web search using DuckDuckGo browser. - **Python code interpreter**: runs your LLM generated Python code in a secure environment. This tool will only be added to [`ToolCallingAgent`] if you initialize it with `add_base_tools=True`, since code-based agent can already natively execute Python code @@ -290,6 +290,7 @@ When the agent is initialized, the tool attributes are used to generate a tool d You can manually use a tool by calling it with its arguments. ```python +# !pip install smolagents[toolkit] from smolagents import DuckDuckGoSearchTool search_tool = DuckDuckGoSearchTool() diff --git a/docs/source/en/tutorials/inspect_runs.mdx b/docs/source/en/tutorials/inspect_runs.mdx index 333db728b..a5cb664a0 100644 --- a/docs/source/en/tutorials/inspect_runs.mdx +++ b/docs/source/en/tutorials/inspect_runs.mdx @@ -30,7 +30,7 @@ Here's how it then looks like on the platform: First install the required packages. 
Here we install [Phoenix by Arize AI](https://github.com/Arize-ai/phoenix) because that's a good solution to collect and inspect the logs, but there are other OpenTelemetry-compatible platforms that you could use for this collection & inspection part. ```shell -pip install 'smolagents[telemetry]' +pip install 'smolagents[telemetry,toolkit]' ``` Then run the collector in the background. diff --git a/examples/open_deep_research/visual_vs_text_browser.ipynb b/examples/open_deep_research/visual_vs_text_browser.ipynb index 4a85a465a..7015c4905 100644 --- a/examples/open_deep_research/visual_vs_text_browser.ipynb +++ b/examples/open_deep_research/visual_vs_text_browser.ipynb @@ -15,7 +15,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install \"smolagents[litellm]\" -q" + "!pip install \"smolagents[litellm,toolkit]\" -q" ] }, { diff --git a/pyproject.toml b/pyproject.toml index 057d4a3bd..82592c94b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,6 @@ dependencies = [ "jinja2>=3.1.4", "pillow>=10.0.1", # Security fix for CVE-2023-4863: https://pillow.readthedocs.io/en/stable/releasenotes/10.0.1.html "markdownify>=0.14.1", - "duckduckgo-search>=6.3.7", "python-dotenv" ] @@ -65,6 +64,9 @@ telemetry = [ "opentelemetry-exporter-otlp", "openinference-instrumentation-smolagents>=0.1.4" ] +toolkit = [ + "duckduckgo-search>=6.3.7", # DuckDuckGoSearchTool +] transformers = [ "accelerate", "transformers>=4.0.0", @@ -79,7 +81,7 @@ vllm = [ "torch" ] all = [ - "smolagents[audio,docker,e2b,gradio,litellm,mcp,mlx-lm,openai,telemetry,transformers,vision,bedrock]", + "smolagents[audio,docker,e2b,gradio,litellm,mcp,mlx-lm,openai,telemetry,toolkit,transformers,vision,bedrock]", ] quality = [ "ruff>=0.9.0", From 36783c31c695555c6f10d17ecba0828c81e2b70e Mon Sep 17 00:00:00 2001 From: Parteek Date: Sun, 4 May 2025 10:43:13 +0530 Subject: [PATCH 12/25] Allow VisitWebpageTool to run on remote executors without installing smolagents (#1288) Co-authored-by: Parteek Kamboj 
Co-authored-by: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> --- src/smolagents/default_tools.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/smolagents/default_tools.py b/src/smolagents/default_tools.py index d12a38d5a..e09e25587 100644 --- a/src/smolagents/default_tools.py +++ b/src/smolagents/default_tools.py @@ -227,6 +227,15 @@ def __init__(self, max_output_length: int = 40000): super().__init__() self.max_output_length = max_output_length + def _truncate_content(self, content: str, max_length: int) -> str: + if len(content) <= max_length: + return content + return ( + content[: max_length // 2] + + f"\n..._This content has been truncated to stay below {max_length} characters_...\n" + + content[-max_length // 2 :] + ) + def forward(self, url: str) -> str: try: import re @@ -234,8 +243,6 @@ def forward(self, url: str) -> str: import requests from markdownify import markdownify from requests.exceptions import RequestException - - from smolagents.utils import truncate_content except ImportError as e: raise ImportError( "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`." @@ -251,7 +258,7 @@ def forward(self, url: str) -> str: # Remove multiple line breaks markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content) - return truncate_content(markdown_content, self.max_output_length) + return self._truncate_content(markdown_content, self.max_output_length) except requests.exceptions.Timeout: return "The request timed out. Please try again later or check the URL." 
From 77d140dcb8dfe067efe9c95e06d0793493a55abd Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 5 May 2025 11:58:20 +0200 Subject: [PATCH 13/25] Refactor RemotePythonExecutor.send_tools to call install_packages (#1281) --- src/smolagents/remote_executors.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/smolagents/remote_executors.py b/src/smolagents/remote_executors.py index e47f22015..400a03a87 100644 --- a/src/smolagents/remote_executors.py +++ b/src/smolagents/remote_executors.py @@ -53,7 +53,6 @@ def run_code_raise_errors(self, code: str, return_final_answer: bool = False) -> raise NotImplementedError def send_tools(self, tools: dict[str, Tool]): - code = "" # Install tool packages packages_to_install = { pkg @@ -62,11 +61,9 @@ def send_tools(self, tools: dict[str, Tool]): if pkg not in self.installed_packages + ["smolagents"] } if packages_to_install: - self.installed_packages.extend(packages_to_install) - code += f"!pip install {' '.join(packages_to_install)}\n" + self.installed_packages += self.install_packages(list(packages_to_install)) # Get tool definitions - tool_definition_code = get_tools_definition_code(tools) - code += tool_definition_code + code = get_tools_definition_code(tools) if code: execution = self.run_code_raise_errors(code) self.logger.log(execution[1]) From 095d8664f64ac3c40600cb42e99274767ffa492e Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 5 May 2025 11:59:52 +0200 Subject: [PATCH 14/25] Remove unnecessary dependencies from DockerExecutor image (#1272) --- src/smolagents/remote_executors.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/smolagents/remote_executors.py b/src/smolagents/remote_executors.py index 400a03a87..aed0445d1 100644 --- a/src/smolagents/remote_executors.py +++ b/src/smolagents/remote_executors.py @@ 
-218,14 +218,18 @@ def __init__( dockerfile_path = Path(__file__).parent / "Dockerfile" if not dockerfile_path.exists(): with open(dockerfile_path, "w") as f: - f.write("""FROM python:3.12-slim - -RUN pip install jupyter_kernel_gateway requests numpy pandas -RUN pip install jupyter_client notebook - -EXPOSE 8888 -CMD ["jupyter", "kernelgateway", "--KernelGatewayApp.ip='0.0.0.0'", "--KernelGatewayApp.port=8888", "--KernelGatewayApp.allow_origin='*'"] -""") + f.write( + dedent( + """\ + FROM python:3.12-slim + + RUN pip install jupyter_kernel_gateway jupyter_client + + EXPOSE 8888 + CMD ["jupyter", "kernelgateway", "--KernelGatewayApp.ip='0.0.0.0'", "--KernelGatewayApp.port=8888", "--KernelGatewayApp.allow_origin='*'"] + """ + ) + ) _, build_logs = self.client.images.build( path=str(dockerfile_path.parent), dockerfile=str(dockerfile_path), tag=self.image_name ) From 8c6994a1fd77f834a18876f2601ee1d67989e12a Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 5 May 2025 12:01:44 +0200 Subject: [PATCH 15/25] Unpin mcp < 1.7.0 (#1289) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 82592c94b..26a9000be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,7 +50,7 @@ litellm = [ ] mcp = [ "mcpadapt>=0.0.19", # Security fix - "mcp<1.7.0", # Hotfix for GH-1284 + "mcp", ] mlx-lm = [ "mlx-lm" From d6256b0c46932b7938b610b3dac3afe580d58511 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 5 May 2025 12:03:03 +0200 Subject: [PATCH 16/25] Add Installation docs (#1293) --- docs/source/en/_toctree.yml | 2 + docs/source/en/installation.mdx | 114 ++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+) create mode 100644 docs/source/en/installation.mdx diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index c5c2a9a93..08436f275 100644 --- 
a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -2,6 +2,8 @@ sections: - local: index title: 🤗 Agents + - local: installation + title: Installation - local: guided_tour title: Guided tour - title: Tutorials diff --git a/docs/source/en/installation.mdx b/docs/source/en/installation.mdx new file mode 100644 index 000000000..53069e61e --- /dev/null +++ b/docs/source/en/installation.mdx @@ -0,0 +1,114 @@ +# Installation Guide + +The `smolagents` library can be installed using pip. Here are the different installation methods and options available. + +## Prerequisites +- Python 3.10 or newer +- pip + +## Basic Installation + +Install `smolagents` core library with: +```bash +pip install smolagents +``` + +## Installation with Extras + +`smolagents` provides several optional dependencies (extras) that can be installed based on your needs. +You can install these extras using the following syntax: +```bash +pip install "smolagents[extra1,extra2]" +``` + +### Tools +These extras include various tools and integrations: +- **toolkit**: Install a default set of tools for common tasks. + ```bash + pip install "smolagents[toolkit]" + ``` +- **mcp**: Add support for the Model Context Protocol (MCP) to integrate with external tools and services. + ```bash + pip install "smolagents[mcp]" + ``` + +### Model Integration +These extras enable integration with various AI models and frameworks: +- **openai**: Add support for OpenAI API models. + ```bash + pip install "smolagents[openai]" + ``` +- **transformers**: Enable Hugging Face Transformers models. + ```bash + pip install "smolagents[transformers]" + ``` +- **vllm**: Add VLLM support for efficient model inference. + ```bash + pip install "smolagents[vllm]" + ``` +- **mlx-lm**: Enable support for MLX-LM models. + ```bash + pip install "smolagents[mlx-lm]" + ``` +- **litellm**: Add LiteLLM support for lightweight model inference. 
+ ```bash + pip install "smolagents[litellm]" + ``` +- **bedrock**: Enable support for AWS Bedrock models. + ```bash + pip install "smolagents[bedrock]" + ``` + +### Multimodal Capabilities +Extras for handling different types of media and input: +- **vision**: Add support for image processing and computer vision tasks. + ```bash + pip install "smolagents[vision]" + ``` +- **audio**: Enable audio processing capabilities. + ```bash + pip install "smolagents[audio]" + ``` + +### Remote Execution +Extras for executing code remotely: +- **docker**: Add support for executing code in Docker containers. + ```bash + pip install "smolagents[docker]" + ``` +- **e2b**: Enable E2B support for remote execution. + ```bash + pip install "smolagents[e2b]" + ``` + +### Telemetry and User Interface +Extras for telemetry, monitoring and user interface components: +- **telemetry**: Add support for monitoring and tracing. + ```bash + pip install "smolagents[telemetry]" + ``` +- **gradio**: Add support for interactive Gradio UI components. + ```bash + pip install "smolagents[gradio]" + ``` + +### Complete Installation +To install all available extras, you can use: +```bash +pip install "smolagents[all]" +``` + +## Verifying Installation +After installation, you can verify that `smolagents` is installed correctly by running: +```python +import smolagents +print(smolagents.__version__) +``` + +## Next Steps +Once you have successfully installed `smolagents`, you can: +- Follow the [guided tour](./guided_tour) to learn the basics. +- Explore the [how-to guides](./examples/text_to_sql) for practical examples. +- Read the [conceptual guides](./conceptual_guides/intro_agents) for high-level explanations. +- Check out the [tutorials](./tutorials/building_good_agents) for in-depth tutorials on building agents. +- Explore the [API reference](./reference/index) for detailed information on classes and functions. 
From a08f345c6c5968c47a087aed98e6f753cdfe7d7b Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 5 May 2025 12:03:49 +0200 Subject: [PATCH 17/25] Support tools with union type return (#1294) --- src/smolagents/_function_type_hints_utils.py | 8 ++++++++ tests/test_function_type_hints_utils.py | 4 ++-- tests/test_tools.py | 14 ++++++++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/smolagents/_function_type_hints_utils.py b/src/smolagents/_function_type_hints_utils.py index e5a367c08..f8f677088 100644 --- a/src/smolagents/_function_type_hints_utils.py +++ b/src/smolagents/_function_type_hints_utils.py @@ -290,6 +290,14 @@ def _convert_type_hints_to_json_schema(func: Callable, error_on_missing_type_hin else: properties[param_name]["nullable"] = True + # Return: multi‐type union -> treat as any + if ( + "return" in properties + and (return_type := properties["return"].get("type")) + and not isinstance(return_type, str) + ): + properties["return"]["type"] = "any" + schema = {"type": "object", "properties": properties} if required: schema["required"] = required diff --git a/tests/test_function_type_hints_utils.py b/tests/test_function_type_hints_utils.py index fdb55f200..13b279069 100644 --- a/tests/test_function_type_hints_utils.py +++ b/tests/test_function_type_hints_utils.py @@ -383,8 +383,8 @@ def test_union_types(self, union_types_func): return_prop = schema["function"]["return"] # Check union in parameter assert len(value_prop["type"]) == 2 - # Check union in return type - assert len(return_prop["type"]) == 2 + # Check union in return type: should be converted to "any" + assert return_prop["type"] == "any" def test_nested_types(self, nested_types_func): """Test schema generation for nested complex types.""" diff --git a/tests/test_tools.py b/tests/test_tools.py index f82c08753..e9f842f6c 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -551,6 +551,20 @@ def 
test_function(items: list[str]) -> str: # Original function should not have 'self' parameter assert "self" not in original_signature.parameters + def test_tool_with_union_type_return(self): + @tool + def union_type_return_tool_function(param: int) -> str | bool: + """ + Tool with output union type. + + Args: + param: Input parameter. + """ + return str(param) if param > 0 else False + + assert isinstance(union_type_return_tool_function, Tool) + assert union_type_return_tool_function.output_type == "any" + @pytest.fixture def mock_server_parameters(): From 244b71af550a8091cfac90607d795e350932471a Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Mon, 5 May 2025 15:31:32 +0200 Subject: [PATCH 18/25] Fix WikipediaSearchTool in remote executors (#1283) --- src/smolagents/_function_type_hints_utils.py | 20 +++++++++++++++++++- tests/test_remote_executors.py | 12 +++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/smolagents/_function_type_hints_utils.py b/src/smolagents/_function_type_hints_utils.py index f8f677088..d3045420e 100644 --- a/src/smolagents/_function_type_hints_utils.py +++ b/src/smolagents/_function_type_hints_utils.py @@ -38,6 +38,24 @@ ) +IMPORT_TO_PACKAGE_MAPPING = { + "wikipediaapi": "wikipedia-api", +} + + +def get_package_name(import_name: str) -> str: + """ + Return the package name for a given import name. + + Args: + import_name (`str`): Import name to get the package name for. + + Returns: + `str`: Package name for the given import name. + """ + return IMPORT_TO_PACKAGE_MAPPING.get(import_name, import_name) + + def get_imports(code: str) -> list[str]: """ Extracts all the libraries (not relative imports) that are imported in a code. 
@@ -65,7 +83,7 @@ def get_imports(code: str) -> list[str]: imports += re.findall(r"^\s*from\s+(\S+)\s+import", code, flags=re.MULTILINE) # Only keep the top-level module imports = [imp.split(".")[0] for imp in imports if not imp.startswith(".")] - return list(set(imports)) + return [get_package_name(import_name) for import_name in set(imports)] class TypeHintParsingException(Exception): diff --git a/tests/test_remote_executors.py b/tests/test_remote_executors.py index f70a6eabe..20f9fbae8 100644 --- a/tests/test_remote_executors.py +++ b/tests/test_remote_executors.py @@ -7,6 +7,7 @@ import pytest from rich.console import Console +from smolagents.default_tools import WikipediaSearchTool from smolagents.monitoring import AgentLogger, LogLevel from smolagents.remote_executors import DockerExecutor, E2BExecutor, RemotePythonExecutor from smolagents.utils import AgentError @@ -15,7 +16,7 @@ class TestRemotePythonExecutor: - def test_send_tools_empty_tools(self, monkeypatch): + def test_send_tools_empty_tools(self): executor = RemotePythonExecutor(additional_imports=[], logger=MagicMock()) executor.run_code_raise_errors = MagicMock() executor.send_tools({}) @@ -23,6 +24,15 @@ def test_send_tools_empty_tools(self, monkeypatch): # No new packages should be installed assert "!pip install" not in executor.run_code_raise_errors.call_args.args[0] + @require_run_all + def test_send_tools_with_default_wikipedia_search_tool(self): + tool = WikipediaSearchTool() + executor = RemotePythonExecutor(additional_imports=[], logger=MagicMock()) + executor.run_code_raise_errors = MagicMock() + executor.send_tools({"wikipedia_search": tool}) + assert executor.run_code_raise_errors.call_count == 1 + assert "!pip install wikipedia-api" in executor.run_code_raise_errors.call_args.args[0] + class TestE2BExecutorMock: def test_e2b_executor_instantiation(self): From bd9c807a526d622c5f31f8e1e0e011ef937a3021 Mon Sep 17 00:00:00 2001 From: Luis Date: Tue, 6 May 2025 05:15:09 -0400 Subject: [PATCH 
19/25] docs: fix typo (#1253) Co-authored-by: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> --- docs/source/en/guided_tour.mdx | 4 ++-- docs/source/en/tutorials/tools.mdx | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/en/guided_tour.mdx b/docs/source/en/guided_tour.mdx index d63700d06..fcf801f82 100644 --- a/docs/source/en/guided_tour.mdx +++ b/docs/source/en/guided_tour.mdx @@ -340,7 +340,7 @@ def model_download_tool(task: str) -> str: The function needs: - A clear name. The name should be descriptive enough of what this tool does to help the LLM brain powering the agent. Since this tool returns the model with the most downloads for a task, let's name it `model_download_tool`. - Type hints on both inputs and output -- A description, that includes an 'Args:' part where each argument is described (without a type indication this time, it will be pulled from the type hint). Same as for the tool name, this description is an instruction manual for the LLM powering you agent, so do not neglect it. +- A description, that includes an 'Args:' part where each argument is described (without a type indication this time, it will be pulled from the type hint). Same as for the tool name, this description is an instruction manual for the LLM powering your agent, so do not neglect it. All these elements will be automatically baked into the agent's system prompt upon initialization: so strive to make them as clear as possible! @@ -365,7 +365,7 @@ class ModelDownloadTool(Tool): The subclass needs the following attributes: - A clear `name`. The name should be descriptive enough of what this tool does to help the LLM brain powering the agent. Since this tool returns the model with the most downloads for a task, let's name it `model_download_tool`. -- A `description`. Same as for the `name`, this description is an instruction manual for the LLM powering you agent, so do not neglect it. +- A `description`. 
Same as for the `name`, this description is an instruction manual for the LLM powering your agent, so do not neglect it. - Input types and descriptions - Output type All these attributes will be automatically baked into the agent's system prompt upon initialization: so strive to make them as clear as possible! diff --git a/docs/source/en/tutorials/tools.mdx b/docs/source/en/tutorials/tools.mdx index a6b24d280..cf2b38942 100644 --- a/docs/source/en/tutorials/tools.mdx +++ b/docs/source/en/tutorials/tools.mdx @@ -99,7 +99,7 @@ model_download_tool = load_tool( You can directly import a Gradio Space from the Hub as a tool using the [`Tool.from_space`] method! -You only need to provide the id of the Space on the Hub, its name, and a description that will help you agent understand what the tool does. Under the hood, this will use [`gradio-client`](https://pypi.org/project/gradio-client/) library to call the Space. +You only need to provide the id of the Space on the Hub, its name, and a description that will help your agent understand what the tool does. Under the hood, this will use [`gradio-client`](https://pypi.org/project/gradio-client/) library to call the Space. For instance, let's import the [FLUX.1-dev](https://huggingface.co/black-forest-labs/FLUX.1-dev) Space from the Hub and use it to generate an image. 
From 5e86ac25bca11ca018cfaf89a5a7d617693075d4 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 6 May 2025 20:27:49 +0200 Subject: [PATCH 20/25] Add docs about Using different models: Gemini models (#1297) --- docs/source/en/_toctree.yml | 2 + .../en/examples/using_different_models.mdx | 48 +++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 docs/source/en/examples/using_different_models.mdx diff --git a/docs/source/en/_toctree.yml b/docs/source/en/_toctree.yml index 08436f275..9f7d6ce6e 100644 --- a/docs/source/en/_toctree.yml +++ b/docs/source/en/_toctree.yml @@ -34,6 +34,8 @@ title: Orchestrate a multi-agent system - local: examples/web_browser title: Build a web browser agent using vision models + - local: examples/using_different_models + title: Using different models - title: Reference sections: - local: reference/agents diff --git a/docs/source/en/examples/using_different_models.mdx b/docs/source/en/examples/using_different_models.mdx new file mode 100644 index 000000000..1c153d349 --- /dev/null +++ b/docs/source/en/examples/using_different_models.mdx @@ -0,0 +1,48 @@ +# Using different models + +[[open-in-colab]] + +`smolagents` provides a flexible framework that allows you to use various language models from different providers. +This guide will show you how to use different model types with your agents. + +## Available model types + +`smolagents` supports several model types out of the box: +1. [`InferenceClientModel`]: Uses Hugging Face's Inference API to access models +2. [`TransformersModel`]: Runs models locally using the Transformers library +3. [`VLLMModel`]: Uses vLLM for fast inference with optimized serving +4. [`MLXModel`]: Optimized for Apple Silicon devices using MLX +5. [`LiteLLMModel`]: Provides access to hundreds of LLMs through LiteLLM +6. [`LiteLLMRouterModel`]: Distributes requests among multiple models +7. 
[`OpenAIServerModel`]: Connects to OpenAI's API +8. [`AzureOpenAIServerModel`]: Uses Azure's OpenAI service +9. [`AmazonBedrockServerModel`]: Connects to AWS Bedrock's API + +## Using Google Gemini Models + +As explained in the Google Gemini API documentation (https://ai.google.dev/gemini-api/docs/openai), +Google provides an OpenAI-compatible API for Gemini models, allowing you to use the [`OpenAIServerModel`] +with Gemini models by setting the appropriate base URL. + +First, install the required dependencies: +```bash +pip install smolagents[openai] +``` + +Then, [get a Gemini API key](https://ai.google.dev/gemini-api/docs/api-key) and set it in your code: +```python +GEMINI_API_KEY = <YOUR-GEMINI-API-KEY> +``` + +Now, you can initialize the Gemini model using the `OpenAIServerModel` class +and setting the `api_base` parameter to the Gemini API base URL: +```python +from smolagents import OpenAIServerModel + +model = OpenAIServerModel( + model_id="gemini-2.0-flash", + api_key=GEMINI_API_KEY, + # Google Gemini OpenAI-compatible API base URL + api_base="https://generativelanguage.googleapis.com/v1beta/openai/", +) +``` From e38444c1fff3e24d873f0023dd40e2cc5e30ae68 Mon Sep 17 00:00:00 2001 From: HeardACat <2498638+HeardACat@users.noreply.github.com> Date: Thu, 8 May 2025 00:39:28 +1000 Subject: [PATCH 21/25] Fix Custom Final Answer Tool for ToolCallingAgent (#1255) Co-authored-by: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> --- src/smolagents/agents.py | 8 +++--- tests/test_agents.py | 54 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 6 deletions(-) diff --git a/src/smolagents/agents.py b/src/smolagents/agents.py index d052f144a..a276018f5 100644 --- a/src/smolagents/agents.py +++ b/src/smolagents/agents.py @@ -1093,16 +1093,16 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[Any]: answer = tool_arguments else: answer = tool_arguments - if ( - isinstance(answer, str) and answer in self.state.keys() - ): # if
the answer is a state variable, return the value + if isinstance(answer, str) and answer in self.state.keys(): + # if the answer is a state variable, return the value + # State variables are not JSON-serializable (AgentImage, AgentAudio) so can't be passed as arguments to execute_tool_call final_answer = self.state[answer] self.logger.log( f"[bold {YELLOW_HEX}]Final answer:[/bold {YELLOW_HEX}] Extracting key '{answer}' from state to return value '{final_answer}'.", level=LogLevel.INFO, ) else: - final_answer = answer + final_answer = self.execute_tool_call("final_answer", {"answer": answer}) self.logger.log( Text(f"Final answer: {final_answer}", style=f"bold {YELLOW_HEX}"), level=LogLevel.INFO, diff --git a/tests/test_agents.py b/tests/test_agents.py index 31e188a64..0aca8543e 100644 --- a/tests/test_agents.py +++ b/tests/test_agents.py @@ -15,7 +15,6 @@ import io import os import tempfile -import unittest import uuid from collections.abc import Generator from contextlib import nullcontext as does_not_raise @@ -963,7 +962,7 @@ def test_from_dict(self): assert agent.max_steps == 30 -class TestToolCallingAgent(unittest.TestCase): +class TestToolCallingAgent: @patch("huggingface_hub.InferenceClient") def test_toolcalling_agent_api(self, mock_inference_client): mock_client = mock_inference_client.return_value @@ -1040,6 +1039,57 @@ def test_toolcalling_agent_api_misformatted_output(self, mock_inference_client): assert "Error while parsing" in capture.get() assert len(agent.memory.steps) == 4 + def test_change_tools_after_init(self): + from smolagents import tool + + @tool + def fake_tool_1() -> str: + """Fake tool""" + return "1" + + @tool + def fake_tool_2() -> str: + """Fake tool""" + return "2" + + class FakeCodeModel(Model): + def generate(self, messages, tools_to_call_from=None, stop_sequences=None, grammar=None): + if len(messages) < 3: + return ChatMessage( + role="assistant", + content="", + tool_calls=[ + ChatMessageToolCall( + id="call_0", + type="function", 
+ function=ChatMessageToolCallDefinition(name="fake_tool_1", arguments={}), + ) + ], + ) + else: + tool_result = messages[-1]["content"][0]["text"].removeprefix("Observation:\n") + return ChatMessage( + role="assistant", + content="", + tool_calls=[ + ChatMessageToolCall( + id="call_1", + type="function", + function=ChatMessageToolCallDefinition( + name="final_answer", arguments={"answer": tool_result} + ), + ) + ], + ) + + agent = ToolCallingAgent(tools=[fake_tool_1], model=FakeCodeModel()) + + agent.tools["final_answer"] = CustomFinalAnswerTool() + agent.tools["fake_tool_1"] = fake_tool_2 + + answer = agent.run("Fake task.") + assert answer == "2CUSTOM" + class TestCodeAgent: @pytest.mark.parametrize("provide_run_summary", [False, True]) From 36f1d4478c765f9cc0cd35297a72b61089f4191c Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Wed, 7 May 2025 16:40:31 +0200 Subject: [PATCH 22/25] Implement SimpleWebSearchTool (#1300) --- src/smolagents/default_tools.py | 81 +++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/src/smolagents/default_tools.py b/src/smolagents/default_tools.py index e09e25587..a8677e405 100644 --- a/src/smolagents/default_tools.py +++ b/src/smolagents/default_tools.py @@ -210,6 +210,87 @@ def forward(self, query: str, filter_year: int | None = None) -> str: return "## Search Results\n" + "\n\n".join(web_snippets) +class SimpleWebSearchTool(Tool): + name = "web_search" + description = "Performs a web search for a query and returns a string of the top search results formatted as markdown with titles, links, and descriptions." 
+ inputs = {"query": {"type": "string", "description": "The search query to perform."}} + output_type = "string" + + def __init__(self, max_results=10): + super().__init__() + self.max_results = max_results + + def forward(self, query: str) -> str: + results = self.search_duckduckgo(query) + if len(results) == 0: + raise Exception("No results found! Try a less restrictive/shorter query.") + return self.parse_results(results) + + def parse_results(self, results: list) -> str: + return "## Search Results\n\n" + "\n\n".join( + [f"[{result['title']}]({result['link']})\n{result['description']}" for result in results] + ) + + def search_duckduckgo(self, query: str) -> list: + import requests + + response = requests.get( + "https://lite.duckduckgo.com/lite/", + params={"q": query}, + headers={"User-Agent": "Mozilla/5.0"}, + ) + response.raise_for_status() + parser = self._create_duckduckgo_parser() + parser.feed(response.text) + return parser.results + + def _create_duckduckgo_parser(self): + from html.parser import HTMLParser + + class SimpleResultParser(HTMLParser): + def __init__(self): + super().__init__() + self.results = [] + self.current = {} + self.capture_title = False + self.capture_description = False + self.capture_link = False + + def handle_starttag(self, tag, attrs): + attrs = dict(attrs) + if tag == "a" and attrs.get("class") == "result-link": + self.capture_title = True + elif tag == "td" and attrs.get("class") == "result-snippet": + self.capture_description = True + elif tag == "span" and attrs.get("class") == "link-text": + self.capture_link = True + + def handle_endtag(self, tag): + if tag == "a" and self.capture_title: + self.capture_title = False + elif tag == "td" and self.capture_description: + self.capture_description = False + elif tag == "span" and self.capture_link: + self.capture_link = False + elif tag == "tr": + # Store current result if all parts are present + if {"title", "description", "link"} <= self.current.keys(): + 
self.current["description"] = " ".join(self.current["description"]) + self.results.append(self.current) + self.current = {} + + def handle_data(self, data): + if self.capture_title: + self.current["title"] = data.strip() + elif self.capture_description: + self.current.setdefault("description", []) + self.current["description"].append(data.strip()) + elif self.capture_link: + self.current["link"] = "https://" + data.strip() + + return SimpleResultParser() + + class VisitWebpageTool(Tool): name = "visit_webpage" description = ( From 76292232b2dfc083da658fc61854f525efa28a22 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Wed, 7 May 2025 16:41:19 +0200 Subject: [PATCH 23/25] Remove markdownify as required dependency (#1275) --- docs/source/en/examples/multiagents.mdx | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/en/examples/multiagents.mdx b/docs/source/en/examples/multiagents.mdx index f161a08e6..d2976b726 100644 --- a/docs/source/en/examples/multiagents.mdx +++ b/docs/source/en/examples/multiagents.mdx @@ -25,7 +25,7 @@ Let's set up this system. 
Run the line below to install the required dependencies: ```py -!pip install markdownify smolagents[toolkit] --upgrade -q +!pip install smolagents[toolkit] --upgrade -q ``` Let's login to HF in order to call Inference Providers: diff --git a/pyproject.toml b/pyproject.toml index 26a9000be..c87df62d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,6 @@ dependencies = [ "rich>=13.9.4", "jinja2>=3.1.4", "pillow>=10.0.1", # Security fix for CVE-2023-4863: https://pillow.readthedocs.io/en/stable/releasenotes/10.0.1.html - "markdownify>=0.14.1", "python-dotenv" ] @@ -66,6 +65,7 @@ telemetry = [ ] toolkit = [ "duckduckgo-search>=6.3.7", # DuckDuckGoSearchTool + "markdownify>=0.14.1", # VisitWebpageTool ] transformers = [ "accelerate", From 98661a89d98ae5be3b3961445ca55a4573634b39 Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Wed, 7 May 2025 17:23:05 +0200 Subject: [PATCH 24/25] Replace DuckDuckGoSearchTool with WebSearchTool in docs (#1303) --- README.md | 4 ++-- docs/source/en/examples/multiagents.mdx | 8 ++++---- docs/source/en/guided_tour.mdx | 10 +++++----- docs/source/en/reference/tools.mdx | 4 ++++ docs/source/en/tutorials/building_good_agents.mdx | 4 ++-- docs/source/en/tutorials/inspect_runs.mdx | 8 ++++---- docs/source/en/tutorials/memory.mdx | 2 +- docs/source/hi/examples/multiagents.mdx | 8 ++++---- docs/source/hi/guided_tour.mdx | 10 +++++----- docs/source/hi/tutorials/building_good_agents.mdx | 4 ++-- docs/source/hi/tutorials/inspect_runs.mdx | 4 ++-- docs/source/zh/examples/multiagents.mdx | 8 ++++---- docs/source/zh/guided_tour.mdx | 10 +++++----- docs/source/zh/tutorials/building_good_agents.mdx | 4 ++-- docs/source/zh/tutorials/inspect_runs.mdx | 8 ++++---- docs/source/zh/tutorials/memory.mdx | 2 +- examples/inspect_multiagent_run.py | 4 ++-- examples/multi_llm_agent.py | 4 ++-- .../open_deep_research/visual_vs_text_browser.ipynb | 4 ++-- examples/sandboxed_execution.py | 6 
+++--- src/smolagents/default_tools.py | 3 ++- src/smolagents/models.py | 4 ++-- src/smolagents/vision_web_browser.py | 4 ++-- 23 files changed, 66 insertions(+), 61 deletions(-) diff --git a/README.md b/README.md index 8bbca4734..37a5e8a3f 100644 --- a/README.md +++ b/README.md @@ -57,10 +57,10 @@ pip install smolagents[toolkit] ``` Then define your agent, give it the tools it needs and run it! ```py -from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel +from smolagents import CodeAgent, WebSearchTool, InferenceClientModel model = InferenceClientModel() -agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model) +agent = CodeAgent(tools=[WebSearchTool()], model=model) agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?") ``` diff --git a/docs/source/en/examples/multiagents.mdx b/docs/source/en/examples/multiagents.mdx index d2976b726..231f8f0a8 100644 --- a/docs/source/en/examples/multiagents.mdx +++ b/docs/source/en/examples/multiagents.mdx @@ -46,9 +46,9 @@ model_id = "Qwen/Qwen2.5-Coder-32B-Instruct" ## 🔍 Create a web search tool -For web browsing, we can already use our pre-existing [`DuckDuckGoSearchTool`](https://github.com/huggingface/smolagents/blob/main/src/smolagents/default_tools.py#L151-L176) tool to provide a Google search equivalent. +For web browsing, we can already use our native [`WebSearchTool`] tool to provide a Google search equivalent. -But then we will also need to be able to peak into the page found by the `DuckDuckGoSearchTool`. +But then we will also need to be able to peek into the page found by the `WebSearchTool`. To do so, we could import the library's built-in `VisitWebpageTool`, but we will build it again to see how it's done. So let's create our `VisitWebpageTool` tool from scratch using `markdownify`.
@@ -109,14 +109,14 @@ from smolagents import ( CodeAgent, ToolCallingAgent, InferenceClientModel, - DuckDuckGoSearchTool, + WebSearchTool, LiteLLMModel, ) model = InferenceClientModel(model_id=model_id) web_agent = ToolCallingAgent( - tools=[DuckDuckGoSearchTool(), visit_webpage], + tools=[WebSearchTool(), visit_webpage], model=model, max_steps=10, name="web_search_agent", diff --git a/docs/source/en/guided_tour.mdx b/docs/source/en/guided_tour.mdx index fcf801f82..859ced0ac 100644 --- a/docs/source/en/guided_tour.mdx +++ b/docs/source/en/guided_tour.mdx @@ -291,9 +291,9 @@ You can manually use a tool by calling it with its arguments. ```python # !pip install smolagents[toolkit] -from smolagents import DuckDuckGoSearchTool +from smolagents import WebSearchTool -search_tool = DuckDuckGoSearchTool() +search_tool = WebSearchTool() print(search_tool("Who's the current president of Russia?")) ``` @@ -424,15 +424,15 @@ You can easily build hierarchical multi-agent systems with `smolagents`. To do so, just ensure your agent has `name` and`description` attributes, which will then be embedded in the manager agent's system prompt to let it know how to call this managed agent, as we also do for tools. Then you can pass this managed agent in the parameter managed_agents upon initialization of the manager agent. -Here's an example of making an agent that managed a specific web search agent using our [`DuckDuckGoSearchTool`]: +Here's an example of making an agent that managed a specific web search agent using our native [`WebSearchTool`]: ```py -from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool +from smolagents import CodeAgent, InferenceClientModel, WebSearchTool model = InferenceClientModel() web_agent = CodeAgent( - tools=[DuckDuckGoSearchTool()], + tools=[WebSearchTool()], model=model, name="web_search", description="Runs web searches for you. Give it your query as an argument." 
diff --git a/docs/source/en/reference/tools.mdx b/docs/source/en/reference/tools.mdx index a5d217bb8..ee96f71e8 100644 --- a/docs/source/en/reference/tools.mdx +++ b/docs/source/en/reference/tools.mdx @@ -42,6 +42,10 @@ contains the API docs for the underlying classes. [[autodoc]] UserInputTool +### WebSearchTool + +[[autodoc]] WebSearchTool + ### DuckDuckGoSearchTool [[autodoc]] DuckDuckGoSearchTool diff --git a/docs/source/en/tutorials/building_good_agents.mdx b/docs/source/en/tutorials/building_good_agents.mdx index 53bda8f92..a9c2a79ac 100644 --- a/docs/source/en/tutorials/building_good_agents.mdx +++ b/docs/source/en/tutorials/building_good_agents.mdx @@ -397,7 +397,7 @@ This also works with the [`ToolCallingAgent`]. We provide a model for a supplementary planning step, that an agent can run regularly in-between normal action steps. In this step, there is no tool call, the LLM is simply asked to update a list of facts it knows and to reflect on what steps it should take next based on those facts. ```py -from smolagents import load_tool, CodeAgent, InferenceClientModel, DuckDuckGoSearchTool +from smolagents import load_tool, CodeAgent, InferenceClientModel, WebSearchTool from dotenv import load_dotenv load_dotenv() @@ -405,7 +405,7 @@ load_dotenv() # Import tool from Hub image_generation_tool = load_tool("m-ric/text-to-image", trust_remote_code=True) -search_tool = DuckDuckGoSearchTool() +search_tool = WebSearchTool() agent = CodeAgent( tools=[search_tool, image_generation_tool], diff --git a/docs/source/en/tutorials/inspect_runs.mdx b/docs/source/en/tutorials/inspect_runs.mdx index a5cb664a0..dbc141595 100644 --- a/docs/source/en/tutorials/inspect_runs.mdx +++ b/docs/source/en/tutorials/inspect_runs.mdx @@ -54,7 +54,7 @@ Then you can run your agents! 
from smolagents import ( CodeAgent, ToolCallingAgent, - DuckDuckGoSearchTool, + WebSearchTool, VisitWebpageTool, InferenceClientModel, ) @@ -62,7 +62,7 @@ from smolagents import ( model = InferenceClientModel() search_agent = ToolCallingAgent( - tools=[DuckDuckGoSearchTool(), VisitWebpageTool()], + tools=[WebSearchTool(), VisitWebpageTool()], model=model, name="search_agent", description="This is an agent that can do web search.", @@ -143,7 +143,7 @@ SmolagentsInstrumentor().instrument(tracer_provider=trace_provider) from smolagents import ( CodeAgent, ToolCallingAgent, - DuckDuckGoSearchTool, + WebSearchTool, VisitWebpageTool, InferenceClientModel, ) @@ -153,7 +153,7 @@ model = InferenceClientModel( ) search_agent = ToolCallingAgent( - tools=[DuckDuckGoSearchTool(), VisitWebpageTool()], + tools=[WebSearchTool(), VisitWebpageTool()], model=model, name="search_agent", description="This is an agent that can do web search.", diff --git a/docs/source/en/tutorials/memory.mdx b/docs/source/en/tutorials/memory.mdx index df982da82..ad35e337b 100644 --- a/docs/source/en/tutorials/memory.mdx +++ b/docs/source/en/tutorials/memory.mdx @@ -83,7 +83,7 @@ Then you should pass this function in the `step_callbacks` argument upon initial ```py CodeAgent( - tools=[DuckDuckGoSearchTool(), go_back, close_popups, search_item_ctrl_f], + tools=[WebSearchTool(), go_back, close_popups, search_item_ctrl_f], model=model, additional_authorized_imports=["helium"], step_callbacks=[update_screenshot], diff --git a/docs/source/hi/examples/multiagents.mdx b/docs/source/hi/examples/multiagents.mdx index 7ee85f92d..1c17312e9 100644 --- a/docs/source/hi/examples/multiagents.mdx +++ b/docs/source/hi/examples/multiagents.mdx @@ -49,9 +49,9 @@ model_id = "Qwen/Qwen2.5-Coder-32B-Instruct" ## 🔍 एक वेब सर्च टूल बनाएं -वेब ब्राउज़िंग के लिए, हम पहले से मौजूद [`DuckDuckGoSearchTool`](https://github.com/huggingface/smolagents/blob/main/src/smolagents/default_tools.py#L151-L176) टूल का उपयोग कर सकते हैं जो 
Google search के समान सुविधा प्रदान करता है। +वेब ब्राउज़िंग के लिए, हम पहले से मौजूद [`WebSearchTool`] टूल का उपयोग कर सकते हैं जो Google search के समान सुविधा प्रदान करता है। -लेकिन फिर हमें `DuckDuckGoSearchTool` द्वारा खोजे गए पेज को देखने में भी सक्षम होने की आवश्यकता होगी। +लेकिन फिर हमें `WebSearchTool` द्वारा खोजे गए पेज को देखने में भी सक्षम होने की आवश्यकता होगी। ऐसा करने के लिए, हम लाइब्रेरी के बिल्ट-इन `VisitWebpageTool` को इम्पोर्ट कर सकते हैं, लेकिन हम इसे फिर से बनाएंगे यह देखने के लिए कि यह कैसे किया जाता है। तो आइए `markdownify` का उपयोग करके शुरू से अपना `VisitWebpageTool` टूल बनाएं। @@ -113,14 +113,14 @@ from smolagents import ( ToolCallingAgent, InferenceClientModel, ManagedAgent, - DuckDuckGoSearchTool, + WebSearchTool, LiteLLMModel, ) model = InferenceClientModel(model_id=model_id) web_agent = ToolCallingAgent( - tools=[DuckDuckGoSearchTool(), visit_webpage], + tools=[WebSearchTool(), visit_webpage], model=model, max_steps=10, ) diff --git a/docs/source/hi/guided_tour.mdx b/docs/source/hi/guided_tour.mdx index 1c7f5742e..59f3c39db 100644 --- a/docs/source/hi/guided_tour.mdx +++ b/docs/source/hi/guided_tour.mdx @@ -152,9 +152,9 @@ agent.run("Could you get me the title of the page at url 'https://huggingface.co आप मैन्युअल रूप से एक टूल का उपयोग उसके आर्ग्यूमेंट्स के साथ कॉल करके कर सकते हैं। ```python -from smolagents import DuckDuckGoSearchTool +from smolagents import WebSearchTool -search_tool = DuckDuckGoSearchTool() +search_tool = WebSearchTool() print(search_tool("Who's the current president of Russia?")) ``` @@ -283,14 +283,14 @@ Microsoft के फ्रेमवर्क [Autogen](https://huggingface.co/pa ऐसा करने के लिए, एजेंट को [`ManagedAgent`] ऑब्जेक्ट में समाहित करें। यह ऑब्जेक्ट `agent`, `name`, और एक `description` जैसे तर्कों की आवश्यकता होती है, जो फिर मैनेजर एजेंट की सिस्टम प्रॉम्प्ट में एम्बेड किया जाता है -यहां एक एजेंट बनाने का उदाहरण दिया गया है जो हमारे [`DuckDuckGoSearchTool`] का उपयोग करके एक विशिष्ट वेब खोज एजेंट को प्रबंधित करता है। +यहां 
एक एजेंट बनाने का उदाहरण दिया गया है जो हमारे [`WebSearchTool`] का उपयोग करके एक विशिष्ट वेब खोज एजेंट को प्रबंधित करता है। ```py -from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool, ManagedAgent +from smolagents import CodeAgent, InferenceClientModel, WebSearchTool, ManagedAgent model = InferenceClientModel() -web_agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model) +web_agent = CodeAgent(tools=[WebSearchTool()], model=model) managed_web_agent = ManagedAgent( agent=web_agent, diff --git a/docs/source/hi/tutorials/building_good_agents.mdx b/docs/source/hi/tutorials/building_good_agents.mdx index 0baa206f6..e074f14ad 100644 --- a/docs/source/hi/tutorials/building_good_agents.mdx +++ b/docs/source/hi/tutorials/building_good_agents.mdx @@ -397,7 +397,7 @@ This also works with the [`ToolCallingAgent`]. हम पूरक योजना चरण के लिए एक मॉडल प्रदान करते हैं, जिसे एजेंट सामान्य क्रियाओं के चरणों के बीच नियमित रूप से चला सकता है। इस चरण में कोई टूल कॉल नहीं होती है, LLM से केवल उन तथ्यों की सूची को अपडेट करने के लिए कहा जाता है जो उसे ज्ञात हैं और इन तथ्यों के आधार पर उसे अगले कदमों के बारे में विचार करना होता है। ```py -from smolagents import load_tool, CodeAgent, InferenceClientModel, DuckDuckGoSearchTool +from smolagents import load_tool, CodeAgent, InferenceClientModel, WebSearchTool from dotenv import load_dotenv load_dotenv() @@ -405,7 +405,7 @@ load_dotenv() # Import tool from Hub image_generation_tool = load_tool("m-ric/text-to-image", trust_remote_code=True) -search_tool = DuckDuckGoSearchTool() +search_tool = WebSearchTool() agent = CodeAgent( tools=[search_tool], diff --git a/docs/source/hi/tutorials/inspect_runs.mdx b/docs/source/hi/tutorials/inspect_runs.mdx index 127bca148..a42ecde58 100644 --- a/docs/source/hi/tutorials/inspect_runs.mdx +++ b/docs/source/hi/tutorials/inspect_runs.mdx @@ -56,7 +56,7 @@ SmolagentsInstrumentor().instrument(tracer_provider=trace_provider) from smolagents import ( CodeAgent, ToolCallingAgent, - 
DuckDuckGoSearchTool, + WebSearchTool, VisitWebpageTool, InferenceClientModel, ) @@ -64,7 +64,7 @@ from smolagents import ( model = InferenceClientModel() managed_agent = ToolCallingAgent( - tools=[DuckDuckGoSearchTool(), VisitWebpageTool()], + tools=[WebSearchTool(), VisitWebpageTool()], model=model, name="managed_agent", description="This is an agent that can do web search.", diff --git a/docs/source/zh/examples/multiagents.mdx b/docs/source/zh/examples/multiagents.mdx index 567e7573f..a30d9e2b3 100644 --- a/docs/source/zh/examples/multiagents.mdx +++ b/docs/source/zh/examples/multiagents.mdx @@ -50,8 +50,8 @@ model_id = "Qwen/Qwen2.5-Coder-32B-Instruct" ## 🔍 创建网络搜索工具 虽然我们可以使用已经存在的 -[`DuckDuckGoSearchTool`](https://github.com/huggingface/smolagents/blob/main/src/smolagents/default_tools.py#L151-L176) -工具作为谷歌搜索的平替进行网页浏览,然后我们也需要能够查看`DuckDuckGoSearchTool`找到的页面。为此,我 +[`WebSearchTool`] +工具作为谷歌搜索的平替进行网页浏览,然后我们也需要能够查看`WebSearchTool`找到的页面。为此,我 们可以直接导入库的内置 `VisitWebpageTool`。但是我们将重新构建它以了解其工作原理。 @@ -114,14 +114,14 @@ from smolagents import ( ToolCallingAgent, InferenceClientModel, ManagedAgent, - DuckDuckGoSearchTool, + WebSearchTool, LiteLLMModel, ) model = InferenceClientModel(model_id=model_id) web_agent = ToolCallingAgent( - tools=[DuckDuckGoSearchTool(), visit_webpage], + tools=[WebSearchTool(), visit_webpage], model=model, max_steps=10, name="search", diff --git a/docs/source/zh/guided_tour.mdx b/docs/source/zh/guided_tour.mdx index e851b79b8..f294e7a6e 100644 --- a/docs/source/zh/guided_tour.mdx +++ b/docs/source/zh/guided_tour.mdx @@ -221,9 +221,9 @@ agent.run("Could you get me the title of the page at url 'https://huggingface.co 您可以通过调用 [`load_tool`] 函数和要执行的任务手动使用工具。 ```python -from smolagents import DuckDuckGoSearchTool +from smolagents import WebSearchTool -search_tool = DuckDuckGoSearchTool() +search_tool = WebSearchTool() print(search_tool("Who's the current president of Russia?")) ``` @@ -352,14 +352,14 @@ Out[20]: 'ByteDance/AnimateDiff-Lightning' 为此,将 agent 
封装在 [`ManagedAgent`] 对象中。此对象需要参数 `agent`、`name` 和 `description`,这些参数将嵌入到管理 agent 的系统提示中,以让它知道如何调用此托管 agent,就像我们对工具所做的那样。 -以下是一个使用我们的 [`DuckDuckGoSearchTool`] 制作一个管理特定网页搜索 agent 的 agent 的示例: +以下是一个使用我们的 [`WebSearchTool`] 制作一个管理特定网页搜索 agent 的 agent 的示例: ```py -from smolagents import CodeAgent, InferenceClientModel, DuckDuckGoSearchTool, ManagedAgent +from smolagents import CodeAgent, InferenceClientModel, WebSearchTool, ManagedAgent model = InferenceClientModel() -web_agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model) +web_agent = CodeAgent(tools=[WebSearchTool()], model=model) managed_web_agent = ManagedAgent( agent=web_agent, diff --git a/docs/source/zh/tutorials/building_good_agents.mdx b/docs/source/zh/tutorials/building_good_agents.mdx index a70d251ce..5fca995b7 100644 --- a/docs/source/zh/tutorials/building_good_agents.mdx +++ b/docs/source/zh/tutorials/building_good_agents.mdx @@ -395,7 +395,7 @@ agent.prompt_templates["system_prompt"] = agent.prompt_templates["system_prompt" 我们提供了一个用于补充规划步骤的模型,agent 可以在正常操作步骤之间定期运行。在此步骤中,没有工具调用,LLM 只是被要求更新它知道的事实列表,并根据这些事实反推它应该采取的下一步。 ```py -from smolagents import load_tool, CodeAgent, InferenceClientModel, DuckDuckGoSearchTool +from smolagents import load_tool, CodeAgent, InferenceClientModel, WebSearchTool from dotenv import load_dotenv load_dotenv() @@ -403,7 +403,7 @@ load_dotenv() # 从 Hub 导入工具 image_generation_tool = load_tool("m-ric/text-to-image", trust_remote_code=True) -search_tool = DuckDuckGoSearchTool() +search_tool = WebSearchTool() agent = CodeAgent( tools=[search_tool], diff --git a/docs/source/zh/tutorials/inspect_runs.mdx b/docs/source/zh/tutorials/inspect_runs.mdx index ea3eb659b..f6f4be8b8 100644 --- a/docs/source/zh/tutorials/inspect_runs.mdx +++ b/docs/source/zh/tutorials/inspect_runs.mdx @@ -56,7 +56,7 @@ SmolagentsInstrumentor().instrument() from smolagents import ( CodeAgent, ToolCallingAgent, - DuckDuckGoSearchTool, + WebSearchTool, VisitWebpageTool, InferenceClientModel, ) @@ -64,7 +64,7 
@@ from smolagents import ( model = InferenceClientModel() search_agent = ToolCallingAgent( - tools=[DuckDuckGoSearchTool(), VisitWebpageTool()], + tools=[WebSearchTool(), VisitWebpageTool()], model=model, name="search_agent", description="This is an agent that can do web search.", @@ -145,7 +145,7 @@ SmolagentsInstrumentor().instrument(tracer_provider=trace_provider) from smolagents import ( CodeAgent, ToolCallingAgent, - DuckDuckGoSearchTool, + WebSearchTool, VisitWebpageTool, InferenceClientModel, ) @@ -155,7 +155,7 @@ model = InferenceClientModel( ) search_agent = ToolCallingAgent( - tools=[DuckDuckGoSearchTool(), VisitWebpageTool()], + tools=[WebSearchTool(), VisitWebpageTool()], model=model, name="search_agent", description="This is an agent that can do web search.", diff --git a/docs/source/zh/tutorials/memory.mdx b/docs/source/zh/tutorials/memory.mdx index de2bdc8c3..900128f37 100644 --- a/docs/source/zh/tutorials/memory.mdx +++ b/docs/source/zh/tutorials/memory.mdx @@ -82,7 +82,7 @@ def update_screenshot(memory_step: ActionStep, agent: CodeAgent) -> None: ```py CodeAgent( - tools=[DuckDuckGoSearchTool(), go_back, close_popups, search_item_ctrl_f], + tools=[WebSearchTool(), go_back, close_popups, search_item_ctrl_f], model=model, additional_authorized_imports=["helium"], step_callbacks=[update_screenshot], diff --git a/examples/inspect_multiagent_run.py b/examples/inspect_multiagent_run.py index 95032cd34..c68dccb75 100644 --- a/examples/inspect_multiagent_run.py +++ b/examples/inspect_multiagent_run.py @@ -8,10 +8,10 @@ from smolagents import ( CodeAgent, - DuckDuckGoSearchTool, InferenceClientModel, ToolCallingAgent, VisitWebpageTool, + WebSearchTool, ) @@ -19,7 +19,7 @@ model = InferenceClientModel() search_agent = ToolCallingAgent( - tools=[DuckDuckGoSearchTool(), VisitWebpageTool()], + tools=[WebSearchTool(), VisitWebpageTool()], model=model, name="search_agent", description="This is an agent that can do web search.", diff --git 
a/examples/multi_llm_agent.py b/examples/multi_llm_agent.py index e46354e03..6f44ff8b4 100644 --- a/examples/multi_llm_agent.py +++ b/examples/multi_llm_agent.py @@ -1,6 +1,6 @@ import os -from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMRouterModel +from smolagents import CodeAgent, LiteLLMRouterModel, WebSearchTool # Make sure to setup the necessary environment variables! @@ -39,6 +39,6 @@ model_list=llm_loadbalancer_model_list, client_kwargs={"routing_strategy": "simple-shuffle"}, ) -agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model, stream_outputs=True) +agent = CodeAgent(tools=[WebSearchTool()], model=model, stream_outputs=True) agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?") diff --git a/examples/open_deep_research/visual_vs_text_browser.ipynb b/examples/open_deep_research/visual_vs_text_browser.ipynb index 7015c4905..1acc5c704 100644 --- a/examples/open_deep_research/visual_vs_text_browser.ipynb +++ b/examples/open_deep_research/visual_vs_text_browser.ipynb @@ -167,7 +167,7 @@ "source": [ "from scripts.visual_qa import VisualQAGPT4Tool\n", "\n", - "from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel\n", + "from smolagents import CodeAgent, LiteLLMModel, WebSearchTool\n", "from smolagents.vision_web_browser import (\n", " close_popups,\n", " go_back,\n", @@ -183,7 +183,7 @@ "### BUILD AGENTS & TOOLS\n", "\n", "CodeAgent(\n", - " tools=[DuckDuckGoSearchTool(), go_back, close_popups, search_item_ctrl_f],\n", + " tools=[WebSearchTool(), go_back, close_popups, search_item_ctrl_f],\n", " model=proprietary_model,\n", " additional_authorized_imports=[\"helium\"],\n", " step_callbacks=[save_screenshot],\n", diff --git a/examples/sandboxed_execution.py b/examples/sandboxed_execution.py index 25e4fb771..4bd81f029 100644 --- a/examples/sandboxed_execution.py +++ b/examples/sandboxed_execution.py @@ -1,12 +1,12 @@ -from smolagents import CodeAgent, DuckDuckGoSearchTool, 
InferenceClientModel +from smolagents import CodeAgent, InferenceClientModel, WebSearchTool model = InferenceClientModel() -agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model, executor_type="docker") +agent = CodeAgent(tools=[WebSearchTool()], model=model, executor_type="docker") output = agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?") print("Docker executor result:", output) -agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model, executor_type="e2b") +agent = CodeAgent(tools=[WebSearchTool()], model=model, executor_type="e2b") output = agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?") print("E2B executor result:", output) diff --git a/src/smolagents/default_tools.py b/src/smolagents/default_tools.py index a8677e405..617dee2d1 100644 --- a/src/smolagents/default_tools.py +++ b/src/smolagents/default_tools.py @@ -210,7 +210,7 @@ def forward(self, query: str, filter_year: int | None = None) -> str: return "## Search Results\n" + "\n\n".join(web_snippets) -class SimpleWebSearchTool(Tool): +class WebSearchTool(Tool): name = "web_search" description = "Performs a web search for a query and returns a string of the top search results formatted as markdown with titles, links, and descriptions." 
inputs = {"query": {"type": "string", "description": "The search query to perform."}} @@ -493,6 +493,7 @@ def decode(self, outputs): "PythonInterpreterTool", "FinalAnswerTool", "UserInputTool", + "WebSearchTool", "DuckDuckGoSearchTool", "GoogleSearchTool", "VisitWebpageTool", diff --git a/src/smolagents/models.py b/src/smolagents/models.py index ae930ae5d..6b9b40fff 100644 --- a/src/smolagents/models.py +++ b/src/smolagents/models.py @@ -1099,7 +1099,7 @@ class LiteLLMRouterModel(LiteLLMModel): Example: ```python >>> import os - >>> from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMRouterModel + >>> from smolagents import CodeAgent, WebSearchTool, LiteLLMRouterModel >>> os.environ["OPENAI_API_KEY"] = "" >>> os.environ["AWS_ACCESS_KEY_ID"] = "" >>> os.environ["AWS_SECRET_ACCESS_KEY"] = "" @@ -1129,7 +1129,7 @@ class LiteLLMRouterModel(LiteLLMModel): ... "routing_strategy":"simple-shuffle" ... } >>> ) - >>> agent = CodeAgent(tools=[DuckDuckGoSearchTool()], model=model) + >>> agent = CodeAgent(tools=[WebSearchTool()], model=model) >>> agent.run("How many seconds would it take for a leopard at full speed to run through Pont des Arts?") ``` """ diff --git a/src/smolagents/vision_web_browser.py b/src/smolagents/vision_web_browser.py index 8886ec97e..ace21d19d 100644 --- a/src/smolagents/vision_web_browser.py +++ b/src/smolagents/vision_web_browser.py @@ -9,7 +9,7 @@ from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys -from smolagents import CodeAgent, DuckDuckGoSearchTool, tool +from smolagents import CodeAgent, WebSearchTool, tool from smolagents.agents import ActionStep from smolagents.cli import load_model @@ -120,7 +120,7 @@ def initialize_driver(): def initialize_agent(model): """Initialize the CodeAgent with the specified model.""" return CodeAgent( - tools=[DuckDuckGoSearchTool(), go_back, close_popups, search_item_ctrl_f], + tools=[WebSearchTool(), go_back, close_popups, search_item_ctrl_f], model=model, 
additional_authorized_imports=["helium"], step_callbacks=[save_screenshot], From 31389a4848b6e5040880db0c72d118f5cf22497f Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Wed, 7 May 2025 17:24:27 +0200 Subject: [PATCH 25/25] Release: v1.15.0 --- pyproject.toml | 2 +- src/smolagents/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c87df62d1..6b3f2d111 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "smolagents" -version = "1.15.0.dev0" +version = "1.15.0" description = "🤗 smolagents: a barebones library for agents. Agents write python code to call tools or orchestrate other agents." authors = [ { name="Aymeric Roucher", email="aymeric@hf.co" }, diff --git a/src/smolagents/__init__.py b/src/smolagents/__init__.py index be4c3c19e..a649b4572 100644 --- a/src/smolagents/__init__.py +++ b/src/smolagents/__init__.py @@ -14,7 +14,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "1.15.0.dev0" +__version__ = "1.15.0" from .agent_types import * # noqa: I001 from .agents import * # Above noqa avoids a circular dependency due to cli.py