Enhance agent functionality and improve error handling; add agent-run script and validate repository paths

glp-92 · Copilot · glp-92 · commit 905a4fafc840 · 2026-04-29T19:24:56.000+02:00
Co-authored-by: Copilot <copilot@github.com>
diff --git a/README.md b/README.md
@@ -62,22 +62,34 @@ uv run pre-commit run --all-files
 docker compose up -d
 ```
 
-## Ejecutar API en local (modo desarrollo)
-
-No aplica: la API intermedia fue eliminada para simplificar el stack.
-
 ## Ejecutar agente por CLI
 
 El agente recibe el prompt como argumento:
 
 ```bash
-uv run python agent/src/main.py "tu prompt aqui"
+./agent-run "tu prompt aqui"
+```
+
+### Uso desde cualquier repositorio objetivo
+
+Si ya estás parado en el repositorio donde quieres que el agente trabaje:
+
+```bash
+REPOSITORY_ROOT_PATH="$PWD" /home/glp-desktop/Workspace/Open-Coder-Agent/agent-run "tu prompt aqui"
+```
+
+Eso ejecuta el agente instalado en Open-Coder-Agent, pero operando sobre el repo actual (`$PWD`).
+
+Comando avanzado equivalente (solo si lo necesitas):
+
+```bash
+REPOSITORY_ROOT_PATH="$PWD" uv --directory /home/glp-desktop/Workspace/Open-Coder-Agent run python agent/src/main.py "tu prompt aqui"
 ```
 
 Ejemplo:
 
 ```bash
-uv run python agent/src/main.py "revisa el modulo de tools y propone mejoras de rendimiento"
+./agent-run "revisa el modulo de tools y propone mejoras de rendimiento"
 ```
 
 ## Verificación rápida de calidad
diff --git a/agent-run b/agent-run
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+# Launch the agent CLI from any working directory.
+# Usage: agent-run "your prompt"
+# REPOSITORY_ROOT_PATH selects the repository the agent works on; it defaults to the caller's $PWD.
+set -euo pipefail
+
+# A prompt is required: print usage to stderr and exit with status 2 when no argument is given.
+if [[ $# -lt 1 ]]; then
+  echo 'Usage: agent-run "your prompt"' >&2
+  exit 2
+fi
+
+# Directory containing this script; passed to `uv --directory` below.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# Honor a caller-provided REPOSITORY_ROOT_PATH, otherwise fall back to the current directory.
+TARGET_ROOT="${REPOSITORY_ROOT_PATH:-$PWD}"
+# Join all positional arguments into a single prompt string.
+PROMPT="$*"
+
+# Run main.py through uv with the target repository exported via the environment.
+REPOSITORY_ROOT_PATH="$TARGET_ROOT" uv --directory "$SCRIPT_DIR" run python agent/src/main.py "$PROMPT"
diff --git a/agent/src/config/config.py b/agent/src/config/config.py
@@ -2,7 +2,7 @@
 from pathlib import Path
 
 from dotenv import load_dotenv
-from pydantic import BaseModel, Field, model_validator
+from pydantic import BaseModel, Field, field_validator, model_validator
 
 load_dotenv(dotenv_path=Path(__file__).resolve().parent.parent.parent.parent / ".env")
 
@@ -26,6 +26,16 @@ class Config(BaseModel):
     chat_window_size: int = Field(default=6, ge=4)
     repository_root_path: str = Field(min_length=1)
 
+    @field_validator("repository_root_path")
+    @classmethod
+    def _validate_repository_root_path(cls, value: str) -> str:
+        """
+        Normalize and validate the repository root path.
+
+        Args:
+            value: Raw path string; a leading ``~`` is expanded.
+
+        Returns:
+            The resolved absolute path as a string.
+
+        Raises:
+            ValueError: If the path is relative, or is not an existing directory.
+        """
+        expanded = Path(value).expanduser()
+        # Check before resolve(): resolve() always yields an absolute path (anchored at
+        # the current working directory), so testing afterwards could never fail and a
+        # relative value would silently point at whatever directory the process runs in.
+        if not expanded.is_absolute():
+            raise ValueError("repository_root_path must be an absolute path")
+        root = expanded.resolve()
+        # is_dir() is False for missing paths, so it covers the existence check too.
+        if not root.is_dir():
+            raise ValueError("repository_root_path must exist and be a directory")
+        return str(root)
+
     @model_validator(mode="after")
     def _validate_message_windows(self):
         if self.max_messages_for_summary < self.messages_to_summarize:
@@ -41,11 +51,11 @@ def _validate_message_windows(self):
 config = Config(
     ollama_url=_first_env("OLLAMA_URL", default="http://localhost:11434"),
     llm_model=_first_env("MODEL_NAME", default="qwen3.5:2b"),
-    model_num_ctx=int(_first_env("AGENT_MODEL_NUM_CTX", "MODEL_NUM_CTX", default="4096")),
+    model_num_ctx=_first_env("AGENT_MODEL_NUM_CTX", "MODEL_NUM_CTX", default="4096"),
     agent_config_prompt=prompt,
-    max_steps=int(_first_env("AGENT_MAX_STEPS", "MAX_STEPS", default="20")),
-    max_messages_for_summary=int(_first_env("AGENT_MAX_MESSAGES_FOR_SUMMARY", default="10")),
-    messages_to_summarize=int(_first_env("AGENT_MESSAGES_TO_SUMMARIZE", default="4")),
-    chat_window_size=int(_first_env("AGENT_CHAT_WINDOW_SIZE", default="6")),
+    max_steps=_first_env("AGENT_MAX_STEPS", "MAX_STEPS", default="20"),
+    max_messages_for_summary=_first_env("AGENT_MAX_MESSAGES_FOR_SUMMARY", default="10"),
+    messages_to_summarize=_first_env("AGENT_MESSAGES_TO_SUMMARIZE", default="4"),
+    chat_window_size=_first_env("AGENT_CHAT_WINDOW_SIZE", default="6"),
     repository_root_path=_first_env("REPOSITORY_ROOT_PATH") or "",
 )
diff --git a/agent/src/config/prompt.md b/agent/src/config/prompt.md
@@ -16,11 +16,22 @@ Careful local coding agent optimized for small offline models.
 - Avoid `get_repository_tree` unless the local folder structure is unclear.
 - Use `create_file` only when the file does not exist.
 - Use `write_file` only after reading enough of the target file to preserve it safely.
-- After Python edits, run `run_linting` on the touched paths.
+- For create/init/scaffold requests, you must call at least one writing tool (`make_dirs`, `create_file`, or `write_file`) before finishing.
+- Never end a create/init/scaffold request with only linting or inspection calls.
+- Run `run_linting` only after Python edits and only with explicit touched paths.
+- If a tool call fails due to invalid arguments/schema, immediately retry the same tool with corrected arguments.
+- Do not finish the task right after a tool-argument error.
+- If `list_dir` returns an empty directory during a scaffold task, do not call `list_dir` on the same path again; create the next required files.
 
 # Workflow
 
 1. Inspect narrowly.
 2. Make the smallest safe change.
 3. Validate the touched Python files.
 4. Return a short summary with remaining risk, if any.
+
+# Completion criteria
+
+- Do not finish if the task asks to create/modify code and no successful writing tool has run.
+- Do not finish immediately after a tool error; retry with corrected arguments first.
+- For scaffold tasks, ensure at least one runnable entry file and one relevant test file are created before finishing.
diff --git a/agent/src/graph/nodes.py b/agent/src/graph/nodes.py
@@ -1,13 +1,62 @@
 from config.config import config
 from graph.state import AgentState
-from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage
 from langgraph.prebuilt import ToolNode
 from model.model import model, summary_model
 from tools.registry import TOOLS_REGISTRY
 
 tool_node = ToolNode(tools=TOOLS_REGISTRY, messages_key="messages")
 
 
+def _is_repeating_tool_result(messages: list) -> bool:
+    """
+    Tell whether the two latest tool results in the recent history are identical.
+
+    Only the last 8 non-system messages are considered. Two tool results count
+    as a repeat when they share the tool name and the same stripped content.
+    """
+    window = [msg for msg in messages if msg.type != "system"][-8:]
+    tool_results = [msg for msg in window if isinstance(msg, ToolMessage)]
+    if len(tool_results) < 2:
+        return False
+    previous, latest = tool_results[-2:]
+    if latest.name != previous.name:
+        return False
+    return str(latest.content).strip() == str(previous.content).strip()
+
+
+def _task_requires_file_changes(messages: list) -> bool:
+    """
+    Guess from the first human message whether the task needs file writes.
+
+    Runs a case-insensitive substring search for a fixed list of English and
+    Spanish keywords; returns False when there is no human message.
+    """
+    first_human = None
+    for candidate in messages:
+        if isinstance(candidate, HumanMessage):
+            first_human = candidate
+            break
+    if not first_human:
+        return False
+    lowered = str(first_human.content).lower()
+    write_keywords = (
+        "crea",
+        "create",
+        "implement",
+        "estructura",
+        "scaffold",
+        "refactor",
+        "fix",
+        "corrige",
+        "modifica",
+        "build",
+    )
+    for keyword in write_keywords:
+        if keyword in lowered:
+            return True
+    return False
+
+
+def _has_successful_write(messages: list) -> bool:
+    """
+    Report whether any writing tool has returned a result containing "Success:".
+    """
+    write_tools = {"make_dir", "make_dirs", "create_file", "write_file"}
+    return any(
+        isinstance(message, ToolMessage) and message.name in write_tools and "Success:" in str(message.content)
+        for message in messages
+    )
+
+
+def _last_tool_error(messages: list) -> bool:
+    """
+    Check whether the newest tool result reports an error.
+
+    Scans the history backwards and stops at the first tool or AI message:
+    a tool message yields True when its stripped content starts with "Error";
+    an AI message, or no such message at all, yields False.
+    """
+    latest = next(
+        (message for message in reversed(messages) if isinstance(message, (ToolMessage, AIMessage))),
+        None,
+    )
+    if not isinstance(latest, ToolMessage):
+        return False
+    return str(latest.content).strip().startswith("Error")
+
+
 def memory_manager_node(state: AgentState):
     """
     Makes a summary of oldest 'config.messages_to_summarize' messages
@@ -38,6 +87,18 @@ def explorer_node(state: AgentState):
     if steps > config.max_steps:
         return {"steps": steps, "messages": [AIMessage(content="Stopping: too many steps")]}
     all_messages = state["messages"]
+    if _is_repeating_tool_result(all_messages):
+        return {
+            "steps": steps,
+            "messages": [
+                AIMessage(
+                    content=(
+                        "Stopping to avoid a tool loop: same tool result was repeated. "
+                        "Use a different tool or finalize with a concise result."
+                    )
+                )
+            ],
+        }
     system_message = all_messages[0]
     window_size = config.chat_window_size
     if len(all_messages) > window_size:
@@ -46,6 +107,18 @@ def explorer_node(state: AgentState):
         messages_to_send = [system_message, *chat_context]
     else:
         messages_to_send = all_messages
+
+    if _last_tool_error(all_messages):
+        messages_to_send = [
+            *messages_to_send,
+            HumanMessage(content="The previous tool call failed. Retry with corrected tool arguments."),
+        ]
+    elif _task_requires_file_changes(all_messages) and not _has_successful_write(all_messages):
+        messages_to_send = [
+            *messages_to_send,
+            HumanMessage(content="This task requires file changes. Use writing tools before finishing."),
+        ]
+
     response = model.invoke(messages_to_send)
     return {"messages": [response], "steps": steps}
 
@@ -54,6 +127,12 @@ def router_logic(state: AgentState):
     """
     Control function that decides next step
     """
+    if state.get("steps", 0) < config.max_steps:
+        if _last_tool_error(state["messages"]):
+            return "retry"
+        if _task_requires_file_changes(state["messages"]) and not _has_successful_write(state["messages"]):
+            return "retry"
+
     last_message = state["messages"][-1]
     if getattr(last_message, "tool_calls", None):
         return "tool_executor"
diff --git a/agent/src/graph/workflow.py b/agent/src/graph/workflow.py
@@ -7,7 +7,9 @@
 workflow.add_node("tool_executor", tool_node)
 workflow.add_node("memory_manager", memory_manager_node)
 workflow.set_entry_point("explorer")
-workflow.add_conditional_edges("explorer", router_logic, {"tool_executor": "tool_executor", "end": END})
+workflow.add_conditional_edges(
+    "explorer", router_logic, {"tool_executor": "tool_executor", "retry": "explorer", "end": END}
+)
 workflow.add_edge("tool_executor", "memory_manager")
 workflow.add_edge("memory_manager", "explorer")
 graph = workflow.compile()
diff --git a/agent/src/main.py b/agent/src/main.py
@@ -20,5 +20,11 @@ def run(user_input: str) -> None:
 
 
 if __name__ == "__main__":
-    user_input: str = sys.argv[1]
+    if len(sys.argv) < 2:
+        print('Usage: python agent/src/main.py "your prompt"')  # noqa: T201
+        raise SystemExit(2)
+    user_input: str = " ".join(sys.argv[1:]).strip()
+    if not user_input:
+        print("Error: prompt cannot be empty")  # noqa: T201
+        raise SystemExit(2)
     run(user_input=user_input)
diff --git a/agent/src/tools/bash/executor.py b/agent/src/tools/bash/executor.py
@@ -7,7 +7,7 @@
 
 MAX_BASH_TIMEOUT_SECONDS = 20
 MAX_BASH_OUTPUT_CHARS = 8000
-FORBIDDEN_SHELL_TOKENS = {"&&", "||", ";", "|", ">", "<", "$(", "`"}
+UNSUPPORTED_SHELL_OPERATORS = {"&&", "||", ";", "|", ">", "<"}
 READ_ONLY_COMMAND_ALLOWLIST = {
     "pwd",
     "ls",
@@ -20,8 +20,8 @@
     "stat",
     "du",
     "git",
-    "python",
 }
+READ_ONLY_GIT_SUBCOMMANDS = {"status", "diff", "log", "show", "rev-parse", "branch", "ls-files"}
 
 
 def _truncate_output(output: str, max_chars: int) -> str:
@@ -30,11 +30,27 @@ def _truncate_output(output: str, max_chars: int) -> str:
     return f"{output[:max_chars]}\n... output truncated at {max_chars} chars"
 
 
+def _has_unsafe_path_args(parsed: list[str]) -> bool:
+    """
+    Detect arguments that point outside the repository root.
+
+    Args:
+        parsed: Tokenized command; parsed[0] is the executable and is not inspected.
+
+    Returns:
+        True if any argument, or the value of an ``--option=value`` argument, is an
+        absolute path, a home-relative path, or contains a ``..`` path component.
+    """
+    for arg in parsed[1:]:
+        candidate = arg.strip()
+        if candidate.startswith("-"):
+            # Plain flags are harmless, but "--opt=value" can smuggle a path in its
+            # value, so only the part after "=" (empty for plain flags) is inspected.
+            _, _, candidate = candidate.partition("=")
+        if not candidate:
+            continue
+        if candidate.startswith(("/", "~")):
+            return True
+        # Compare whole path components so "./.." and "dir/.." are caught, while
+        # tokens that merely contain dots (e.g. "HEAD~1..HEAD") are still accepted.
+        if ".." in candidate.split("/"):
+            return True
+    return False
+
+
 def _validate_bash_command(command: str) -> tuple[bool, str]:
     if not command.strip():
         return False, "Error: command cannot be empty"
-    if any(token in command for token in FORBIDDEN_SHELL_TOKENS):
-        return False, "Error: command contains forbidden shell operators"
+    if "$(" in command:
+        return False, "Error: command substitution is not allowed"
+    if "`" in command:
+        return False, "Error: backtick command substitution is not allowed"
 
     try:
         parsed = shlex.split(command)
@@ -44,23 +60,35 @@ def _validate_bash_command(command: str) -> tuple[bool, str]:
     if not parsed:
         return False, "Error: command cannot be empty"
 
+    if any(token in UNSUPPORTED_SHELL_OPERATORS for token in parsed):
+        return (
+            False,
+            "Error: shell chaining/redirection operators are not supported in run_bash. "
+            "Run a single command without &&, ||, |, ;, > or <.",
+        )
+
     executable = parsed[0]
     if executable not in READ_ONLY_COMMAND_ALLOWLIST:
         allowed = ", ".join(sorted(READ_ONLY_COMMAND_ALLOWLIST))
         return False, f"Error: command '{executable}' is not allowed. Allowed commands: {allowed}"
 
-    if executable == "git" and len(parsed) > 1:
-        forbidden_git_subcommands = {"reset", "clean", "checkout", "restore", "rebase", "push", "commit", "merge"}
-        if parsed[1] in forbidden_git_subcommands:
-            return False, f"Error: git subcommand '{parsed[1]}' is not allowed"
+    if _has_unsafe_path_args(parsed):
+        return False, "Error: command contains unsafe path arguments (absolute, home, or parent traversal)"
+
+    if executable == "git":
+        if len(parsed) < 2:
+            return False, "Error: git command requires a read-only subcommand"
+        if parsed[1] not in READ_ONLY_GIT_SUBCOMMANDS:
+            allowed = ", ".join(sorted(READ_ONLY_GIT_SUBCOMMANDS))
+            return False, f"Error: git subcommand '{parsed[1]}' is not allowed. Allowed: {allowed}"
 
     return True, "OK"
 
 
 @tool
 def run_bash(command: str, timeout_seconds: int = 10, max_output_chars: int = MAX_BASH_OUTPUT_CHARS) -> str:
     """
-    Run a single safe read-only bash command from repository root.
+    Run a single safe read-only command from repository root.
     """
     is_valid, reason = _validate_bash_command(command)
     if not is_valid:
diff --git a/agent/src/tools/python/coder.py b/agent/src/tools/python/coder.py
diff --git a/agent/src/tools/registry.py b/agent/src/tools/registry.py
diff --git a/agent/src/tools/utilities.py b/agent/src/tools/utilities.py