FSoft-AI4Code
diff --git a/README.md b/README.md
Lines changed: 15 additions & 6 deletions
diff --git a/codewiki/src/be/llm_services.py b/codewiki/src/be/llm_services.py
Lines changed: 46 additions & 18 deletions
diff --git a/codewiki/src/be/utils.py b/codewiki/src/be/utils.py
Lines changed: 86 additions & 72 deletions
@@ -45,14 +45,23 @@ codewiki --version
 CodeWiki supports multiple LLM providers: **OpenAI-compatible**, **Anthropic**, **AWS Bedrock**, **Azure OpenAI**, plus subscription mode via **Claude Code** and **Codex** CLIs (no API key required).
 
 ```bash
-# Anthropic
+# OpenAI-compatible
 codewiki config set \
   --provider openai-compatible \
-  --api-key 3rRvIM7UNTmwt9ugiFi0Zkjgn0JA8WOjEUMfATsO \
-  --base-url  https://gateway.ai.cloudflare.com/v1/def31e2cf1530789c604bdaa2abbfcf1/openai-proxy/compat \
-  --main-model openai/gpt-5.4 \
-  --cluster-model openai/gpt-5.4 \
-  --fallback-model openai/gpt-5.3
+  --api-key YOUR_API_KEY \
+  --base-url https://api.anthropic.com \
+  --main-model claude-sonnet-4 \
+  --cluster-model claude-sonnet-4 \
+  --fallback-model glm-4p5
+
+# Anthropic
+codewiki config set \
+  --provider anthropic \
+  --api-key YOUR_API_KEY \
+  --base-url https://api.anthropic.com \
+  --main-model claude-sonnet-4 \
+  --cluster-model claude-sonnet-4 \
+  --fallback-model glm-4p5
 
 # Azure OpenAI
 codewiki config set \
 
@@ -13,7 +13,7 @@
 from pydantic_ai.providers.openai import OpenAIProvider
 from pydantic_ai.models.openai import OpenAIModelSettings
 from pydantic_ai.models.fallback import FallbackModel
-from openai import OpenAI
+from openai import OpenAI, BadRequestError
 
 from codewiki.src.config import Config
 
@@ -24,12 +24,12 @@ def _should_use_max_completion_tokens(model_name: str, base_url: str) -> bool:
     """
     Determine whether to use max_completion_tokens instead of max_tokens.
 
-    Newer OpenAI models (o1, o3, gpt-4o, etc.) require max_completion_tokens.
-    Anthropic and other providers still use max_tokens.
+    Newer OpenAI models (o1, o3, o4, gpt-4o, gpt-5, etc.) require
+    max_completion_tokens. Anthropic and other providers still use max_tokens.
     """
     model_lower = model_name.lower()
     # OpenAI models that require max_completion_tokens
-    new_openai_patterns = ("o1", "o3", "gpt-4o", "gpt-4-turbo")
+    new_openai_patterns = ("o1", "o3", "o4", "gpt-4o", "gpt-4-turbo", "gpt-5")
     if any(pattern in model_lower for pattern in new_openai_patterns):
         return True
     # If base_url points to OpenAI directly, newer models may need it
@@ -180,23 +180,51 @@ def call_llm(
     # Default: OpenAI-compatible
     client = create_openai_client(config)
 
-    # Use the correct token parameter based on model/provider
-    token_kwargs = {}
-    if _should_use_max_completion_tokens(model, config.llm_base_url):
-        token_kwargs["max_completion_tokens"] = config.max_tokens
-        logger.debug("Using max_completion_tokens=%d for model %s", config.max_tokens, model)
-    else:
-        token_kwargs["max_tokens"] = config.max_tokens
-
-    response = client.chat.completions.create(
-        model=model,
-        messages=[{"role": "user", "content": prompt}],
-        temperature=temperature,
-        **token_kwargs
-    )
+    # Use the correct token parameter based on model/provider; if the server
+    # rejects our choice, swap to the other token kwarg and retry once.
+    use_completion_tokens = _should_use_max_completion_tokens(model, config.llm_base_url)
+    primary_key = "max_completion_tokens" if use_completion_tokens else "max_tokens"
+    fallback_key = "max_tokens" if use_completion_tokens else "max_completion_tokens"
+
+    base_kwargs = {
+        "model": model,
+        "messages": [{"role": "user", "content": prompt}],
+        "temperature": temperature,
+    }
+
+    try:
+        response = client.chat.completions.create(
+            **base_kwargs,
+            **{primary_key: config.max_tokens},
+        )
+    except BadRequestError as e:
+        if _is_unsupported_token_param_error(e, primary_key):
+            logger.info(
+                "Provider rejected %s for model %s; retrying with %s.",
+                primary_key, model, fallback_key,
+            )
+            response = client.chat.completions.create(
+                **base_kwargs,
+                **{fallback_key: config.max_tokens},
+            )
+        else:
+            raise
     return response.choices[0].message.content
 
 
+def _is_unsupported_token_param_error(err: BadRequestError, param: str) -> bool:
+    """Return True if *err* reports that token kwarg *param* is unsupported.
+
+    Args:
+        err: The 400 error raised by the OpenAI client.
+        param: The token-limit keyword that was sent ("max_tokens" or
+            "max_completion_tokens").
+
+    Returns:
+        True when the structured error payload names *param* with code
+        "unsupported_parameter", or when the error text contains both
+        "unsupported parameter" and *param*; False otherwise.
+    """
+    body = getattr(err, "body", None)
+    if isinstance(body, dict):
+        # The payload may arrive either wrapped as {"error": {...}} or already
+        # unwrapped to the inner error object (the openai SDK appears to pass
+        # the unwrapped form as ``body`` -- NOTE(review): confirm for the
+        # pinned SDK version). Accept both shapes.
+        nested = body.get("error")
+        details = nested if isinstance(nested, dict) else body
+        if details.get("param") == param and details.get("code") == "unsupported_parameter":
+            return True
+    # Fallback: message-based sniff for proxies that don't preserve structure
+    msg = str(err).lower()
+    return "unsupported parameter" in msg and param in msg
+
+
 def _call_llm_via_litellm(
     prompt: str,
     config: Config,
 
@@ -143,92 +143,106 @@ def extract_mermaid_blocks(content: str) -> List[Tuple[int, str]]:
     return mermaid_blocks
 
 
+# Process-wide latch: set to True the first time the PythonMonkey-backed
+# parser proves unusable, so later diagrams skip it without retrying.
+_PYTHONMONKEY_BROKEN = False
+
+
+async def _try_pythonmonkey_parse(diagram_content: str) -> str | None:
+    """Attempt to parse via PythonMonkey/mermaid-parser-py.
+
+    Args:
+        diagram_content: Mermaid diagram source to validate.
+
+    Returns:
+        "" when the diagram parses cleanly, the extracted "Error:..." text
+        when the parser reports a parse error, or None when this path cannot
+        give a verdict (import failure, broken JS event-loop binding, or an
+        unrecognised exception) so the caller can fall back to mermaid-py.
+    """
+    global _PYTHONMONKEY_BROKEN
+    if _PYTHONMONKEY_BROKEN:
+        return None
+
+    import sys
+    import os
+
+    try:
+        from mermaid_parser.parser import parse_mermaid_py
+    except Exception:
+        # Any import-time failure means this backend is unavailable for the
+        # rest of the process; latch it so we do not retry per diagram.
+        _PYTHONMONKEY_BROKEN = True
+        return None
+
+    # Redirect stderr to suppress mermaid parser JavaScript errors. Keep our
+    # own reference to the devnull handle so cleanup closes exactly the stream
+    # opened here, not whatever sys.stderr happens to be after the await.
+    old_stderr = sys.stderr
+    devnull = open(os.devnull, 'w')
+    sys.stderr = devnull
+    try:
+        if (
+            _main_loop is not None
+            and _main_loop.is_running()
+            and threading.get_ident() != _main_loop_thread_ident
+        ):
+            # Caller is on a different thread than the main loop: schedule the
+            # coroutine on the main loop and await its result from here.
+            fut = asyncio.run_coroutine_threadsafe(
+                parse_mermaid_py(diagram_content), _main_loop
+            )
+            await asyncio.wrap_future(fut)
+        else:
+            await parse_mermaid_py(diagram_content)
+        return ""
+    except Exception as e:
+        error_str = str(e)
+        # PythonMonkey 1.3.1 only supports Python 3.8-3.11; on newer Pythons
+        # every JS call raises this. Latch the failure once so subsequent
+        # diagrams skip the broken path and go straight to mermaid-py.
+        if "cannot find a running Python event-loop" in error_str:
+            _PYTHONMONKEY_BROKEN = True
+            return None
+        match = re.search(r"Error:(.*?)(?=Stack Trace:|$)", error_str, re.DOTALL)
+        if match:
+            return match.group(0).strip()
+        # Unknown error from the JS parser — fall back rather than surface it,
+        # but leave a trace so the fallback is diagnosable.
+        logger.debug(
+            "Unrecognised mermaid-parser-py error; falling back to mermaid-py: %s",
+            error_str,
+        )
+        return None
+    finally:
+        # Restore first, then close the handle we opened.
+        sys.stderr = old_stderr
+        devnull.close()
+
+
+def _parse_via_mermaid_py(diagram_content: str) -> str:
+    """Validate a diagram with mermaid-py and report the outcome as text.
+
+    Constructs ``mermaid.Mermaid`` from *diagram_content*. If construction
+    raises, the exception text is returned as the parse error; otherwise the
+    diagram is treated as valid and "" is returned. The result is driven by
+    the exception alone, never by the response body, so a successful render
+    is not mistaken for an error.
+
+    NOTE(review): this relies on mermaid-py raising when the diagram does not
+    parse -- confirm for the pinned mermaid-py version.
+    """
+    import mermaid as md
+
+    problem = ""
+    try:
+        md.Mermaid(diagram_content)
+    except Exception as exc:
+        problem = str(exc)
+    return problem
+
+
 async def validate_single_diagram(diagram_content: str, diagram_num: int, line_start: int) -> str:
     """
     Validate a single mermaid diagram.
-    
+
     Args:
         diagram_content: The mermaid diagram content
         diagram_num: Diagram number for error reporting
         line_start: Starting line number in the file
-        
+
     Returns:
         Error message if invalid, empty string if valid
     """
-    import sys
-    import os
-    from io import StringIO
-
-    core_error = ""
-    
-    try:
-        from mermaid_parser.parser import parse_mermaid_py
-        # logger.debug("Using mermaid-parser-py to validate mermaid diagrams")
-    
-        try:
-            # Redirect stderr to suppress mermaid parser JavaScript errors
-            old_stderr = sys.stderr
-            sys.stderr = open(os.devnull, 'w')
-            
-            try:
-                if (
-                    _main_loop is not None
-                    and _main_loop.is_running()
-                    and threading.get_ident() != _main_loop_thread_ident
-                ):
-                    # Caller is on a worker-thread loop (caw FastMCP path).
-                    # Run the coroutine on the loop where PythonMonkey was
-                    # bound so its asyncio.get_running_loop() succeeds.
-                    fut = asyncio.run_coroutine_threadsafe(
-                        parse_mermaid_py(diagram_content), _main_loop
-                    )
-                    json_output = await asyncio.wrap_future(fut)
-                else:
-                    json_output = await parse_mermaid_py(diagram_content)
-            finally:
-                # Restore stderr
-                sys.stderr.close()
-                sys.stderr = old_stderr
-        except Exception as e:
-            error_str = str(e)
-            
-            # Extract the core error information from the exception message
-            # Look for the pattern that contains "Parse error on line X:"
-            error_pattern = r"Error:(.*?)(?=Stack Trace:|$)"
-            match = re.search(error_pattern, error_str, re.DOTALL)
-            
-            if match:
-                core_error = match.group(0).strip()
-                core_error = core_error
-            else:
-                logger.error(f"No match found for error pattern, fallback to mermaid-py\n{error_str}")
-                logger.error(f"Traceback: {traceback.format_exc()}")
-                raise Exception(error_str)
-
-    except Exception as e:
-        logger.warning("Using mermaid-py to validate mermaid diagrams")
+    core_error = await _try_pythonmonkey_parse(diagram_content)
+    # None means the PythonMonkey path could not give a verdict (it does not
+    # mean the diagram is valid), so fall back to mermaid-py.
+    if core_error is None:
         try:
-            import mermaid as md
-            # Create Mermaid object and check response
-            render = md.Mermaid(diagram_content)
-            core_error = render.svg_response.text
-            
+            core_error = _parse_via_mermaid_py(diagram_content)
         except Exception as e:
             return f"  Diagram {diagram_num}: Exception during validation - {str(e)}"
 
-    # Check if response indicates a parse error
-    if core_error:
-        # Extract line number from parse error and calculate actual line in markdown file
-        line_match = re.search(r'line (\d+)', core_error)
-        if line_match:
-            error_line_in_diagram = int(line_match.group(1))
-            actual_line_in_file = line_start + error_line_in_diagram
-            newline = '\n'
-            return f"Diagram {diagram_num}: Parse error on line {actual_line_in_file}:{newline}{newline.join(core_error.split(newline)[1:])}"
-        else:
-            return f"Diagram {diagram_num}: {core_error}"
-    
-    return ""  # No error
+    if not core_error:
+        return ""
+
+    # Extract the line number from the parse error and offset it by where the
+    # diagram starts to get the line in the enclosing file.
+    line_match = re.search(r'line (\d+)', core_error)
+    if line_match:
+        error_line_in_diagram = int(line_match.group(1))
+        actual_line_in_file = line_start + error_line_in_diagram
+        newline = '\n'
+        return f"Diagram {diagram_num}: Parse error on line {actual_line_in_file}:{newline}{newline.join(core_error.split(newline)[1:])}"
+    return f"Diagram {diagram_num}: {core_error}"
 
 
 if __name__ == "__main__":