Fix the CI failure.

whoIam0987 · whoIam0987 · commit 3ba75d5a05c6 · 2026-03-02T10:25:48.000+08:00
diff --git a/CHANGELOG-loongsuite.md b/CHANGELOG-loongsuite.md
@@ -22,3 +22,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - `loongsuite-distro`: initialize loongsuite python agent distro
   ([#126](https://github.com/alibaba/loongsuite-python-agent/pull/126))
+
+- `loongsuite-instrumentation-litellm`: add support for litellm
+  ([#88](https://github.com/alibaba/loongsuite-python-agent/pull/88))
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-litellm/README.rst b/instrumentation-loongsuite/loongsuite-instrumentation-litellm/README.rst
@@ -22,7 +22,7 @@ Configuration
 
 The instrumentation can be enabled/disabled using environment variables:
 
-* ``ARMS_LITELLM_INSTRUMENTATION_ENABLED``: Enable/disable instrumentation (default: true)
+* ``ENABLE_LITELLM_INSTRUMENTOR``: Enable/disable instrumentation (default: true)
 
 Usage
 -----
@@ -49,7 +49,6 @@ This instrumentation automatically captures:
 * LLM completion calls (sync and async)
 * Streaming completions
 * Embedding calls
-* Image generation calls
 * Retry mechanisms
 * Tool/function calls
 * Request and response metadata
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-litellm/src/opentelemetry/instrumentation/litellm/__init__.py b/instrumentation-loongsuite/loongsuite-instrumentation-litellm/src/opentelemetry/instrumentation/litellm/__init__.py
@@ -31,7 +31,6 @@
 The instrumentation can be configured using environment variables:
 
 * ``ENABLE_LITELLM_INSTRUMENTOR``: Enable/disable instrumentation (default: true)
-* ``ARMS_LITELLM_INSTRUMENTATION_ENABLED``: Alternative enable/disable flag (default: true)
 
 Usage
 -----
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-litellm/src/opentelemetry/instrumentation/litellm/_embedding_wrapper.py b/instrumentation-loongsuite/loongsuite-instrumentation-litellm/src/opentelemetry/instrumentation/litellm/_embedding_wrapper.py
@@ -34,7 +34,7 @@
 
 def _is_instrumentation_enabled() -> bool:
     """Check if instrumentation is enabled via environment variable."""
-    enabled = os.getenv("ARMS_LITELLM_INSTRUMENTATION_ENABLED", "true").lower()
+    enabled = os.getenv("ENABLE_LITELLM_INSTRUMENTOR", "true").lower()
     return enabled != "false"
 
 
@@ -60,8 +60,6 @@ def __call__(self, *args, **kwargs):
             if get_current_span().get_span_context().is_valid:
                 return self.original_func(*args, **kwargs)
 
-        # Create invocation object
-
         # Create invocation object
         invocation = create_embedding_invocation_from_litellm(**kwargs)
 
@@ -72,6 +70,7 @@ def __call__(self, *args, **kwargs):
                 context.set_value(SUPPRESS_LLM_SDK_KEY, True)
             )
         except Exception:
+            # If context setting fails, continue without suppression token
             pass
 
         # Start Embedding invocation
@@ -161,8 +160,6 @@ async def __call__(self, *args, **kwargs):
             if get_current_span().get_span_context().is_valid:
                 return await self.original_func(*args, **kwargs)
 
-        # Create invocation object
-
         # Create invocation object
         invocation = create_embedding_invocation_from_litellm(**kwargs)
 
@@ -173,6 +170,7 @@ async def __call__(self, *args, **kwargs):
                 context.set_value(SUPPRESS_LLM_SDK_KEY, True)
             )
         except Exception:
+            # If context setting fails, continue without suppression token
             pass
 
         # Start Embedding invocation
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-litellm/src/opentelemetry/instrumentation/litellm/_utils.py b/instrumentation-loongsuite/loongsuite-instrumentation-litellm/src/opentelemetry/instrumentation/litellm/_utils.py
@@ -248,6 +248,7 @@ def convert_litellm_messages_to_genai_format(
                         try:
                             arguments = json.loads(arguments)
                         except Exception:
+                            # If arguments are not valid JSON, keep the original string
                             pass
 
                     parts.append(
@@ -306,6 +307,7 @@ def extract_output_from_litellm_response(response: Any) -> List:
                     try:
                         arguments = json.loads(arguments)
                     except Exception:
+                        # If arguments are not valid JSON, keep the original string
                         pass
 
                 parts.append(
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-litellm/src/opentelemetry/instrumentation/litellm/_wrapper.py b/instrumentation-loongsuite/loongsuite-instrumentation-litellm/src/opentelemetry/instrumentation/litellm/_wrapper.py
@@ -43,12 +43,12 @@
 logger = logging.getLogger(__name__)
 
 # Environment variable to control instrumentation
-LITELLM_INSTRUMENTATION_ENABLED = "ARMS_LITELLM_INSTRUMENTATION_ENABLED"
+ENABLE_LITELLM_INSTRUMENTOR = "ENABLE_LITELLM_INSTRUMENTOR"
 
 
 def _is_instrumentation_enabled() -> bool:
     """Check if instrumentation is enabled via environment variable."""
-    enabled = os.getenv(LITELLM_INSTRUMENTATION_ENABLED, "true").lower()
+    enabled = os.getenv(ENABLE_LITELLM_INSTRUMENTOR, "true").lower()
     return enabled != "false"
 
 
@@ -82,8 +82,6 @@ def __call__(self, *args, **kwargs):
         if is_stream and "stream_options" not in kwargs:
             kwargs["stream_options"] = {"include_usage": True}
 
-        # For streaming, we need special handling
-
         # For streaming, we need special handling
         if is_stream:
             # Create invocation object
@@ -96,6 +94,7 @@ def __call__(self, *args, **kwargs):
                     context.set_value(SUPPRESS_LLM_SDK_KEY, True)
                 )
             except Exception:
+                # If context setting fails, continue without suppression token
                 pass
 
             # Start LLM invocation
@@ -110,11 +109,14 @@ def __call__(self, *args, **kwargs):
                 stream_wrapper = StreamWrapper(
                     stream=response,
                     span=invocation.span,  # For TTFT tracking
-                    callback=lambda span,
+                    callback=None,
+                )
+                stream_wrapper.callback = (
+                    lambda span,
                     last_chunk,
                     error: self._handle_stream_end_with_handler(
                         invocation, last_chunk, error, stream_wrapper
-                    ),
+                    )
                 )
                 response = stream_wrapper
 
@@ -143,6 +145,7 @@ def __call__(self, *args, **kwargs):
                     context.set_value(SUPPRESS_LLM_SDK_KEY, True)
                 )
             except Exception:
+                # If context setting fails, continue without suppression token
                 pass
 
             # Start LLM invocation (handler creates and manages span)
@@ -242,6 +245,7 @@ def _handle_stream_end_with_handler(
                                 try:
                                     arguments = json.loads(arguments)
                                 except Exception:
+                                    # If arguments are not valid JSON, keep the original string
                                     pass
 
                             parts.append(
@@ -302,8 +306,12 @@ def _handle_stream_end_with_handler(
                 self._handler.fail_llm(
                     invocation, Error(message=str(e), type=type(e))
                 )
-            except Exception:
-                pass
+            except Exception as handler_error:
+                # Swallow exceptions from telemetry failure reporting, but log them for diagnostics.
+                logger.debug(
+                    "Error while reporting LLM failure in _handle_stream_end_with_handler: %s",
+                    handler_error,
+                )
 
 
 class AsyncCompletionWrapper:
@@ -347,6 +355,7 @@ async def __call__(self, *args, **kwargs):
                     context.set_value(SUPPRESS_LLM_SDK_KEY, True)
                 )
             except Exception:
+                # If context setting fails, continue without suppression token
                 pass
 
             # Start LLM invocation
@@ -360,11 +369,14 @@ async def __call__(self, *args, **kwargs):
                 stream_wrapper = AsyncStreamWrapper(
                     stream=response,
                     span=invocation.span,  # For TTFT tracking
-                    callback=lambda span,
+                    callback=None,
+                )
+                stream_wrapper.callback = (
+                    lambda span,
                     last_chunk,
                     error: self._handle_stream_end_with_handler(
                         invocation, last_chunk, error, stream_wrapper
-                    ),
+                    )
                 )
                 response = stream_wrapper
 
@@ -394,6 +406,7 @@ async def __call__(self, *args, **kwargs):
                     context.set_value(SUPPRESS_LLM_SDK_KEY, True)
                 )
             except Exception:
+                # If context setting fails, continue without suppression token
                 pass
 
             # Start LLM invocation
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-litellm/test-requirements.txt b/instrumentation-loongsuite/loongsuite-instrumentation-litellm/test-requirements.txt
@@ -1,4 +1,4 @@
-litellm>=1.79.0
+litellm>=1.0.0
 pytest
 pytest-asyncio
 openai
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-litellm/tests/test_error_handling.py b/instrumentation-loongsuite/loongsuite-instrumentation-litellm/tests/test_error_handling.py
@@ -33,6 +33,13 @@ def setUp(self):
         LiteLLMInstrumentor().instrument(
             tracer_provider=self.tracer_provider,
         )
+        # Alias bare model names to their "openai/"-prefixed equivalents
+        litellm.model_alias_map = {
+            "qwen-turbo": "openai/qwen-turbo",
+            "qwen-plus": "openai/qwen-plus",
+        }
+        if os.environ.get("DASHSCOPE_API_KEY"):
+            os.environ["OPENAI_API_KEY"] = os.environ["DASHSCOPE_API_KEY"]
 
     def tearDown(self):
         super().tearDown()
@@ -46,23 +53,32 @@ def test_authentication_failure(self):
         Test handling of authentication failures.
         """
 
-        # Temporarily set invalid credentials
-        original_dashscope_key = os.environ.get("DASHSCOPE_API_KEY")
-        os.environ["DASHSCOPE_API_KEY"] = "invalid-key-12345"
+        # Temporarily set invalid credentials and API base URL to trigger a fast failure
+        original_keys = {
+            "DASHSCOPE_API_KEY": os.environ.get("DASHSCOPE_API_KEY"),
+            "OPENAI_API_KEY": os.environ.get("OPENAI_API_KEY"),
+            "OPENAI_API_BASE": os.environ.get("OPENAI_API_BASE"),
+        }
+        os.environ["DASHSCOPE_API_KEY"] = "invalid"
+        os.environ["OPENAI_API_KEY"] = "invalid"
+        os.environ["OPENAI_API_BASE"] = "http://localhost:1"
 
         try:
             litellm.completion(
-                model="dashscope/qwen-turbo",
+                model="qwen-turbo",
                 messages=[{"role": "user", "content": "Hello"}],
+                num_retries=0,
             )
-            self.fail("Expected authentication error but call succeeded")
+            self.fail("Expected failure but call succeeded")
         except Exception as e:
             self.assertIsNotNone(e)
         finally:
-            if original_dashscope_key:
-                os.environ["DASHSCOPE_API_KEY"] = original_dashscope_key
-            else:
-                os.environ.pop("DASHSCOPE_API_KEY", None)
+            # Restore original environment; variables that were unset (or empty) are removed
+            for key, val in original_keys.items():
+                if val:
+                    os.environ[key] = val
+                else:
+                    os.environ.pop(key, None)
 
         spans = self.get_finished_spans()
         self.assertEqual(len(spans), 1, "Should create 1 span even on error")
@@ -122,7 +138,7 @@ def test_network_timeout(self):
 
         try:
             litellm.completion(
-                model="dashscope/qwen-turbo",
+                model="qwen-turbo",
                 messages=[{"role": "user", "content": "Tell me a long story"}],
                 timeout=0.001,
             )
@@ -145,7 +161,7 @@ def test_max_tokens_exceeded(self):
         )
 
         response = litellm.completion(
-            model="dashscope/qwen-turbo",
+            model="qwen-turbo",
             messages=[{"role": "user", "content": "Write a 500 word essay"}],
             max_tokens=2,
         )
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-litellm/tests/test_retry.py b/instrumentation-loongsuite/loongsuite-instrumentation-litellm/tests/test_retry.py
@@ -69,6 +69,13 @@ def setUp(self):
         LiteLLMInstrumentor().instrument(
             tracer_provider=self.tracer_provider,
         )
+        # Alias bare model names to their "openai/"-prefixed equivalents
+        litellm.model_alias_map = {
+            "qwen-turbo": "openai/qwen-turbo",
+            "qwen-plus": "openai/qwen-plus",
+        }
+        if os.environ.get("DASHSCOPE_API_KEY"):
+            os.environ["OPENAI_API_KEY"] = os.environ["DASHSCOPE_API_KEY"]
 
     def tearDown(self):
         super().tearDown()
@@ -84,7 +91,7 @@ def test_completion_with_retries_success(self):
 
         # Business demo: Completion with retry wrapper (success case)
         response = litellm.completion_with_retries(
-            model="dashscope/qwen-turbo",
+            model="qwen-turbo",
             messages=[
                 {"role": "user", "content": "What is 1+1? Answer briefly."}
             ],
@@ -121,7 +128,8 @@ def test_async_completion_with_retries(self):
 
         async def run_async_retry():
             response = await litellm.acompletion_with_retries(
-                model="dashscope/qwen-turbo",
+                model="qwen-turbo",
+                custom_llm_provider="openai",
                 messages=[{"role": "user", "content": "Name a color."}],
                 temperature=0.0,
             )
@@ -158,7 +166,7 @@ def test_completion_with_custom_retry_config(self):
         # This demo sets custom retry parameters
         # Note: LiteLLM's retry mechanism might use different parameter names
         response = litellm.completion_with_retries(
-            model="dashscope/qwen-turbo",
+            model="qwen-turbo",
             messages=[
                 {"role": "user", "content": "What is the capital of China?"}
             ],
@@ -180,7 +188,7 @@ def test_retry_with_streaming(self):
 
         # Business demo: Streaming completion with retry wrapper
         response = litellm.completion_with_retries(
-            model="dashscope/qwen-turbo",
+            model="qwen-turbo",
             messages=[{"role": "user", "content": "Count to 3."}],
             stream=True,
             temperature=0.0,
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-litellm/tests/test_stream_completion.py b/instrumentation-loongsuite/loongsuite-instrumentation-litellm/tests/test_stream_completion.py
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-litellm/tests/test_sync_completion.py b/instrumentation-loongsuite/loongsuite-instrumentation-litellm/tests/test_sync_completion.py
diff --git a/instrumentation-loongsuite/loongsuite-instrumentation-litellm/tests/test_tool_calls.py b/instrumentation-loongsuite/loongsuite-instrumentation-litellm/tests/test_tool_calls.py

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1,4 +1,4 @@` |
| `1` | | `-litellm>=1.79.0` |
| | `1` | `+litellm>=1.0.0` |
| `2` | `2` | `pytest` |
| `3` | `3` | `pytest-asyncio` |
| `4` | `4` | `openai` |