Merge branch 'main' into add-tool-call-parsing

hassiebp · web-flow · commit c167ca825d22 · 2025-09-02T12:35:34.000+02:00
diff --git a/langfuse/_client/client.py b/langfuse/_client/client.py
@@ -2996,3 +2996,14 @@ def _url_encode(self, url: str, *, is_url_param: Optional[bool] = False) -> str:
         # we need add safe="" to force escaping of slashes
         # This is necessary for prompts in prompt folders
         return urllib.parse.quote(url, safe="")
+    
+    def clear_prompt_cache(self) -> None:
+        """Clear the entire prompt cache, removing all cached prompts.
+
+        Use this to force a complete refresh of all cached prompts, for example
+        after major updates or when the latest versions must be fetched from
+        the server. Does nothing when the client has no resources attached
+        (``self._resources`` is ``None``).
+        """
+        if self._resources is not None:
+            self._resources.prompt_cache.clear()
diff --git a/langfuse/_client/environment_variables.py b/langfuse/_client/environment_variables.py
@@ -128,3 +128,13 @@
 
 **Default value**: ``5``
 """
+
+LANGFUSE_PROMPT_CACHE_DEFAULT_TTL_SECONDS = "LANGFUSE_PROMPT_CACHE_DEFAULT_TTL_SECONDS"
+"""
+.. envvar: LANGFUSE_PROMPT_CACHE_DEFAULT_TTL_SECONDS
+
+Controls the default time-to-live (TTL) in seconds for cached prompts, i.e. how long
+a prompt stays cached before it expires. Must be an integer; it is read once at import.
+
+**Default value**: ``60``
+"""
diff --git a/langfuse/_utils/prompt_cache.py b/langfuse/_utils/prompt_cache.py
@@ -6,10 +6,14 @@
 from queue import Empty, Queue
 from threading import Thread
 from typing import Callable, Dict, List, Optional, Set
+import os
 
 from langfuse.model import PromptClient
+from langfuse._client.environment_variables import (
+    LANGFUSE_PROMPT_CACHE_DEFAULT_TTL_SECONDS
+)
 
-DEFAULT_PROMPT_CACHE_TTL_SECONDS = 60
+DEFAULT_PROMPT_CACHE_TTL_SECONDS = int(os.getenv(LANGFUSE_PROMPT_CACHE_DEFAULT_TTL_SECONDS) or 60)
 
 DEFAULT_PROMPT_CACHE_REFRESH_WORKERS = 1
 
@@ -162,6 +166,10 @@ def add_refresh_prompt_task(self, key: str, fetch_func: Callable[[], None]) -> N
         self._log.debug(f"Submitting refresh task for key: {key}")
         self._task_manager.add_task(key, fetch_func)
 
+    def clear(self) -> None:
+        """Remove all cached prompts; submitted refresh tasks are left untouched."""
+        self._cache.clear()
+
     @staticmethod
     def generate_cache_key(
         name: str, *, version: Optional[int], label: Optional[str]
diff --git a/langfuse/langchain/CallbackHandler.py b/langfuse/langchain/CallbackHandler.py
@@ -3,8 +3,10 @@
 
 import pydantic
 from opentelemetry import context, trace
+from opentelemetry.context import _RUNTIME_CONTEXT
 
 from langfuse._client.attributes import LangfuseOtelSpanAttributes
+from langfuse._client.client import Langfuse
 from langfuse._client.get_client import get_client
 from langfuse._client.span import (
     LangfuseAgent,
@@ -272,7 +274,7 @@ def on_chain_start(
                 serialized, "chain", **kwargs
             )
 
-            span = self.client.start_observation(
+            span = self._get_parent_observation(parent_run_id).start_observation(
                 name=span_name,
                 as_type=observation_type,
                 metadata=span_metadata,
@@ -336,6 +338,22 @@ def _deregister_langfuse_prompt(self, run_id: Optional[UUID]) -> None:
         if run_id is not None and run_id in self.prompt_to_parent_run_map:
             del self.prompt_to_parent_run_map[run_id]
 
+    def _get_parent_observation(
+        self, parent_run_id: Optional[UUID]
+    ) -> Union[
+        Langfuse,
+        LangfuseAgent,
+        LangfuseChain,
+        LangfuseGeneration,
+        LangfuseRetriever,
+        LangfuseSpan,
+        LangfuseTool,
+    ]:
+        """Return the observation tracked for ``parent_run_id``, else the client."""
+        if not parent_run_id or parent_run_id not in self.runs:
+            return self.client
+        return self.runs[parent_run_id]
+
     def _attach_observation(
         self,
         run_id: UUID,
@@ -369,7 +387,18 @@ def _detach_observation(
         token = self.context_tokens.pop(run_id, None)
 
         if token:
-            context.detach(token)
+            try:
+                # Directly detach from runtime context to avoid error logging
+                _RUNTIME_CONTEXT.detach(token)
+            except Exception:
+                # Context detach can fail in async scenarios - this is expected and safe to ignore
+                # The span itself was properly ended and tracing data is correctly captured
+                #
+                # Examples:
+                # 1. Token created in one async task/thread, detached in another
+                # 2. Context already detached by framework or other handlers
+                # 3. Runtime context state mismatch in concurrent execution
+                pass
 
         return cast(
             Union[
@@ -591,7 +620,7 @@ def on_tool_start(
                 serialized, "tool", **kwargs
             )
 
-            span = self.client.start_observation(
+            span = self._get_parent_observation(parent_run_id).start_observation(
                 name=self.get_langchain_run_name(serialized, **kwargs),
                 as_type=observation_type,
                 input=input_str,
@@ -626,8 +655,7 @@ def on_retriever_start(
             observation_type = self._get_observation_type_from_serialized(
                 serialized, "retriever", **kwargs
             )
-
-            span = self.client.start_observation(
+            span = self._get_parent_observation(parent_run_id).start_observation(
                 name=span_name,
                 as_type=observation_type,
                 metadata=span_metadata,
@@ -753,7 +781,9 @@ def __on_llm_action(
                 "prompt": registered_prompt,
             }
 
-            generation = self.client.start_observation(as_type="generation", **content)  # type: ignore
+            generation = self._get_parent_observation(parent_run_id).start_observation(
+                as_type="generation", **content
+            )  # type: ignore
             self._attach_observation(run_id, generation)
 
             self.last_trace_id = self.runs[run_id].trace_id
diff --git a/langfuse/version.py b/langfuse/version.py
@@ -1,3 +1,3 @@
 """@private"""
 
-__version__ = "3.3.2"
+__version__ = "3.3.3"
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [tool.poetry]
 name = "langfuse"
 
-version = "3.3.2"
+version = "3.3.3"
 description = "A client library for accessing langfuse"
 authors = ["langfuse <developers@langfuse.com>"]
 license = "MIT"
diff --git a/tests/test_prompt.py b/tests/test_prompt.py

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,3 @@`
`1`	`1`	`"""@private"""`
`2`	`2`
`3`		`-__version__ = "3.3.2"`
	`3`	`+__version__ = "3.3.3"`