refactor(tracing): drop span_name from Fireworks adapter

xzrderek · xzrderek · commit 0c2f7dbe8b59 · 2026-04-20T21:44:50.000-07:00
Follow-up to the previous commit: instead of accepting span_name and
raising NotImplementedError, just remove the parameter. Each /v1/traces
row is one LLM call with no nested span structure, so span_name has no
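meaningful semantics here. Callers that still pass span_name now get a
clear TypeError at call time. Custom converters are now invoked as
converter(trace, include_tool_calls), so any converter that still
requires a third span_name argument must be updated as well.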

span_name is removed from TraceDictConverter.__call__,
convert_trace_dict_to_evaluation_row, extract_messages_from_trace_dict,
and FireworksTracingAdapter.get_evaluation_rows. The Langfuse adapter is
unaffected (it defines its own TraceDictConverter).

Made-with: Cursor
diff --git a/eval_protocol/adapters/fireworks_tracing.py b/eval_protocol/adapters/fireworks_tracing.py
@@ -33,14 +33,12 @@ def __call__(
         self,
         trace: Dict[str, Any],
         include_tool_calls: bool,
-        span_name: Optional[str],
     ) -> Optional[EvaluationRow]:
         """Convert a trace dictionary to an EvaluationRow.
 
         Args:
             trace: The trace dictionary to convert
             include_tool_calls: Whether to include tool calling information
-            span_name: Optional span name to extract messages from
 
         Returns:
             EvaluationRow or None if the trace should be skipped
@@ -49,34 +47,20 @@ def __call__(
 
 
 def convert_trace_dict_to_evaluation_row(
-    trace: Dict[str, Any], include_tool_calls: bool = True, span_name: Optional[str] = None
+    trace: Dict[str, Any], include_tool_calls: bool = True
 ) -> Optional[EvaluationRow]:
     """Convert a trace dictionary (from proxy API) to EvaluationRow format.
 
     Args:
         trace: Trace dictionary from Fireworks proxy API
         include_tool_calls: Whether to include tool calling information
-        span_name: Not supported by this converter. Each row returned by the
-            Fireworks tracing endpoint is a single LLM call, so there is no
-            nested-span structure to walk. Pass a custom ``TraceDictConverter``
-            via ``get_evaluation_rows(..., converter=...)`` if you need
-            span-specific extraction logic.
 
     Returns:
         EvaluationRow or None if conversion fails
-
-    Raises:
-        NotImplementedError: If ``span_name`` is provided.
     """
-    if span_name:
-        raise NotImplementedError(
-            "span_name is not supported by the default Fireworks tracing converter. "
-            "Each trace row is already a single LLM call; provide a custom "
-            "TraceDictConverter to get_evaluation_rows() for span-aware logic."
-        )
     try:
         # Extract messages from trace input and output
-        messages = extract_messages_from_trace_dict(trace, include_tool_calls, span_name)
+        messages = extract_messages_from_trace_dict(trace, include_tool_calls)
 
         # Extract tools if available. `Input` carries the request payload,
         # which optionally includes a `tools` array when tool-calling is used.
@@ -135,9 +119,7 @@ def convert_trace_dict_to_evaluation_row(
         return None
 
 
-def extract_messages_from_trace_dict(
-    trace: Dict[str, Any], include_tool_calls: bool = True, span_name: Optional[str] = None
-) -> List[Message]:
+def extract_messages_from_trace_dict(trace: Dict[str, Any], include_tool_calls: bool = True) -> List[Message]:
     """Extract messages from a Fireworks trace row.
 
     The Fireworks tracing endpoint returns one row per LLM call with the
@@ -147,22 +129,10 @@ def extract_messages_from_trace_dict(
     Args:
         trace: Trace dictionary from proxy API
         include_tool_calls: Whether to include tool calling information
-        span_name: Not supported. Pass a custom ``TraceDictConverter`` to
-            ``FireworksTracingAdapter.get_evaluation_rows`` if you need
-            span-specific extraction.
 
     Returns:
         List of Message objects
-
-    Raises:
-        NotImplementedError: If ``span_name`` is provided.
     """
-    if span_name:
-        raise NotImplementedError(
-            "span_name is not supported by extract_messages_from_trace_dict for "
-            "Fireworks traces; each row is already a single LLM call."
-        )
-
     messages: List[Message] = []
     try:
         # `Input` carries the request messages; `Output` carries the
@@ -364,7 +334,6 @@ def get_evaluation_rows(
         include_tool_calls: bool = True,
         sleep_between_gets: float = 0.1,
         max_retries: int = 3,
-        span_name: Optional[str] = None,
         converter: Optional[TraceDictConverter] = None,
     ) -> List[EvaluationRow]:
         """Pull traces from Langfuse via proxy and convert to EvaluationRow format.
@@ -387,9 +356,6 @@ def get_evaluation_rows(
             include_tool_calls: Whether to include tool calling traces
             sleep_between_gets: Sleep time between polling attempts (default: 2.5s)
             max_retries: Max retry attempts used by proxy (default: 3)
-            span_name: Only supported when a custom ``converter`` is supplied.
-                The default Fireworks converter does not walk nested spans
-                (each trace row is already a single LLM call).
             converter: Optional custom converter implementing TraceDictConverter protocol.
                 If provided, this will be used instead of the default conversion logic.
 
@@ -398,18 +364,11 @@ def get_evaluation_rows(
 
         Raises:
             ValueError: If tags list is empty or no ``rollout_id`` tag is present.
-            NotImplementedError: If ``span_name`` is provided without a custom ``converter``.
         """
         # Validate that tags are provided
         if not tags or len(tags) == 0:
             raise ValueError("At least one tag is required to fetch traces")
 
-        if span_name and converter is None:
-            raise NotImplementedError(
-                "span_name is not supported by the default Fireworks tracing converter. "
-                "Pass a custom converter=TraceDictConverter(...) if you need span-aware logic."
-            )
-
         # Pull out rollout_id only, since that is the task-level id needed to fetch traces.
         rollout_id = next(
             (t.split(":", 1)[1] for t in tags if t.startswith("rollout_id:")),
@@ -483,9 +442,9 @@ def get_evaluation_rows(
         for trace in traces:
             try:
                 if converter:
-                    eval_row = converter(trace, include_tool_calls, span_name)
+                    eval_row = converter(trace, include_tool_calls)
                 else:
-                    eval_row = convert_trace_dict_to_evaluation_row(trace, include_tool_calls, span_name)
+                    eval_row = convert_trace_dict_to_evaluation_row(trace, include_tool_calls)
                 if eval_row:
                     eval_rows.append(eval_row)
             except (AttributeError, ValueError, KeyError) as e: