|
| 1 | +# Fixed Async Streaming Example |
| 2 | + |
| 3 | +## The Problem |
| 4 | +Using both `@trace()` decorator and `trace_async_openai()` creates duplicate traces that break tests. |
| 5 | + |
| 6 | +## The Solution |
| 7 | +Use **ONLY** `trace_async_openai()` - remove all decorators: |
| 8 | + |
| 9 | +```python |
| 10 | +import asyncio |
| 11 | +from openai import AsyncOpenAI |
| 12 | +from openlayer.lib import trace_async_openai |
| 13 | + |
| 14 | +class say_hi: |
| 15 | + def __init__(self): |
| 16 | + self.openai_client = trace_async_openai(AsyncOpenAI()) |
| 17 | + |
| 18 | + # ❌ Remove @trace() or @trace_async() decorators |
| 19 | + async def hi(self, cur_str: str): |
| 20 | + messages = [ |
| 21 | + { |
| 22 | + "role": "system", |
| 23 | + "content": "say hi !", |
| 24 | + }, |
| 25 | + {"role": "user", "content": cur_str} |
| 26 | + ] |
| 27 | + temperature = 0 |
| 28 | + |
| 29 | + # This single call will be properly traced by trace_async_openai |
| 30 | + response = await self.openai_client.chat.completions.create( |
| 31 | + model="gpt-3.5-turbo-16k", |
| 32 | + messages=messages, |
| 33 | + temperature=temperature, |
| 34 | + max_tokens=100, |
| 35 | + stream=True, |
| 36 | + ) |
| 37 | + |
| 38 | + complete_answer = "" |
| 39 | + async for chunk in response: |
| 40 | + delta = chunk.choices[0].delta |
| 41 | + if hasattr(delta, "content") and delta.content: |
| 42 | + chunk_content = delta.content |
| 43 | + complete_answer += chunk_content |
| 44 | + yield chunk_content |
| 45 | + |
| 46 | +# Usage: an async generator must be consumed inside a coroutine |
| 47 | +# (a top-level `async for` outside an async function is a SyntaxError) |
| 48 | +async def main(): |
| 49 | +    obj_ = say_hi() |
| 50 | +    print("Streaming response:") |
| 51 | +    async for chunk in obj_.hi("hi you are an async assistant"): |
| 52 | +        print(chunk, end="") |
| 53 | +    print("\nStreaming finished.") |
| 54 | + |
| 55 | +asyncio.run(main()) |
| 53 | +``` |
| 54 | + |
| 55 | +## What This Fixes |
| 56 | +- ✅ **Single trace only** - no more duplicate requests |
| 57 | +- ✅ **Tests work properly** - only one request to test against |
| 58 | +- ✅ **Complete tracing info** - input, output, tokens, cost, timing all captured |
| 59 | +- ✅ **Proper async streaming** - chunks yielded correctly |
| 60 | + |
| 61 | +## Why This Works |
| 62 | +The `trace_async_openai()` wrapper is specifically designed for async OpenAI calls and: |
| 63 | +- Automatically captures the request input (the messages, which include the `cur_str` value) |
| 64 | +- Traces the complete streaming response |
| 65 | +- Includes OpenAI-specific metrics (tokens, cost, model) |
| 66 | +- Maintains proper async context |
| 67 | +- **Generates only ONE trace entry** |
| 68 | + |
| 69 | +## Key Insight |
| 70 | +Your sync version works because you're not double-tracing. Apply the same principle to async: **use only one tracing method, not both together**. |
0 commit comments