Skip to content

Commit cc3bff0

Browse files
Merge branch 'main' into sentiment_analysis_tool
2 parents e34ca0a + a246af2 commit cc3bff0

20 files changed

Lines changed: 1189 additions & 181 deletions

File tree

README.md

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,16 +38,11 @@ A minimal Reason-and-Act (ReAct) agent for knowledge retrieval, implemented with
3838
- **[1.1 ReAct Agent for RAG](src/1_basics/1_react_rag/README.md)**
3939
Basic ReAct agent for step-by-step retrieval and answer generation.
4040

41-
4241
## Getting Started
4342

44-
Set your API keys in `.env`. Use `.env.example` as a template.
45-
46-
```bash
47-
cp -v .env.example .env
48-
```
43+
If you successfully created a workspace in Coder, you should already have a `.env` file in the repo.
4944

50-
Run integration tests to validate that your API keys are set up correctly.
45+
In that case you can verify that the API keys work by running integration tests with the following command:
5146

5247
```bash
5348
uv run --env-file .env pytest -sv tests/tool_tests/test_integration.py
@@ -91,7 +86,6 @@ As noted above, these are unnecessarily verbose for real applications.
9186
# uv run --env-file .env gradio src/1_basics/1_react_rag/app.py
9287
```
9388

94-
9589
### 2. Frameworks
9690

9791
Reason-and-Act Agent without the boilerplate- using the OpenAI Agent SDK.
@@ -105,15 +99,14 @@ Multi-agent examples, also via the OpenAI Agent SDK.
10599

106100
```bash
107101
uv run --env-file .env gradio src/2_frameworks/2_multi_agent/efficient.py
108-
# Verbose option- greater control over the agent flow, but less flexible.
102+
# Verbose option - greater control over the agent flow, but less flexible.
109103
# uv run --env-file .env gradio src/2_frameworks/2_multi_agent/verbose.py
110104
```
111105

112106
Python Code Interpreter demo- using the OpenAI Agent SDK, E2B for secure code sandbox, and LangFuse for observability. Refer to [src/2_frameworks/3_code_interpreter/README.md](src/2_frameworks/3_code_interpreter/README.md) for details.
113107

114108
MCP server integration example also via OpenAI Agents SDK with Gradio and Langfuse tracing. Refer to [src/2_frameworks/4_mcp/README.md](src/2_frameworks/4_mcp/README.md) for more details.
115109

116-
117110
### 3. Evals
118111

119112
Synthetic data.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ dev = [
4444
"nbqa>=1.9.1",
4545
"pip-audit>=2.7.3",
4646
"pre-commit>=4.1.0",
47+
"pymupdf>=1.26.7",
4748
"pytest>=8.3.4",
4849
"pytest-asyncio>=1.2.0",
4950
"pytest-cov>=7.0.0",

src/2_frameworks/1_react_rag/langfuse_gradio.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import gradio as gr
1111
from dotenv import load_dotenv
1212
from gradio.components.chatbot import ChatMessage
13+
from langfuse import propagate_attributes
1314

1415
from src.prompts import REACT_INSTRUCTIONS
1516
from src.utils import (
@@ -51,9 +52,14 @@ async def _main(
5152
),
5253
)
5354

54-
with langfuse_client.start_as_current_span(name="Agents-SDK-Trace") as span:
55-
span.update(input=query)
56-
55+
with (
56+
langfuse_client.start_as_current_observation(
57+
name="Agents-SDK-Trace", as_type="agent", input=query
58+
) as obs,
59+
propagate_attributes(
60+
session_id=session.session_id # Propagate session_id to all child observations
61+
),
62+
):
5763
# Run the agent in streaming mode to get and display intermediate outputs
5864
result_stream = agents.Runner.run_streamed(
5965
main_agent, input=query, session=session
@@ -64,7 +70,7 @@ async def _main(
6470
if len(turn_messages) > 0:
6571
yield turn_messages
6672

67-
span.update(output=result_stream.final_output)
73+
obs.update(output=result_stream.final_output)
6874

6975
pretty_print(turn_messages)
7076
yield turn_messages
@@ -92,7 +98,7 @@ async def _main(
9298
[
9399
"At which university did the SVP Software Engineering"
94100
" at Apple (as of June 2025) earn their engineering degree?",
95-
]
101+
],
96102
],
97103
title="2.1: ReAct for Retrieval-Augmented Generation with OpenAI Agent SDK + LangFuse",
98104
)

src/2_frameworks/2_multi_agent/efficient.py

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
import gradio as gr
1414
from dotenv import load_dotenv
1515
from gradio.components.chatbot import ChatMessage
16+
from langfuse import propagate_attributes
1617

1718
from src.prompts import REACT_INSTRUCTIONS
1819
from src.utils import (
@@ -39,20 +40,28 @@ async def _main(
3940
session = get_or_create_session(history, session_state)
4041

4142
# Use the main agent as the entry point- not the worker agent.
42-
with langfuse_client.start_as_current_span(name="Agents-SDK-Trace") as span:
43-
span.update(input=query)
44-
43+
with (
44+
langfuse_client.start_as_current_observation(
45+
name="Orchestrator-Worker", as_type="agent", input=query
46+
) as obs,
47+
propagate_attributes(
48+
session_id=session.session_id # Propagate session_id to all child observations
49+
),
50+
):
4551
# Run the agent in streaming mode to get and display intermediate outputs
4652
result_stream = agents.Runner.run_streamed(
47-
main_agent, input=query, session=session
53+
main_agent,
54+
input=query,
55+
session=session,
56+
max_turns=30, # Increase max turns to support more complex queries
4857
)
4958

5059
async for _item in result_stream.stream_events():
5160
turn_messages += oai_agent_stream_to_gradio_messages(_item)
5261
if len(turn_messages) > 0:
5362
yield turn_messages
5463

55-
span.update(output=result_stream.final_output)
64+
obs.update(output=result_stream.final_output)
5665

5766

5867
if __name__ == "__main__":
@@ -81,7 +90,11 @@ async def _main(
8190
instructions=(
8291
"You are a search agent. You receive a single search query as input. "
8392
"Use the search tool to perform a search, then produce a concise "
84-
"'search summary' of the key findings. Do NOT return raw search results."
93+
"'search summary' of the key findings. "
94+
"For every fact you include in the summary, ALWAYS include a citation "
95+
"both in-line and at the end of the summary as a numbered list. The "
96+
"citation at the end should include relevant metadata from the search "
97+
"results. Do NOT return raw search results. "
8598
),
8699
tools=[
87100
agents.function_tool(client_manager.knowledgebase.search_knowledgebase),
@@ -118,12 +131,13 @@ async def _main(
118131
**COMMON_GRADIO_CONFIG,
119132
examples=[
120133
[
121-
"At which university did the SVP Software Engineering"
122-
" at Apple (as of June 2025) earn their engineering degree?"
134+
"Write a structured report on the history of AI, covering: "
135+
"1) the start in the 50s, 2) the first AI winter, 3) the second AI winter, "
136+
"4) the modern AI boom, 5) the evolution of AI hardware, and "
137+
"6) the societal impacts of modern AI"
123138
],
124139
[
125-
"How does the annual growth in the 50th-percentile income "
126-
"in the US compare with that in Canada?",
140+
"Compare the box office performance of 'Oppenheimer' with the third Avatar movie"
127141
],
128142
],
129143
title="2.2.2: Multi-Agent Orchestrator-worker for Retrieval-Augmented Generation",

src/2_frameworks/2_multi_agent/efficient_multiple_kbs.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import gradio as gr
88
from dotenv import load_dotenv
99
from gradio.components.chatbot import ChatMessage
10+
from langfuse import propagate_attributes
1011

1112
from src.utils import (
1213
oai_agent_stream_to_gradio_messages,
@@ -36,9 +37,14 @@ async def _main(
3637
session = get_or_create_session(history, session_state)
3738

3839
# Use the main agent as the entry point- not the worker agent.
39-
with langfuse_client.start_as_current_span(name="Agents-SDK-Trace") as span:
40-
span.update(input=query)
41-
40+
with (
41+
langfuse_client.start_as_current_observation(
42+
name="Orchestrator-Worker", as_type="agent", input=query
43+
) as obs,
44+
propagate_attributes(
45+
session_id=session.session_id # Propagate session_id to all child observations
46+
),
47+
):
4248
# Run the agent in streaming mode to get and display intermediate outputs
4349
result_stream = agents.Runner.run_streamed(
4450
main_agent,
@@ -52,7 +58,7 @@ async def _main(
5258
if len(turn_messages) > 0:
5359
yield turn_messages
5460

55-
span.update(output=result_stream.final_output)
61+
obs.update(output=result_stream.final_output)
5662

5763

5864
if __name__ == "__main__":
@@ -173,26 +179,23 @@ async def _main(
173179
model=agents.OpenAIChatCompletionsModel(
174180
model=planner_model, openai_client=client_manager.openai_client
175181
),
182+
# NOTE: enabling parallel tool calls here can sometimes lead to issues
183+
# with invalid arguments being passed to the search agent.
184+
model_settings=agents.ModelSettings(parallel_tool_calls=False),
176185
)
177186

178187
demo = gr.ChatInterface(
179188
_main,
180189
**COMMON_GRADIO_CONFIG,
181190
examples=[
182191
[
183-
"At which university did the SVP Software Engineering"
184-
" at Apple (as of June 2025) earn their engineering degree?"
185-
],
186-
[
187-
"How does the annual growth in the 50th-percentile income "
188-
"in the US compare with that in Canada?",
192+
"Write a structured report on the history of AI, covering: "
193+
"1) the start in the 50s, 2) the first AI winter, 3) the second AI winter, "
194+
"4) the modern AI boom, 5) the evolution of AI hardware, and "
195+
"6) the societal impacts of modern AI"
189196
],
190197
[
191-
"Provide a complete list of all countries that have a population "
192-
"over 100 million in 2026, that contain over 500 billion cubic meters "
193-
"of internal fresh water for the year 2021, and have a mortality rate "
194-
"less than the birth rate for the year 2021. The order of the list "
195-
"should be based on the largest population size in 2026."
198+
"Compare the box office performance of 'Oppenheimer' with the third Avatar movie"
196199
],
197200
],
198201
title="2.2.3: Multi-Agent Orchestrator-worker for Retrieval-Augmented Generation with Multiple Tools",

src/2_frameworks/2_multi_agent/fan_out.py

Lines changed: 45 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,9 @@ async def process_document_pair(document_pair: DocumentPair) -> ConflictSummary
241241
242242
Returns None if exception is encountered.
243243
"""
244-
with langfuse_client.start_as_current_observation(name="Conflict- suggest") as span:
244+
with langfuse_client.start_as_current_observation(
245+
name="Conflict - suggest", as_type="agent"
246+
) as obs:
245247
try:
246248
result = await agents.Runner.run(
247249
worker_agent, input=document_pair.get_prompt()
@@ -251,7 +253,7 @@ async def process_document_pair(document_pair: DocumentPair) -> ConflictSummary
251253
print(e)
252254
return None
253255

254-
span.update(input=document_pair, output=output)
256+
obs.update(input=document_pair, output=output)
255257

256258
return output
257259

@@ -283,7 +285,9 @@ async def process_one_review(
283285
284286
Return None upon error.
285287
"""
286-
with langfuse_client.start_as_current_observation(name="Review proposal") as span:
288+
with langfuse_client.start_as_current_observation(
289+
name="Review proposal", as_type="agent"
290+
) as obs:
287291
try:
288292
result = await agents.Runner.run(
289293
conflict_review_agent, input=conflicted_document.model_dump_json()
@@ -293,7 +297,7 @@ async def process_one_review(
293297
print(e)
294298
return None
295299

296-
span.update(input=conflicted_document, output=output)
300+
obs.update(input=conflicted_document, output=output)
297301

298302
return output
299303

@@ -380,33 +384,43 @@ async def process_conflict_reviews(
380384
assert isinstance(dataset_dict, datasets.DatasetDict)
381385
documents = list(dataset_dict["train"])[: args.num_rows]
382386

383-
# Run O(N^2) agents on N documents to identify pairwise e.g., conflicts.
384-
document_pairs = build_document_pairs(documents) # type: ignore[arg-type]
385-
print(f"Built {len(document_pairs)} pair(s) from {len(documents)} document(s).")
386-
387-
with langfuse_client.start_as_current_span(name="Conflicts- Pairwise") as span:
388-
flagged_pairs = asyncio.get_event_loop().run_until_complete(
389-
process_fan_out(document_pairs)
390-
)
391-
span.update(
392-
input=args.source_dataset, output=f"{len(flagged_pairs)} pairs identified."
393-
)
394-
395-
# Collect conflicts related to each document.
396-
# from O(N^2) pairs to O(N) summarized per-document conflicts.
397-
conflicted_documents = group_conflicts(flagged_pairs)
387+
with langfuse_client.start_as_current_observation(
388+
name="Fan-Out", as_type="chain", input=args.source_dataset
389+
) as span:
390+
# Run O(N^2) agents on N documents to identify pairwise e.g., conflicts.
391+
document_pairs = build_document_pairs(documents) # type: ignore[arg-type]
392+
print(f"Built {len(document_pairs)} pair(s) from {len(documents)} document(s).")
393+
394+
with langfuse_client.start_as_current_observation(
395+
name="Conflicts - Pairwise", as_type="chain"
396+
) as obs:
397+
flagged_pairs = asyncio.get_event_loop().run_until_complete(
398+
process_fan_out(document_pairs)
399+
)
400+
obs.update(
401+
input=args.source_dataset,
402+
output=f"{len(flagged_pairs)} pairs identified.",
403+
)
398404

399-
# Review these O(N) per-document conflicts.
400-
with langfuse_client.start_as_current_span(name="Conflicts- Review") as span:
401-
conflict_reviews: list[ReviewedDocument] = (
402-
asyncio.get_event_loop().run_until_complete(
403-
process_conflict_reviews(conflicted_documents)
405+
# Collect conflicts related to each document.
406+
# from O(N^2) pairs to O(N) summarized per-document conflicts.
407+
conflicted_documents = group_conflicts(flagged_pairs)
408+
409+
# Review these O(N) per-document conflicts.
410+
with langfuse_client.start_as_current_observation(
411+
name="Conflicts - Review", as_type="chain"
412+
) as obs:
413+
conflict_reviews: list[ReviewedDocument] = (
414+
asyncio.get_event_loop().run_until_complete(
415+
process_conflict_reviews(conflicted_documents)
416+
)
404417
)
405-
)
406-
span.update(input=conflicted_documents, output=conflict_reviews)
418+
obs.update(input=conflicted_documents, output=conflict_reviews)
419+
420+
# Generate markdown output
421+
with open(args.output_report, "w") as output_file:
422+
reports = [_review.get_report() for _review in conflict_reviews]
423+
output_file.write("\n".join(reports))
424+
print(f"Writing report to {args.output_report}.")
407425

408-
# Generate markdown output
409-
with open(args.output_report, "w") as output_file:
410-
reports = [_review.get_report() for _review in conflict_reviews]
411-
output_file.write("\n".join(reports))
412-
print(f"Writing report to {args.output_report}.")
426+
span.update(output="Wrote report to " + args.output_report)

0 commit comments

Comments (0)