Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion src/praisonai-agents/praisonaiagents/agent/chat_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,43 @@ def _get_display_functions():
class ChatMixin:
"""Mixin providing chat methods for the Agent class."""

def _extract_llm_response_content(self, response) -> Optional[str]:
"""Extract actual message content from LLM response for better observability.

Instead of str(response) which shows the entire ChatCompletion object,
this extracts the actual message text that agents produce.

Args:
response: OpenAI ChatCompletion response object

Returns:
str: The actual message content, or fallback representation
"""
if not response:
return None

try:
# Try to extract the actual message content first
if hasattr(response, 'choices') and response.choices:
choice = response.choices[0]
if hasattr(choice, 'message') and hasattr(choice.message, 'content'):
content = choice.message.content
if content:
return content
# Tool-call turn: surface tool_calls summary instead of None
tool_calls = getattr(choice.message, 'tool_calls', None)
if tool_calls:
try:
names = [getattr(tc.function, 'name', '?') for tc in tool_calls]
return f"[tool_calls: {', '.join(names)}]"
except Exception:
pass
except (AttributeError, IndexError, TypeError):
pass

# Fallback to string representation if extraction fails
return str(response)
Comment on lines +59 to +94
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Tool-call responses will emit None content instead of a useful fallback.

When the LLM returns a tool call (no assistant text), response.choices[0].message.content is None. This method then returns None on line 79 (since the try block doesn't raise), and the str(response) fallback on line 84 is only reached when an exception is thrown. That is inconsistent with the docstring ("or fallback representation") and means tool-call turns will show up in the trace with empty response_content rather than e.g. a summary of the tool_calls — losing the very observability benefit this follow-up is trying to add.

Consider falling through to the fallback (or emitting a tool-calls summary) when content is falsy:

Proposed fix
         try:
             # Try to extract the actual message content first
             if hasattr(response, 'choices') and response.choices:
                 choice = response.choices[0]
                 if hasattr(choice, 'message') and hasattr(choice.message, 'content'):
-                    return choice.message.content
+                    content = choice.message.content
+                    if content:
+                        return content
+                    # Tool-call turn: surface tool_calls summary instead of None
+                    tool_calls = getattr(choice.message, 'tool_calls', None)
+                    if tool_calls:
+                        try:
+                            names = [getattr(tc.function, 'name', '?') for tc in tool_calls]
+                            return f"[tool_calls: {', '.join(names)}]"
+                        except Exception:
+                            pass
         except (AttributeError, IndexError, TypeError):
             pass
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/praisonai-agents/praisonaiagents/agent/chat_mixin.py` around lines 59 -
84, The _extract_llm_response_content function currently returns None when
response.choices[0].message.content is falsy (e.g., for tool calls) instead of
using the fallback; update _extract_llm_response_content to treat empty or None
content as a failure to extract and fall through to the fallback (str(response))
or generate a small summary of tool_calls from
response.choices[0].message.tool_calls if present; specifically, in the block
that inspects response.choices and choice.message.content, check for a truthy
content value before returning it, and if falsy, continue to the fallback branch
(or build a concise tool_call summary) so tool-call turns produce an informative
representation rather than None.


def _build_system_prompt(self, tools=None):
"""Build the system prompt with tool information.

Expand Down Expand Up @@ -572,7 +609,7 @@ def _chat_completion(self, messages, temperature=1.0, tools=None, stream=True, r
_trace_emitter.llm_response(
self.name,
duration_ms=_duration_ms,
response_content=str(final_response) if final_response else None,
response_content=self._extract_llm_response_content(final_response),
prompt_tokens=_prompt_tokens,
completion_tokens=_completion_tokens,
cost_usd=_cost_usd,
Expand Down
4 changes: 4 additions & 0 deletions src/praisonai-agents/praisonaiagents/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,10 @@
'Crawl4AITools': ('.crawl4ai_tools', 'Crawl4AITools'),
'crawl4ai_tools': ('.crawl4ai_tools', None),

# Langextract Tools (interactive text analysis)
'langextract_extract': ('.langextract_tools', None),
'langextract_render_file': ('.langextract_tools', None),

# Unified Web Search (auto-fallback across providers)
'search_web': ('.web_search', None),
'web_search': ('.web_search', None), # Alias
Expand Down
251 changes: 251 additions & 0 deletions src/praisonai-agents/praisonaiagents/tools/langextract_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
"""Langextract tools for interactive text analysis and extraction.

Provides first-class tool integration for langextract functionality,
allowing agents to create interactive HTML visualizations from text.

Usage:
from praisonaiagents.tools import langextract_extract

# Agent can call this tool directly
result = langextract_extract(
text="The quick brown fox jumps over the lazy dog.",
extractions=["fox", "dog"]
)

Architecture:
- Follows AGENTS.md tool patterns (decorator-based, lazy imports)
- Protocol-driven design with optional dependencies
- Zero overhead when langextract is not installed
"""

from typing import List, Optional, Dict, Any
from ..approval import require_approval
from .decorator import tool


@tool
def langextract_extract(
    text: str,
    extractions: Optional[List[str]] = None,
    document_id: str = "agent-analysis",
    output_path: Optional[str] = None,
    auto_open: bool = False
) -> Dict[str, Any]:
    """Extract and annotate text using langextract for interactive visualization.

    Creates an interactive HTML document with highlighted extractions that can
    be viewed in a browser. Useful for text analysis, entity extraction, and
    document annotation workflows.

    Args:
        text: The source text to analyze and extract from
        extractions: List of text snippets to highlight in the document;
            each snippet is matched case-insensitively and every occurrence
            (including overlapping ones) becomes a highlight span
        document_id: Identifier for the document (used in HTML output)
        output_path: Path to save the HTML file (defaults to a temp file)
        auto_open: Whether to automatically open the HTML file in a browser

    Returns:
        Dict containing:
            - html_path: Path to the generated HTML file (None on failure)
            - extractions_count: Number of highlight spans actually created
            - document_id: The document identifier used
            - success: True if successful, False otherwise
            - error: Error message if success is False, else None

    Note:
        This tool never raises: all failures, including langextract not
        being installed, are reported through the returned dict.
    """
    def _failure(message: str) -> Dict[str, Any]:
        # Uniform error payload so callers always get the same shape.
        return {
            "success": False,
            "error": message,
            "html_path": None,
            "extractions_count": 0,
            "document_id": document_id,
        }

    if not text or not text.strip():
        return _failure("Text cannot be empty")

    try:
        # Lazy import of the optional dependency: zero overhead when the
        # package is not installed.
        try:
            import langextract as lx  # type: ignore
        except ImportError:
            return _failure(
                "langextract is not installed. Install with: pip install langextract"
            )

        import os
        import tempfile

        # Build one Extraction object per occurrence of each requested
        # snippet, searching case-insensitively over the source text.
        extraction_objects = []
        added_count = 0
        lowered_text = text.lower()  # hoisted: invariant across all searches
        for i, extraction_text in enumerate(extractions or []):
            if not extraction_text.strip():
                continue  # skip blank search terms

            needle = extraction_text.lower()
            start_pos = 0
            while True:
                pos = lowered_text.find(needle, start_pos)
                if pos == -1:
                    break

                extraction_objects.append(
                    lx.data.Extraction(
                        extraction_class=f"extraction_{i}",
                        extraction_text=extraction_text,
                        char_interval=lx.data.CharInterval(
                            start_pos=pos,
                            end_pos=pos + len(extraction_text)
                        ),
                        attributes={
                            "index": i,
                            "original_text": extraction_text,
                            "tool": "langextract_extract"
                        }
                    )
                )
                added_count += 1
                # Advance by one character so overlapping matches are found too.
                start_pos = pos + 1

        document = lx.data.AnnotatedDocument(
            document_id=document_id,
            text=text,
            extractions=extraction_objects
        )

        if not output_path:
            output_path = os.path.join(
                tempfile.gettempdir(),
                f"langextract_{document_id}.html"
            )

        # langextract renders from a JSONL file, so write one temporarily.
        jsonl_dir = tempfile.gettempdir()
        jsonl_path = os.path.join(jsonl_dir, f"langextract_{document_id}.jsonl")
        lx.io.save_annotated_documents(
            [document],
            output_name=os.path.basename(jsonl_path),
            output_dir=jsonl_dir
        )

        # visualize() may return an IPython-style display object exposing
        # .data, or a plain string — handle both.
        html = lx.visualize(jsonl_path)
        html_content = html.data if hasattr(html, 'data') else html

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(html_content)

        # Best-effort cleanup of the intermediate JSONL file.
        try:
            os.remove(jsonl_path)
        except OSError:
            pass

        if auto_open:
            import webbrowser
            from pathlib import Path
            webbrowser.open(Path(output_path).resolve().as_uri())

        return {
            "success": True,
            "html_path": output_path,
            "extractions_count": added_count,
            "document_id": document_id,
            "error": None
        }

    except Exception as e:
        return _failure(str(e))


@tool
@require_approval(risk_level="high")
def langextract_render_file(
    file_path: str,
    extractions: Optional[List[str]] = None,
    output_path: Optional[str] = None,
    auto_open: bool = False
) -> Dict[str, Any]:
    """Read a text file and create a langextract visualization.

    Reads a UTF-8 text file from disk and delegates to langextract_extract
    to create an interactive HTML visualization with optional extractions
    highlighted. Gated behind approval (risk level "high") because it reads
    and writes the file system.

    Args:
        file_path: Path to the text file to read
        extractions: List of text snippets to highlight
        output_path: Path to save the HTML file (defaults to
            "<input stem>_annotated.html" next to the input file)
        auto_open: Whether to automatically open the HTML file in a browser

    Returns:
        Dict with the same structure as langextract_extract. All failures
        (missing file, unreadable content, langextract not installed) are
        reported via success=False and an error message rather than raised.
    """
    import os

    def _failure(message: str) -> Dict[str, Any]:
        # Same error payload shape as langextract_extract.
        return {
            "success": False,
            "error": message,
            "html_path": None,
            "extractions_count": 0,
            "document_id": os.path.basename(file_path),
        }

    if not os.path.exists(file_path):
        return _failure(f"File not found: {file_path}")

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            text = f.read()

        # Default the output next to the input: "<stem>_annotated.html".
        if not output_path:
            base_name = os.path.splitext(os.path.basename(file_path))[0]
            output_dir = os.path.dirname(file_path)
            output_path = os.path.join(output_dir, f"{base_name}_annotated.html")

        # Delegate extraction and rendering to the main tool.
        return langextract_extract(
            text=text,
            extractions=extractions,
            document_id=os.path.basename(file_path),
            output_path=output_path,
            auto_open=auto_open
        )

    except Exception as e:
        return _failure(str(e))


# Public names for star-imports and direct `from ...langextract_tools import`.
__all__ = ["langextract_extract", "langextract_render_file"]
Loading