Skip to content

Commit 6c30f6a

Browse files
Copilot and owndev committed
Fix citation structure: emit separate events per source with unique names
Co-authored-by: owndev <69784886+owndev@users.noreply.github.com>
1 parent c4d7d5c commit 6c30f6a

2 files changed

Lines changed: 43 additions & 56 deletions

File tree

docs/azure-ai-citations.md

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -44,28 +44,34 @@ When Azure AI Search returns citations in a streaming response:
4444
When Azure AI Search returns citations in a non-streaming response:
4545

4646
1. The pipeline extracts citations from the response
47-
2. **If `AZURE_AI_OPENWEBUI_CITATIONS` is enabled**: Citation events are emitted via `__event_emitter__`
47+
2. **If `AZURE_AI_OPENWEBUI_CITATIONS` is enabled**: Individual citation events are emitted via `__event_emitter__` for each source
4848
3. **If `AZURE_AI_ENHANCE_CITATIONS` is enabled**: The response content is enhanced with a formatted citation section
4949

5050
## Citation Format
5151

5252
### OpenWebUI Citation Event Structure
5353

54-
Citation events follow the official OpenWebUI specification (see [OpenWebUI Events Documentation](https://docs.openwebui.com/features/plugin/development/events#source-or-citation-and-code-execution)):
54+
Each citation is emitted as a separate event to ensure all sources appear in the UI. Citation events follow the official OpenWebUI specification (see [OpenWebUI Events Documentation](https://docs.openwebui.com/features/plugin/development/events#source-or-citation-and-code-execution)):
5555

5656
```python
5757
{
5858
"type": "citation",
5959
"data": {
60-
"document": ["Document content 1", "Document content 2", ...], # Content from each citation
61-
"metadata": [{"source": "https://..."}, ...], # Metadata with source URLs
62-
"source": {"name": "Source Name"}, # Display name for the source
63-
"distances": [0.95, 0.87, ...] # Relevance scores (displayed as percentage)
60+
"document": ["Document content..."], # Content from this citation
61+
"metadata": [{"source": "https://..."}], # Metadata with source URL
62+
"source": {
63+
"name": "[doc1] Document Title", # Unique name with index
64+
"url": "https://..." # Source URL if available
65+
},
66+
"distances": [0.95] # Relevance score (displayed as percentage)
6467
}
6568
}
6669
```
6770

68-
The `distances` array contains relevance scores from Azure AI Search, which OpenWebUI displays as a percentage on the citation cards.
71+
Key points:
72+
- Each source document gets its own citation event
73+
- The `source.name` includes the doc index (`[doc1]`, `[doc2]`, etc.) to prevent grouping
74+
- The `distances` array contains relevance scores from Azure AI Search, which OpenWebUI displays as a percentage on the citation cards
6975

7076
### Azure Citation Format (Input)
7177

pipelines/azure/azure_ai_foundry.py

Lines changed: 30 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ def _normalize_citation_for_openwebui(
401401
self, citation: Dict[str, Any], index: int
402402
) -> Dict[str, Any]:
403403
"""
404-
Normalize an Azure citation object to OpenWebUI citation format.
404+
Normalize an Azure citation object to OpenWebUI citation event format.
405405
406406
The format follows OpenWebUI's official citation event structure:
407407
https://docs.openwebui.com/features/plugin/development/events#source-or-citation-and-code-execution
@@ -411,15 +411,18 @@ def _normalize_citation_for_openwebui(
411411
index: Citation index (1-based)
412412
413413
Returns:
414-
Normalized citation in OpenWebUI format
414+
Complete citation event object with type and data fields
415415
"""
416416
# Get title with fallback chain: title → filepath → url → "Unknown Document"
417-
title = (
417+
# Add index to make each source unique and prevent grouping
418+
base_title = (
418419
citation.get("title", "").strip()
419420
or citation.get("filepath", "").strip()
420421
or citation.get("url", "").strip()
421422
or "Unknown Document"
422423
)
424+
# Make title unique by appending doc index if there could be duplicates
425+
title = f"[doc{index}] {base_title}"
423426

424427
# Build source URL for metadata
425428
source_url = citation.get("url") or citation.get("filepath") or ""
@@ -429,19 +432,27 @@ def _normalize_citation_for_openwebui(
429432
if citation.get("metadata"):
430433
metadata_entry.update(citation.get("metadata", {}))
431434

432-
# Build normalized citation structure matching OpenWebUI format exactly
433-
normalized = {
435+
# Build normalized citation data structure matching OpenWebUI format exactly
436+
citation_data = {
434437
"document": [citation.get("content", "")],
435438
"metadata": [metadata_entry],
436439
"source": {"name": title},
437440
}
438441

442+
# Add URL to source if available
443+
if source_url:
444+
citation_data["source"]["url"] = source_url
445+
439446
# Add distances array for relevance score (OpenWebUI uses this for percentage display)
440447
if citation.get("score") is not None:
441448
# Wrap score in distances array as required by OpenWebUI format
442-
normalized["distances"] = [citation["score"]]
449+
citation_data["distances"] = [citation["score"]]
443450

444-
return normalized
451+
# Return complete citation event structure
452+
return {
453+
"type": "citation",
454+
"data": citation_data,
455+
}
445456

446457
async def _emit_openwebui_citation_events(
447458
self,
@@ -451,8 +462,9 @@ async def _emit_openwebui_citation_events(
451462
"""
452463
Emit OpenWebUI citation events for citations.
453464
454-
Emits a single citation event with all citations as arrays in the data fields,
455-
following the OpenWebUI citation event format.
465+
Emits one citation event per source document, following the OpenWebUI
466+
citation event format. Each citation is emitted separately to ensure
467+
all sources appear in the UI.
456468
457469
Args:
458470
citations: List of Azure citation objects
@@ -463,51 +475,20 @@ async def _emit_openwebui_citation_events(
463475

464476
log = logging.getLogger("azure_ai._emit_openwebui_citation_events")
465477

466-
try:
467-
# Build combined citation data with all citations
468-
all_documents = []
469-
all_metadata = []
470-
all_distances = []
471-
source_name = None
472-
473-
for i, citation in enumerate(citations, 1):
474-
if not isinstance(citation, dict):
475-
continue
478+
for i, citation in enumerate(citations, 1):
479+
if not isinstance(citation, dict):
480+
continue
476481

482+
try:
477483
normalized = self._normalize_citation_for_openwebui(citation, i)
478484

479-
# Collect documents, metadata, and distances from each citation
480-
all_documents.extend(normalized.get("document", []))
481-
all_metadata.extend(normalized.get("metadata", []))
482-
483-
if "distances" in normalized:
484-
all_distances.extend(normalized.get("distances", []))
485-
486-
# Use the first citation's source name for the combined event
487-
if source_name is None and "source" in normalized:
488-
source_name = normalized["source"].get("name", "Source")
489-
490-
# Build the combined citation event
491-
citation_event = {
492-
"type": "citation",
493-
"data": {
494-
"document": all_documents,
495-
"metadata": all_metadata,
496-
"source": {"name": source_name or "Azure AI Search"},
497-
},
498-
}
485+
# Emit citation event for this individual source
486+
await __event_emitter__(normalized)
499487

500-
# Add distances if we have any scores
501-
if all_distances:
502-
citation_event["data"]["distances"] = all_distances
488+
log.debug(f"Emitted citation event for doc{i}")
503489

504-
# Emit the citation event
505-
await __event_emitter__(citation_event)
506-
507-
log.debug(f"Emitted citation event with {len(all_documents)} documents")
508-
509-
except Exception as e:
510-
log.warning(f"Failed to emit citation events: {e}")
490+
except Exception as e:
491+
log.warning(f"Failed to emit citation event for citation {i}: {e}")
511492

512493
def enhance_azure_search_response(self, response: Dict[str, Any]) -> Dict[str, Any]:
513494
"""

0 commit comments

Comments
 (0)