Skip to content

Commit 7360c4c

Browse files
Copilot and owndev committed
Add comprehensive debug logging and fix distances field for citation events
Co-authored-by: owndev <69784886+owndev@users.noreply.github.com>
1 parent 43a0e64 commit 7360c4c

1 file changed

Lines changed: 71 additions & 11 deletions

File tree

pipelines/azure/azure_ai_foundry.py

Lines changed: 71 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -369,7 +369,10 @@ def _extract_citations_from_response(
369369
Returns:
370370
List of citation objects, or None if no citations found
371371
"""
372+
log = logging.getLogger("azure_ai._extract_citations_from_response")
373+
372374
if not isinstance(response_data, dict):
375+
log.debug(f"Response data is not a dict: {type(response_data)}")
373376
return None
374377

375378
# Try multiple possible locations for citations
@@ -385,6 +388,9 @@ def _extract_citations_from_response(
385388
and "citations" in choice["delta"]["context"]
386389
):
387390
citations = choice["delta"]["context"]["citations"]
391+
log.info(
392+
f"Found {len(citations) if citations else 0} citations in delta.context.citations"
393+
)
388394

389395
# Check in choices[0].message.context.citations (non-streaming)
390396
elif (
@@ -394,8 +400,27 @@ def _extract_citations_from_response(
394400
and "citations" in choice["message"]["context"]
395401
):
396402
citations = choice["message"]["context"]["citations"]
403+
log.info(
404+
f"Found {len(citations) if citations else 0} citations in message.context.citations"
405+
)
406+
else:
407+
log.debug(
408+
f"No citations found in response. Choice keys: {choice.keys() if isinstance(choice, dict) else 'not a dict'}"
409+
)
410+
else:
411+
log.debug(f"No choices in response. Response keys: {response_data.keys()}")
412+
413+
if citations and isinstance(citations, list):
414+
log.info(f"Extracted {len(citations)} citations from response")
415+
# Log first citation structure for debugging
416+
if citations:
417+
log.info(
418+
f"First citation structure: {json.dumps(citations[0], default=str)[:500]}"
419+
)
420+
return citations
397421

398-
return citations if citations and isinstance(citations, list) else None
422+
log.debug("No valid citations found in response")
423+
return None
399424

400425
def _normalize_citation_for_openwebui(
401426
self, citation: Dict[str, Any], index: int
@@ -413,6 +438,8 @@ def _normalize_citation_for_openwebui(
413438
Returns:
414439
Complete citation event object with type and data fields
415440
"""
441+
log = logging.getLogger("azure_ai._normalize_citation_for_openwebui")
442+
416443
# Get title with fallback chain: title → filepath → url → "Unknown Document"
417444
base_title = (
418445
citation.get("title", "").strip()
@@ -431,9 +458,12 @@ def _normalize_citation_for_openwebui(
431458
if citation.get("metadata"):
432459
metadata_entry.update(citation.get("metadata", {}))
433460

461+
# Get document content
462+
content = citation.get("content", "")
463+
434464
# Build normalized citation data structure matching OpenWebUI format exactly
435465
citation_data = {
436-
"document": [citation.get("content", "")],
466+
"document": [content],
437467
"metadata": [metadata_entry],
438468
"source": {"name": title},
439469
}
@@ -443,16 +473,31 @@ def _normalize_citation_for_openwebui(
443473
citation_data["source"]["url"] = source_url
444474

445475
# Add distances array for relevance score (OpenWebUI uses this for percentage display)
446-
if citation.get("score") is not None:
447-
# Wrap score in distances array as required by OpenWebUI format
448-
citation_data["distances"] = [citation["score"]]
476+
# Always include distances to ensure relevance is shown (use 0 if score not available)
477+
score = citation.get("score")
478+
if score is not None:
479+
citation_data["distances"] = [float(score)]
480+
else:
481+
# Default to 0 if no score to ensure the distances field is present
482+
citation_data["distances"] = [0.0]
449483

450-
# Return complete citation event structure
451-
return {
484+
# Build complete citation event structure
485+
citation_event = {
452486
"type": "citation",
453487
"data": citation_data,
454488
}
455489

490+
# Log the normalized citation for debugging
491+
log.info(
492+
f"Normalized citation {index}: title='{title}', "
493+
f"content_length={len(content)}, "
494+
f"url='{source_url}', "
495+
f"score={score}, "
496+
f"event={json.dumps(citation_event, default=str)[:500]}"
497+
)
498+
499+
return citation_event
500+
456501
async def _emit_openwebui_citation_events(
457502
self,
458503
citations: List[Dict[str, Any]],
@@ -469,25 +514,40 @@ async def _emit_openwebui_citation_events(
469514
citations: List of Azure citation objects
470515
__event_emitter__: Event emitter callable for sending citation events
471516
"""
472-
if not __event_emitter__ or not citations:
517+
log = logging.getLogger("azure_ai._emit_openwebui_citation_events")
518+
519+
if not __event_emitter__:
520+
log.warning("No __event_emitter__ provided, cannot emit citation events")
473521
return
474522

475-
log = logging.getLogger("azure_ai._emit_openwebui_citation_events")
523+
if not citations:
524+
log.info("No citations to emit")
525+
return
526+
527+
log.info(f"Emitting {len(citations)} citation events via __event_emitter__")
476528

529+
emitted_count = 0
477530
for i, citation in enumerate(citations, 1):
478531
if not isinstance(citation, dict):
532+
log.warning(f"Citation {i} is not a dict, skipping: {type(citation)}")
479533
continue
480534

481535
try:
482536
normalized = self._normalize_citation_for_openwebui(citation, i)
483537

484538
# Emit citation event for this individual source
539+
log.info(
540+
f"Emitting citation event {i}/{len(citations)}: {normalized.get('data', {}).get('source', {}).get('name', 'unknown')}"
541+
)
485542
await __event_emitter__(normalized)
543+
emitted_count += 1
486544

487-
log.debug(f"Emitted citation event for doc{i}")
545+
log.info(f"Successfully emitted citation event for doc{i}")
488546

489547
except Exception as e:
490-
log.warning(f"Failed to emit citation event for citation {i}: {e}")
548+
log.exception(f"Failed to emit citation event for citation {i}: {e}")
549+
550+
log.info(f"Finished emitting {emitted_count}/{len(citations)} citation events")
491551

492552
def enhance_azure_search_response(self, response: Dict[str, Any]) -> Dict[str, Any]:
493553
"""

0 commit comments

Comments
 (0)