Skip to content

Commit fa54293

Browse files
authored
fix: ignore citation types not matching cas schema NO-JIRA (#643)
1 parent d85a752 commit fa54293

File tree

4 files changed: +55 -21 lines changed

4 files changed: +55 -21 lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "uipath-langchain"
3-
version = "0.7.7"
3+
version = "0.7.9"
44
description = "Python SDK that enables developers to build and deploy LangGraph agents to the UiPath Cloud Platform"
55
readme = { file = "README.md", content-type = "text/markdown" }
66
requires-python = ">=3.11"

src/uipath_langchain/runtime/_citations.py

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -53,17 +53,18 @@ def _parse_citations(text: str) -> list[tuple[str, _ParsedCitation | None]]:
5353

5454
has_url = url is not None
5555
has_reference = reference is not None
56+
has_page_number = page_number is not None
5657

5758
if has_url and not has_reference:
5859
# web citation
5960
citation = _ParsedCitation(title=title, url=url, page_number=page_number)
60-
elif has_reference and not has_url:
61+
elif has_reference and has_page_number and not has_url:
6162
# context grounding citation
6263
citation = _ParsedCitation(
6364
title=title, reference=reference, page_number=page_number
6465
)
6566
else:
66-
# skip; citation has no url= or reference=
67+
# skip; doesn't match a valid source type
6768
if preceding_text:
6869
segments.append((preceding_text, None))
6970
cursor = match.end()
@@ -90,30 +91,32 @@ def _make_source(
9091
source_numbers: dict[_ParsedCitation, int],
9192
next_number: int,
9293
) -> tuple[
93-
UiPathConversationCitationSourceUrl | UiPathConversationCitationSourceMedia, int
94+
UiPathConversationCitationSourceUrl | UiPathConversationCitationSourceMedia | None,
95+
int,
9496
]:
95-
"""Build a citation source, deduplicating by assigning numbers"""
96-
if citation not in source_numbers:
97-
source_numbers[citation] = next_number
98-
next_number += 1
99-
number = source_numbers[citation]
100-
101-
source: UiPathConversationCitationSourceUrl | UiPathConversationCitationSourceMedia
97+
"""Build a citation source, deduplicating by assigning numbers."""
10298
if citation.url is not None:
103-
source = UiPathConversationCitationSourceUrl(
99+
if citation not in source_numbers:
100+
source_numbers[citation] = next_number
101+
next_number += 1
102+
return UiPathConversationCitationSourceUrl(
104103
title=citation.title,
105-
number=number,
104+
number=source_numbers[citation],
106105
url=citation.url,
107-
)
108-
else:
109-
source = UiPathConversationCitationSourceMedia(
106+
), next_number
107+
elif citation.reference is not None and citation.page_number is not None:
108+
if citation not in source_numbers:
109+
source_numbers[citation] = next_number
110+
next_number += 1
111+
return UiPathConversationCitationSourceMedia(
110112
title=citation.title,
111-
number=number,
113+
number=source_numbers[citation],
112114
mime_type=None,
113115
download_url=citation.reference,
114116
page_number=citation.page_number,
115-
)
116-
return source, next_number
117+
), next_number
118+
else:
119+
return None, next_number
117120

118121

119122
def _find_partial_tag_start(text: str) -> int:
@@ -160,6 +163,9 @@ def _build_content_part_citation(
160163
citation, self._source_numbers, self._next_number
161164
)
162165

166+
if not source:
167+
return UiPathConversationContentPartChunkEvent(data=text)
168+
163169
return UiPathConversationContentPartChunkEvent(
164170
data=text,
165171
citation=UiPathConversationCitationEvent(
@@ -236,6 +242,9 @@ def extract_citations_from_text(
236242

237243
if citation is not None:
238244
source, next_number = _make_source(citation, source_numbers, next_number)
245+
if not source:
246+
offset += length
247+
continue
239248
if length > 0:
240249
citations.append(
241250
UiPathConversationCitationData(

tests/runtime/test_citations.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ def test_mixed_valid_and_invalid_citations(self):
287287
text = (
288288
'A<uip:cite title="Valid" url="https://v.com" />'
289289
'B<uip:cite title="Invalid" page_number="1" />'
290-
'C<uip:cite title="Also Valid" reference="https://r.com" />'
290+
'C<uip:cite title="Also Valid" reference="https://r.com" page_number="2" />'
291291
)
292292
events = proc.add_chunk(text)
293293
events.extend(proc.finalize())
@@ -440,6 +440,20 @@ def test_citation_with_both_url_and_reference_skipped(self):
440440
assert combined == "Text more"
441441
assert all(e.citation is None for e in events)
442442

443+
def test_reference_without_page_number_skipped(self):
444+
"""Web URL misclassified as reference (no page_number) must not emit a media source."""
445+
proc = CitationStreamProcessor()
446+
text = (
447+
"UiPath reported earnings"
448+
'<uip:cite title="UiPath Reports Third Quarter Fiscal 2026 Fin..." '
449+
'reference="https://ir.uipath.com/news/detail/420/uipath-reports-third-quarter-fiscal-2026-financial-results" />'
450+
)
451+
events = proc.add_chunk(text)
452+
events.extend(proc.finalize())
453+
combined = "".join(e.data for e in events if e.data)
454+
assert combined == "UiPath reported earnings"
455+
assert all(e.citation is None for e in events)
456+
443457
def test_only_whitespace_between_citations(self):
444458
"""Citations separated only by whitespace."""
445459
proc = CitationStreamProcessor()
@@ -664,6 +678,17 @@ def test_text_with_trailing_content(self):
664678
assert citations[0].offset == 0
665679
assert citations[0].length == 6 # len("A fact")
666680

681+
def test_reference_without_page_number_skipped(self):
682+
"""Web URL misclassified as reference (no page_number) must not emit a citation."""
683+
text = (
684+
"UiPath reported earnings"
685+
'<uip:cite title="UiPath Reports Third Quarter Fiscal 2026 Fin..." '
686+
'reference="https://ir.uipath.com/news/detail/420/uipath-reports-third-quarter-fiscal-2026-financial-results" />'
687+
)
688+
cleaned, citations = extract_citations_from_text(text)
689+
assert cleaned == "UiPath reported earnings"
690+
assert citations == []
691+
667692
def test_different_sources_get_different_numbers(self):
668693
"""Different sources get incrementing numbers."""
669694
text = (

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)