Skip to content

Commit b11448a

Browse files
authored
fix: escape apostrophes for parsing and rendering [JAR-9386] (#704)
1 parent f8b1b58 commit b11448a

File tree

4 files changed

+64
-5
lines changed

4 files changed

+64
-5
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "uipath-langchain"
3-
version = "0.8.25"
3+
version = "0.8.26"
44
description = "Python SDK that enables developers to build and deploy LangGraph agents to the UiPath Cloud Platform"
55
readme = { file = "README.md", content-type = "text/markdown" }
66
requires-python = ">=3.11"

src/uipath_langchain/runtime/_citations.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222

2323
logger = logging.getLogger(__name__)
2424

25-
_TAG_RE = re.compile(r'<uip:cite\s+((?:[a-z_]+="[^"]*"\s*)+)/\s*>')
26-
_ATTR_RE = re.compile(r'([a-z_]+)="([^"]*)"')
25+
_TAG_RE = re.compile(r'<uip:cite\s+((?:[a-z_]+="(?:[^"\\]|\\.)*"\s*)+)/\s*>')
26+
_ATTR_RE = re.compile(r'([a-z_]+)="((?:[^"\\]|\\.)*)"')
2727

2828

2929
@dataclass(frozen=True) # frozen to make hashable / de-dupe sources
@@ -45,7 +45,9 @@ def _parse_citations(text: str) -> list[tuple[str, _ParsedCitation | None]]:
4545
raw_attributes = match.group(1)
4646

4747
# title="foo" url="https://..." -> [("title","foo"), ("url","https://...")]
48-
attributes = dict(_ATTR_RE.findall(raw_attributes))
48+
attributes = {
49+
k: v.replace('\\"', '"') for k, v in _ATTR_RE.findall(raw_attributes)
50+
}
4951

5052
title = attributes.get("title", "")
5153
url = attributes.get("url")

tests/runtime/test_citations.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -556,6 +556,34 @@ def test_uip_prefix_followed_by_citation_single_chunk(self):
556556
assert cited[0].data == "<uip "
557557
assert cited[0].citation.end.sources[0].url == "https://example.com"
558558

559+
def test_escaped_quotes_in_title(self):
560+
"""Citation with backslash-escaped quotes in title is parsed correctly."""
561+
proc = CitationStreamProcessor()
562+
text = r'Some text.<uip:cite title="The Peculiar Journey of \"Orange\"" url="https://example.com" />'
563+
events = proc.add_chunk(text)
564+
events.extend(proc.finalize())
565+
cited = [e for e in events if e.citation is not None]
566+
assert len(cited) == 1
567+
assert cited[0].data == "Some text."
568+
source = cited[0].citation.end.sources[0]
569+
assert isinstance(source, UiPathConversationCitationSourceUrl)
570+
assert source.url == "https://example.com"
571+
assert source.title == 'The Peculiar Journey of "Orange"'
572+
573+
def test_escaped_quotes_in_title_streamed(self):
574+
"""Escaped quotes in title still work when streamed across chunks."""
575+
proc = CitationStreamProcessor()
576+
events = proc.add_chunk(r'Text.<uip:cite title="Say \"hi')
577+
# "Text." is emitted immediately; the partial tag is buffered
578+
assert len(events) == 1
579+
assert events[0].data == "Text."
580+
assert events[0].citation is None
581+
events = proc.add_chunk(r' there\"" url="https://x.com" />')
582+
events.extend(proc.finalize())
583+
cited = [e for e in events if e.citation is not None]
584+
assert len(cited) == 1
585+
assert cited[0].citation.end.sources[0].title == 'Say "hi there"'
586+
559587

560588
class TestExtractCitationsFromText:
561589
"""Test cases for extract_citations_from_text function."""
@@ -689,6 +717,35 @@ def test_reference_without_page_number_skipped(self):
689717
assert cleaned == "UiPath reported earnings"
690718
assert citations == []
691719

720+
def test_escaped_quotes_in_title(self):
721+
"""Citation with escaped quotes in title is parsed and unescaped."""
722+
text = (
723+
r'A fact<uip:cite title="The \"Real\" Story" url="https://example.com" />'
724+
)
725+
cleaned, citations = extract_citations_from_text(text)
726+
assert cleaned == "A fact"
727+
assert len(citations) == 1
728+
source = citations[0].sources[0]
729+
assert isinstance(source, UiPathConversationCitationSourceUrl)
730+
assert source.title == 'The "Real" Story'
731+
assert source.url == "https://example.com"
732+
733+
def test_escaped_quotes_in_title_debug_dump_repro(self):
734+
"""Reproduce the exact tag from the debug dump that was failing."""
735+
text = (
736+
r'some text.<uip:cite title="The Peculiar Journey of \"Orange\"" '
737+
r'url="https://www.vocabulary.com/articles/wordroutes/the-peculiar-journey-of-orange/" />'
738+
)
739+
cleaned, citations = extract_citations_from_text(text)
740+
assert cleaned == "some text."
741+
assert len(citations) == 1
742+
source = citations[0].sources[0]
743+
assert source.title == 'The Peculiar Journey of "Orange"'
744+
assert (
745+
source.url
746+
== "https://www.vocabulary.com/articles/wordroutes/the-peculiar-journey-of-orange/"
747+
)
748+
692749
def test_different_sources_get_different_numbers(self):
693750
"""Different sources get incrementing numbers."""
694751
text = (

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)