Skip to content

Commit 00b4f18

Browse files
committed
#13515 Fix comma-aware parsing of gRPC string values in Python
Replace naive split(", ") in __makelist() with a quote-aware state machine that respects quoted strings and nested brackets. Also add __unescape_string() to reverse C++ escaping, and fix strip('"') to only remove the outer quote pair.
1 parent a85b49f commit 00b4f18

3 files changed

Lines changed: 217 additions & 19 deletions

File tree

GrpcInterface/Python/rips/pdmobject.py

Lines changed: 65 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -212,13 +212,14 @@ def __convert_from_grpc_value(self, value: str) -> Value:
212212
float_val = float(value)
213213
return float_val
214214
except ValueError:
215+
# We may have a string. Remove the outer pair of quotes
216+
if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
217+
value = value[1:-1]
215218
if self.__islist(value):
216219
return self.__makelist(value)
217220
if self.__istuple(value):
218221
return self.__maketuple(value)
219-
# We may have a string. Strip internal start and end quotes
220-
value = value.strip('"')
221-
return value
222+
return self.__unescape_string(value)
222223

223224
def __convert_to_grpc_value(self, value: Any) -> str:
224225
if isinstance(value, bool):
@@ -284,26 +285,73 @@ def __maketuple(self, tuple_string: str) -> Value:
284285

285286
return ()
286287

288+
def __unescape_string(self, value: str) -> str:
    """Return ``value`` with escaped quotes and escaped backslashes decoded.

    A backslash followed by a double quote becomes the double quote, and a
    backslash followed by another backslash becomes a single backslash.
    Any other backslash (including one at the very end of the string) is
    kept unchanged.

    Args:
        value: Text with the surrounding quote pair already removed.

    Returns:
        The text with the two escape sequences above decoded.
    """
    # Imported locally so the method does not depend on a module-level import.
    import re

    # Matches are found left to right and never overlap, so an escaped
    # backslash is consumed as a pair and cannot combine with the next char.
    return re.sub(r'\\(["\\])', r"\1", value)
301+
287302
def __makelist(self, list_string: str) -> Value:
    """Parse a gRPC list string into a Python list of converted values.

    One leading "[" and one trailing "]" are removed, the remainder is split
    on top-level commas, and each piece is passed to
    ``__convert_from_grpc_value``. A comma does not split when it sits inside
    double-quoted text or inside nested square brackets.
    """
    inner = list_string.removeprefix("[").removesuffix("]")
    if not inner:
        return []

    pieces = []
    buffer = []
    quoted = False
    escaped = False
    depth = 0

    for char in inner:
        if escaped:
            # Character following a backslash inside quotes: keep verbatim.
            escaped = False
        elif quoted:
            if char == "\\":
                escaped = True
            elif char == '"':
                quoted = False
        elif char == '"':
            quoted = True
        elif char == "[":
            depth += 1
        elif char == "]":
            depth -= 1
        elif char == "," and depth == 0:
            # Top-level separator: close the current piece, drop the comma.
            pieces.append("".join(buffer))
            buffer = []
            continue
        elif char == " " and not buffer:
            # Padding before a piece starts (e.g. the space after a comma).
            continue
        buffer.append(char)

    if buffer:
        pieces.append("".join(buffer))

    return [self.__convert_from_grpc_value(piece) for piece in pieces]
307355

308356
def __from_pb2_to_resinsight_classes(
309357
self,
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
"""
2+
Tests for PdmObjectBase.__makelist() and __convert_from_grpc_value() parsing.
3+
4+
These tests verify quote-aware parsing of gRPC string values, including
5+
strings containing commas, escaped quotes, nested lists, and schedule text.
6+
Regression tests for #13515.
7+
"""
8+
9+
import sys
10+
import os
11+
import pytest
12+
13+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
14+
15+
from rips.pdmobject import PdmObjectBase
16+
17+
18+
@pytest.fixture
def parser():
    """Provide a PdmObjectBase built with no pb2 object and no channel."""
    instance = PdmObjectBase(pb2_object=None, channel=None)
    return instance
22+
23+
24+
class TestEmptyAndSimpleLists:
    """Empty lists and flat lists holding one scalar type."""

    def test_empty_list(self, parser):
        convert = parser._PdmObjectBase__convert_from_grpc_value
        assert convert("[]") == []

    def test_single_int(self, parser):
        convert = parser._PdmObjectBase__convert_from_grpc_value
        assert convert("[42]") == [42]

    def test_int_list(self, parser):
        convert = parser._PdmObjectBase__convert_from_grpc_value
        assert convert("[1, 2, 3]") == [1, 2, 3]

    def test_float_list(self, parser):
        convert = parser._PdmObjectBase__convert_from_grpc_value
        assert convert("[1.5, 2.5, 3.5]") == [1.5, 2.5, 3.5]

    def test_bool_list(self, parser):
        convert = parser._PdmObjectBase__convert_from_grpc_value
        assert convert("[true, false, true]") == [True, False, True]

    def test_simple_string_list(self, parser):
        convert = parser._PdmObjectBase__convert_from_grpc_value
        assert convert('["hello", "world"]') == ["hello", "world"]
50+
51+
52+
class TestStringsWithCommas:
    """Regression coverage for #13515: quoted strings that contain commas."""

    def test_single_string_with_comma(self, parser):
        # Wire form: ["hello, world"]
        expected = ["hello, world"]
        actual = parser._PdmObjectBase__convert_from_grpc_value('["hello, world"]')
        assert actual == expected

    def test_multiple_strings_with_commas(self, parser):
        # Wire form: ["a, b", "c, d"]
        expected = ["a, b", "c, d"]
        actual = parser._PdmObjectBase__convert_from_grpc_value('["a, b", "c, d"]')
        assert actual == expected

    def test_string_with_multiple_commas(self, parser):
        expected = ["one, two, three, four"]
        actual = parser._PdmObjectBase__convert_from_grpc_value(
            '["one, two, three, four"]'
        )
        assert actual == expected

    def test_mixed_strings_with_and_without_commas(self, parser):
        expected = ["no comma", "has, comma", "also none"]
        actual = parser._PdmObjectBase__convert_from_grpc_value(
            '["no comma", "has, comma", "also none"]'
        )
        assert actual == expected
76+
77+
78+
class TestEscapedCharacters:
    """Escaped double quotes and escaped backslashes inside string values."""

    def test_escaped_quote_in_string(self, parser):
        # Wire form ["say \"hello\""] should decode to: say "hello"
        convert = parser._PdmObjectBase__convert_from_grpc_value
        assert convert('["say \\"hello\\""]') == ['say "hello"']

    def test_escaped_backslash_in_string(self, parser):
        # Wire form ["path\\to\\file"] should decode to: path\to\file
        convert = parser._PdmObjectBase__convert_from_grpc_value
        assert convert('["path\\\\to\\\\file"]') == ["path\\to\\file"]

    def test_escaped_quote_and_comma(self, parser):
        # Wire form ["say \"hi\", bye"] should decode to: say "hi", bye
        convert = parser._PdmObjectBase__convert_from_grpc_value
        assert convert('["say \\"hi\\", bye"]') == ['say "hi", bye']

    def test_scalar_string_unescape(self, parser):
        # Scalar (non-list) string carrying escaped quotes.
        convert = parser._PdmObjectBase__convert_from_grpc_value
        assert convert('"say \\"hello\\""') == 'say "hello"'

    def test_scalar_string_unescape_backslash(self, parser):
        # Scalar (non-list) string carrying escaped backslashes.
        convert = parser._PdmObjectBase__convert_from_grpc_value
        assert convert('"C:\\\\Users\\\\file.txt"') == "C:\\Users\\file.txt"
108+
109+
110+
class TestNestedLists:
    """Lists whose elements are themselves lists."""

    def test_nested_int_lists(self, parser):
        expected = [[1, 2], [3, 4]]
        actual = parser._PdmObjectBase__convert_from_grpc_value("[[1, 2], [3, 4]]")
        assert actual == expected

    def test_nested_string_lists_with_commas(self, parser):
        expected = [["a, b", "c"], ["d", "e, f"]]
        actual = parser._PdmObjectBase__convert_from_grpc_value(
            '[["a, b", "c"], ["d", "e, f"]]'
        )
        assert actual == expected
122+
123+
124+
class TestMixedTypes:
    """A single list combining int, string, float and bool elements."""

    def test_mixed_type_list(self, parser):
        wire_value = '[1, "text, with comma", 3.5, true]'
        expected = [1, "text, with comma", 3.5, True]
        actual = parser._PdmObjectBase__convert_from_grpc_value(wire_value)
        assert actual == expected
132+
133+
134+
class TestScheduleTextRegression:
    """Regression tests for schedule (COMPDAT) text that contains commas.

    Each test wraps one multi-line schedule string in a one-element list and
    checks that parsing returns exactly that one string, unsplit.
    """

    @staticmethod
    def _as_grpc_list(text):
        """Wrap ``text`` as a quoted one-element list, escaping backslashes and quotes."""
        escaped = text.replace("\\", "\\\\").replace('"', '\\"')
        return '["' + escaped + '"]'

    def test_schedule_text_single_string_not_split(self, parser):
        # The text must actually contain commas: without them this test would
        # also pass against a parser that splits on every ", ".
        schedule = (
            "-- WELL, I, J, K1, K2\n"
            "COMPDAT\n 'WellA' 10 20 30 40 OPEN 1* 0.1 4* 0.5 /\n/"
        )
        grpc_value = self._as_grpc_list(schedule)
        result = parser._PdmObjectBase__convert_from_grpc_value(grpc_value)
        assert len(result) == 1
        assert result[0] == schedule

    def test_compdat_with_commas_in_comments(self, parser):
        # COMPDAT text whose inline comment contains commas.
        text = "COMPDAT -- well completions, zone A, zone B\n 'Well' 1 2 3 /\n/"
        grpc_value = self._as_grpc_list(text)
        result = parser._PdmObjectBase__convert_from_grpc_value(grpc_value)
        assert len(result) == 1
        assert result[0] == text

GrpcInterface/Python/rips/well_events.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,5 @@ def generate_schedule_text(self: WellEventTimeline, eclipse_case: Case) -> str:
314314
"""
315315
container = self.generate_schedule(eclipse_case_id=eclipse_case.id)
316316
if container and container.values:
317-
# Workaround: Concatenate all values in case the schedule text
318-
# was split by comma parsing in the gRPC layer
319317
return "".join(container.values)
320318
return ""

0 commit comments

Comments
 (0)