Skip to content

Commit f69175e

Browse files
committed
Handle another weird case in the YAML note-stripper: quoted commas inside single-line flow notes.
This is such a pain, to think of all the weird edge cases. Is it worth it?
1 parent fb7c3e1 commit f69175e

2 files changed

Lines changed: 66 additions & 4 deletions

File tree

rmgpy/util.py

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,59 @@ def _strip_wrapped_flow_yaml_notes(text):
150150
return "".join(stripped_lines)
151151

152152

153+
def _find_flow_yaml_delimiter(text, start):
154+
"""Find the next comma or closing brace outside quoted text."""
155+
quote = None
156+
i = start
157+
while i < len(text):
158+
char = text[i]
159+
if quote:
160+
if char == quote:
161+
if quote == "'" and i + 1 < len(text) and text[i + 1] == "'":
162+
i += 2
163+
continue
164+
quote = None
165+
elif quote == '"' and char == "\\":
166+
i += 2
167+
continue
168+
elif char in ("'", '"'):
169+
quote = char
170+
elif char in ",}":
171+
return i
172+
i += 1
173+
return -1
174+
175+
176+
def _strip_single_line_flow_yaml_notes(text):
    """Strip ``note: value`` entries from single-line flow-style YAML mappings.

    Each line is scanned for ``note:`` directly preceded (ignoring blanks) by
    ``,`` or ``{``. The end of the value is located with
    ``_find_flow_yaml_delimiter`` so that commas inside quoted values do not
    split the note. Three shapes are handled:

    * ``..., note: v, ...`` / ``..., note: v}`` -- the leading comma and the
      note are removed; the delimiter after the value is kept.
    * ``{note: v, rest}`` -- the brace is kept and the note plus its trailing
      comma and following blanks are removed, giving ``{rest}``.
    * ``{note: v}`` -- becomes ``{}``.

    Notes whose value has no delimiter on the same line are left untouched.

    Args:
        text: Full YAML document text.

    Returns:
        The text with single-line flow-style notes removed; line endings are
        preserved.
    """
    note_pattern = re.compile(r'([,{])[ \t]*note:')
    stripped_lines = []
    for line in text.splitlines(keepends=True):
        search_start = 0
        while True:
            match = note_pattern.search(line, search_start)
            if not match:
                break
            value_end = _find_flow_yaml_delimiter(line, match.end())
            if value_end == -1:
                # No terminator on this line (wrapped or unterminated value).
                break
            opener = match.start()
            if match.group(1) == ",":
                # ", note: v" goes away; the following "," or "}" stays.
                line = line[:opener] + line[value_end:]
                search_start = opener
            elif line[value_end] == ",":
                # "{note: v, rest" -> "{rest": keep the brace, drop the
                # note's own trailing comma and the blanks after it.
                line = line[:opener + 1] + line[value_end + 1:].lstrip(" \t")
                # Rescan from the brace: the next key may be a note as well.
                search_start = opener
            else:
                # "{note: v}" -> "{}"
                line = line[:opener + 1] + line[value_end:]
                search_start = opener + 1
        stripped_lines.append(line)
    return "".join(stripped_lines)
204+
205+
153206
def make_output_subdirectory(output_directory, folder):
154207
"""
155208
Create a subdirectory `folder` in the output directory. If the folder
@@ -184,10 +237,7 @@ def strip_yaml_notes(src, dst):
184237
# Wrapped flow style: a flow mapping that wraps after a trailing comma,
185238
# with ``note: value`` on the next line.
186239
text = _strip_wrapped_flow_yaml_notes(text)
187-
# Single-line flow style.
188-
text = re.sub(r',[ \t]*note:[^,}\n]*', '', text)
189-
text = re.sub(r'(\{)[ \t]*note:[^,}\n]*,[ \t]*', r'\1', text)
190-
text = re.sub(r'\{[ \t]*note:[^,}\n]*\}', '{}', text)
240+
text = _strip_single_line_flow_yaml_notes(text)
191241
# Block style: `` note: ...\n`` plus deeper-indented
192242
# continuation lines.
193243
text = re.sub(r'^( +)note:.*\n(?:\1 +[^\n]*\n)*', '', text, flags=re.MULTILINE)

test/rmgpy/rmgUtilTest.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,18 @@ def test_strip_yaml_notes_removes_single_line_flow_note(self, tmp_path):
8888

8989
assert self.strip_yaml_notes(tmp_path, source) == expected
9090

91+
def test_strip_yaml_notes_removes_single_line_flow_note_with_quoted_comma(self, tmp_path):
# Regression test: in a single-line flow mapping, a note whose value is a single-quoted string containing a comma must be removed as a whole, leaving the remaining keys intact.
92+
source = """species:
93+
- name: Ar
94+
transport: {model: gas, note: 'comma here, danger!', geometry: atom, diameter: 3.33, well-depth: 136.5}
95+
"""
96+
expected = """species:
97+
- name: Ar
98+
transport: {model: gas, geometry: atom, diameter: 3.33, well-depth: 136.5}
99+
"""
100+
101+
assert self.strip_yaml_notes(tmp_path, source) == expected
102+
91103
def test_strip_yaml_notes_removes_wrapped_flow_note(self, tmp_path):
92104
source = """species:
93105
- name: CH4

0 commit comments

Comments
 (0)