@@ -114,6 +114,42 @@ def notify(self, modifier=None):
114114 observer .update (self )
115115
116116
117+ def _strip_wrapped_flow_yaml_notes (text ):
118+ """Strip wrapped flow-style YAML notes without a nested regex."""
119+ lines = text .splitlines (keepends = True )
120+ stripped_lines = []
121+ i = 0
122+ while i < len (lines ):
123+ line = lines [i ]
124+ if (
125+ stripped_lines
126+ and stripped_lines [- 1 ].rstrip ().endswith ("," )
127+ and line .lstrip (" \t " ).startswith ("note:" )
128+ ):
129+ end_index = i
130+ while end_index < len (lines ):
131+ if "}" in lines [end_index ]:
132+ comma_index = stripped_lines [- 1 ].rfind ("," )
133+ brace_index = lines [end_index ].find ("}" )
134+ stripped_lines [- 1 ] = stripped_lines [- 1 ][:comma_index ] + lines [end_index ][brace_index :]
135+ i = end_index + 1
136+ break
137+ if lines [end_index ].rstrip ().endswith ("," ):
138+ i = end_index + 1
139+ break
140+ end_index += 1
141+ else :
142+ stripped_lines .append (line )
143+ i += 1
144+ continue
145+ continue
146+
147+ stripped_lines .append (line )
148+ i += 1
149+
150+ return "" .join (stripped_lines )
151+
152+
117153def make_output_subdirectory (output_directory , folder ):
118154 """
119155 Create a subdirectory `folder` in the output directory. If the folder
@@ -132,36 +168,26 @@ def strip_yaml_notes(src, dst):
132168 ordering, etc.) - important when the source is the carefully
133169 crafted ck2yaml output.
134170
135- Three patterns are handled (notes are always the last key, by
136- how RMG / ck2yaml emit them):
171+ Three patterns are handled:
137172 1. Block-style: `` note: ...`` on its own line,
138173 possibly followed by deeper-indented continuation lines
139174 (multi-line literal/folded scalars).
140- 2. Single-line flow: ``{..., note: foo}`` -> ``{...}``
175+ 2. Single-line flow: ``{..., note: foo, ... }`` -> ``{..., ...}``
141176 3. Wrapped flow: a flow mapping that wraps with the
142177 trailing ``,`` at the end of one line and
143- `` note: foo}`` on the next -> drop the comma and
144- replace with ``}`` on the prior line.
178+ `` note: foo`` on the next -> drop the note field.
145179 """
146180 if not os .path .exists (src ):
147181 return
148182 with open (src ) as f :
149183 text = f .read ()
150- # Wrapped flow style: a flow mapping that wraps after a
151- # trailing ``,``, with ``note: value`` on the next line
152- # (value may itself wrap across several more-indented lines)
153- # ending in ``}``. Replace the whole tail with ``}``.
154- # CodeQL flags this as polynomial ReDoS (py/polynomial-redos);
155- # safe here because [^\n}]* and \n[ \t]+ consume disjoint
156- # characters (no alternative-path overlap) and the inner *
157- # consumes >=2 chars per iteration, so worst-case is O(N^2)
158- # rather than exponential. Inputs are RMG-generated YAML,
159- # not adversarial.
160- text = re .sub (
161- r',[ \t]*\n[ \t]+note:[^\n}]*(?:\n[ \t]+[^\n}]*)*\}' ,
162- '}' , text ) # lgtm[py/polynomial-redos]
163- # Single-line flow style: ``, note: value}`` -> ``}``.
164- text = re .sub (r',[ \t]*note:[^,}]*\}' , '}' , text )
184+ # Wrapped flow style: a flow mapping that wraps after a trailing comma,
185+ # with ``note: value`` on the next line.
186+ text = _strip_wrapped_flow_yaml_notes (text )
187+ # Single-line flow style.
188+ text = re .sub (r',[ \t]*note:[^,}\n]*' , '' , text )
189+ text = re .sub (r'(\{)[ \t]*note:[^,}\n]*,[ \t]*' , r'\1' , text )
190+ text = re .sub (r'\{[ \t]*note:[^,}\n]*\}' , '{}' , text )
165191 # Block style: `` note: ...\n`` plus deeper-indented
166192 # continuation lines.
167193 text = re .sub (r'^( +)note:.*\n(?:\1 +[^\n]*\n)*' , '' , text , flags = re .MULTILINE )
0 commit comments