Skip to content

Commit 67f56c8

Browse files
committed
fix(cli): finalize JSONC parsing with safe trailing commas and secure fallback logic
1 parent 845b9f3 commit 67f56c8

2 files changed

Lines changed: 63 additions & 37 deletions

File tree

src/specify_cli/__init__.py

Lines changed: 45 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -685,9 +685,9 @@ def strip_json_comments(content: str) -> str:
685685
686686
This implementation handles VSCode settings.json files by:
687687
1. Skipping comments inside "string literals"
688-
2. Correctlly handling escaped quotes by counting backslashes
688+
2. Correctly handling escaped quotes by counting backslashes
689689
3. Removing single-line (//) and multi-line (/* */) comments
690-
4. Handling trailing commas safely (only outside strings)
690+
4. Handling trailing commas safely (only outside strings/comments)
691691
"""
692692
import re
693693

@@ -745,22 +745,28 @@ def strip_json_comments(content: str) -> str:
745745
i = length # End of content
746746
continue
747747

748+
# Handle trailing commas (safe because we're outside strings and comments)
749+
if char == ',':
750+
# Peek ahead to find the next non-whitespace character
751+
next_i = i + 1
752+
is_trailing = False
753+
while next_i < length:
754+
next_char = content[next_i]
755+
if next_char in (' ', '\t', '\n', '\r'):
756+
next_i += 1
757+
continue
758+
if next_char in (']', '}'):
759+
is_trailing = True
760+
break
761+
762+
if is_trailing:
763+
i += 1 # Skip the comma
764+
continue
765+
748766
result.append(char)
749767
i += 1
750768

751-
clean_content = "".join(result)
752-
753-
# Handle trailing commas: find a comma followed by closing brace/bracket
754-
# This is now safer as we've already stripped comments and identified strings
755-
# However, to be 100% safe against commas in strings, we should have handled this
756-
# during the character loop. Let's refine the regex to be very specific to
757-
# JSON structure (comma followed by whitespace and closing char).
758-
# Since we've already joined the result, we can apply a final cleanup.
759-
# To truly avoid commas in strings, we'd need to mark string ranges.
760-
# For now, this regex is standard for basic JSONC to JSON conversion.
761-
clean_content = re.sub(r',\s*([\]}])', r'\1', clean_content)
762-
763-
return clean_content
769+
return "".join(result)
764770

765771
def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = False) -> dict:
766772
"""Merge new JSON content into existing JSON file.
@@ -779,32 +785,37 @@ def merge_json_files(existing_path: Path, new_content: dict, verbose: bool = Fal
779785
Returns:
780786
Merged JSON content as dict
781787
"""
782-
if not isinstance(new_content, dict):
783-
if verbose:
784-
console.print(f"[yellow]Warning: Template content for {existing_path.name} is not a dictionary. Skipping merge.[/yellow]")
785-
return {}
786-
787-
try:
788-
if existing_path.exists():
788+
# Load existing content first to have a safe fallback
789+
existing_content = {}
790+
exists = existing_path.exists()
791+
792+
if exists:
793+
try:
789794
with open(existing_path, 'r', encoding='utf-8') as f:
790795
raw_content = f.read().lstrip('\ufeff')
791796
# Handle comments (JSONC)
792797
clean_content = strip_json_comments(raw_content)
793798
existing_content = json.loads(clean_content)
794-
795-
if not isinstance(existing_content, dict):
796-
if verbose:
797-
console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object, using template instead[/yellow]")
798-
return new_content
799-
else:
800-
return new_content
801-
802-
except (FileNotFoundError, json.JSONDecodeError) as e:
803-
# If file is invalid, just use new content
804-
if verbose and not isinstance(e, FileNotFoundError):
805-
console.print(f"[yellow]Warning: Could not parse existing JSON in {existing_path.name} ({e}), using template instead[/yellow]")
799+
except (FileNotFoundError, json.JSONDecodeError) as e:
800+
if verbose and not isinstance(e, FileNotFoundError):
801+
console.print(f"[yellow]Warning: Could not parse existing JSON in {existing_path.name} ({e}).[/yellow]")
802+
# fallback to empty dict for merging
803+
804+
# Validate template content
805+
if not isinstance(new_content, dict):
806+
if verbose:
807+
console.print(f"[yellow]Warning: Template content for {existing_path.name} is not a dictionary. Preserving existing settings.[/yellow]")
808+
return existing_content if isinstance(existing_content, dict) else {}
809+
810+
if not exists:
806811
return new_content
807812

813+
# If existing content parsed but is not a dict, skip merge to avoid data loss
814+
if not isinstance(existing_content, dict):
815+
if verbose:
816+
console.print(f"[yellow]Warning: Existing JSON in {existing_path.name} is not an object. Skipping merge to avoid data loss.[/yellow]")
817+
return existing_content
818+
808819
def deep_merge_polite(base: dict, update: dict) -> dict:
809820
"""Recursively merge update dict into base dict, preserving base values."""
810821
result = base.copy()

tests/test_merge.py

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,21 @@ def test_strip_json_comments_trailing_commas_advanced():
4949
assert parsed["list"][0]["id"] == 1
5050
assert parsed["metadata"]["tags"] == ["a", "b"]
5151

52+
def test_strip_json_comments_safe_commas_in_strings():
53+
"""Verify that commas followed by closing characters INSIDE strings are not removed."""
54+
jsonc = """
55+
{
56+
"tricky_string": "value, ]",
57+
"another_one": "nested, }",
58+
"normal": [1, 2, ]
59+
}
60+
"""
61+
stripped = strip_json_comments(jsonc)
62+
parsed = json.loads(stripped)
63+
assert parsed["tricky_string"] == "value, ]"
64+
assert parsed["another_one"] == "nested, }"
65+
assert parsed["normal"] == [1, 2]
66+
5267
def test_strip_json_comments_whitespace():
5368
"""Ensure stripping works with various whitespace and empty values."""
5469
jsonc = "{ \n\n \"a\": 1\n, \n \"b\": 2 // comment \n }"
@@ -161,9 +176,9 @@ def test_merge_json_files_with_bom(tmp_path):
161176
assert merged == {"a": 1, "b": 2}
162177

163178
def test_merge_json_files_not_a_dictionary_template(tmp_path):
164-
"""If for some reason new_content is not a dict, return empty dict as safety."""
179+
"""If for some reason new_content is not a dict, PRESERVE existing settings instead of wiping."""
165180
existing_file = tmp_path / "ok.json"
166181
existing_file.write_text('{"a": 1}')
167182

168-
# In the updated implementation, if new_content is not a dict, we return {}
169-
assert merge_json_files(existing_file, ["not", "a", "dict"]) == {}
183+
# Secure fallback: return existing settings to avoid clobbering
184+
assert merge_json_files(existing_file, ["not", "a", "dict"]) == {"a": 1}

0 commit comments

Comments
 (0)