Skip to content

Commit 52e90a6

Browse files
committed
fix: support ignore for diffjson
1 parent 466822b commit 52e90a6

2 files changed

Lines changed: 98 additions & 20 deletions

File tree

.github/workflows/regression.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ jobs:
1414
#if: "!contains(github.event.pull_request.title, '[NO-REGRESSION-TEST]')"
1515
env:
1616
LANGS: "go rust python java typescript"
17+
DIFFJSON_IGNORE: " ['id'] ['Path'] "
1718
steps:
1819
- name: Checkout pull request code
1920
uses: actions/checkout@v4
@@ -101,4 +102,4 @@ jobs:
101102
retention-days: 3
102103

103104
- name: Compare outputs and check for regression
104-
run: ./pr_repo/script/diffjson.py out_old out_new
105+
run: ./pr_repo/script/diffjson.py out_old out_new $COMPARE_IGNORE

script/diffjson.py

Lines changed: 96 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#!/usr/bin/env python3
22
import argparse
33
import json
4+
import os
5+
import re
46
import sys
57
from pathlib import Path
68
from typing import Literal
@@ -11,6 +13,56 @@
1113
Status = Literal["OK", "BAD", "FILE_ERROR"]
1214

1315

16+
def parse_accessor(accessor_string: str) -> list[str | int]:
17+
"""
18+
Parses a field accessor string like "['key'][0]" into a list ['key', 0].
19+
This allows for programmatic access to nested JSON elements.
20+
"""
21+
# Regex to find content within brackets, e.g., ['key'] or [0]
22+
parts = re.findall(r"\[([^\]]+)\]", accessor_string)
23+
keys = []
24+
for part in parts:
25+
try:
26+
# Try to convert to an integer for list indices
27+
keys.append(int(part))
28+
except ValueError:
29+
# Otherwise, it's a string key; strip surrounding quotes
30+
keys.append(part.strip("'\""))
31+
return keys
32+
33+
34+
def delete_path(data: dict | list, path: list[str | int]):
35+
"""
36+
Deletes a value from a nested dictionary or list based on a path.
37+
This function modifies the data in place. If the path is invalid
38+
or doesn't exist, it does nothing.
39+
"""
40+
if not path:
41+
return
42+
43+
# Traverse to the parent of the target element to delete it
44+
parent = data
45+
key_to_delete = path[-1]
46+
path_to_parent = path[:-1]
47+
48+
try:
49+
for key in path_to_parent:
50+
parent = parent[key]
51+
52+
# Check if the final key/index exists in the parent before deleting
53+
if isinstance(parent, dict) and key_to_delete in parent:
54+
del parent[key_to_delete]
55+
elif (
56+
isinstance(parent, list)
57+
and isinstance(key_to_delete, int)
58+
and 0 <= key_to_delete < len(parent)
59+
):
60+
del parent[key_to_delete]
61+
except (KeyError, IndexError, TypeError):
62+
# Path is invalid (e.g., key missing, index out of bounds). Ignore and proceed.
63+
pass
64+
65+
1466
def format_diff_custom(diff: DeepDiff) -> str:
1567
"""
1668
Formats a DeepDiff object into a custom human-readable string.
@@ -65,27 +117,39 @@ def format_value(value):
65117
return "\n".join(output)
66118

67119

68-
def compare_json_files(file1_path: Path, file2_path: Path) -> Status:
120+
def compare_json_files(
121+
file1_path: Path, file2_path: Path, ignore_fields: list[str] | None = None
122+
) -> tuple[Status, DeepDiff | None]:
69123
"""
70-
Compares the content of two JSON files without printing output.
124+
Compares two JSON files, optionally ignoring specified fields.
71125
72126
Returns:
73-
"OK" if they match, "BAD" if they don't, "FILE_ERROR" on read/parse error.
127+
A tuple containing the status ("OK", "BAD", "FILE_ERROR")
128+
and the DeepDiff object if differences were found.
74129
"""
75130
try:
76131
with open(file1_path, "r", encoding="utf-8") as f1:
77132
json1 = json.load(f1)
78133
with open(file2_path, "r", encoding="utf-8") as f2:
79134
json2 = json.load(f2)
80135
except (FileNotFoundError, json.JSONDecodeError):
81-
return "FILE_ERROR"
136+
return "FILE_ERROR", None
137+
138+
# Delete ignored fields from both JSON objects before comparison
139+
if ignore_fields:
140+
for field_accessor in ignore_fields:
141+
path = parse_accessor(field_accessor)
142+
delete_path(json1, path)
143+
delete_path(json2, path)
82144

83145
diff = DeepDiff(json1, json2, ignore_order=True)
84146

85-
return "BAD" if diff else "OK"
147+
return ("BAD", diff) if diff else ("OK", None)
86148

87149

88-
def process_directory_comparison(old_dir: Path, new_dir: Path) -> bool:
150+
def process_directory_comparison(
151+
old_dir: Path, new_dir: Path, ignore_fields: list[str] | None = None
152+
) -> bool:
89153
"""
90154
Compares JSON files across two directories and prints results in a list format.
91155
"""
@@ -94,7 +158,9 @@ def process_directory_comparison(old_dir: Path, new_dir: Path) -> bool:
94158
new_files = {p.name for p in new_dir.glob("*.json")}
95159

96160
for filename in sorted(old_files.intersection(new_files)):
97-
status = compare_json_files(old_dir / filename, new_dir / filename)
161+
status, _ = compare_json_files(
162+
old_dir / filename, new_dir / filename, ignore_fields
163+
)
98164
results["BAD" if status != "OK" else "OK"].append(filename)
99165

100166
for filename in sorted(old_files - new_files):
@@ -125,8 +191,25 @@ def main():
125191
parser.add_argument(
126192
"path2", type=Path, help="Path to the second file or 'new' directory."
127193
)
194+
parser.add_argument(
195+
"-i",
196+
"--ignore",
197+
action="append",
198+
default=[],
199+
help="Field to ignore, as an accessor string. Can be used multiple times. "
200+
"Also reads whitespace-separated values from $DIFFJSON_IGNORE. "
201+
"Example: -i \"['metadata']['timestamp']\"",
202+
)
128203
args = parser.parse_args()
129204

205+
# --- Combine ignore fields from CLI and environment variable ---
206+
cli_ignore_fields = args.ignore
207+
env_ignore_str = os.environ.get("DIFFJSON_IGNORE", "")
208+
env_ignore_fields = env_ignore_str.split() if env_ignore_str else []
209+
210+
# Combine both sources and remove duplicates
211+
all_ignore_fields = list(set(cli_ignore_fields + env_ignore_fields))
212+
130213
path1, path2 = args.path1, args.path2
131214

132215
if not path1.exists() or not path2.exists():
@@ -139,7 +222,7 @@ def main():
139222
# --- Handle Directory Comparison ---
140223
if path1.is_dir() and path2.is_dir():
141224
print(f"Comparing directories:\n- Old: {path1}\n- New: {path2}\n")
142-
if process_directory_comparison(path1, path2):
225+
if process_directory_comparison(path1, path2, all_ignore_fields):
143226
print("\nComparison finished with errors.", file=sys.stderr)
144227
return 1
145228
else:
@@ -148,23 +231,17 @@ def main():
148231

149232
# --- Handle Single File Comparison ---
150233
elif path1.is_file() and path2.is_file():
151-
try:
152-
with open(path1, "r", encoding="utf-8") as f1:
153-
json1 = json.load(f1)
154-
with open(path2, "r", encoding="utf-8") as f2:
155-
json2 = json.load(f2)
156-
except (FileNotFoundError, json.JSONDecodeError) as e:
157-
print(f"Error reading or parsing file: {e}", file=sys.stderr)
158-
return 1
234+
status, diff = compare_json_files(path1, path2, all_ignore_fields)
159235

160-
diff = DeepDiff(json1, json2, ignore_order=True)
236+
if status == "FILE_ERROR":
237+
print("Error reading or parsing a file.", file=sys.stderr)
238+
return 1
161239

162-
if diff:
240+
if status == "BAD" and diff:
163241
print(
164242
f"Differences found between '{path1.name}' and '{path2.name}':\n",
165243
file=sys.stderr,
166244
)
167-
# Format the diff into a custom readable format and print to stderr
168245
custom_output = format_diff_custom(diff)
169246
print(custom_output, file=sys.stderr)
170247
return 1

0 commit comments

Comments
 (0)