Skip to content

Commit 480cad2

Browse files
committed
test
1 parent 80f3cd1 commit 480cad2

5 files changed

Lines changed: 636 additions & 0 deletions

File tree

compare_backwards_compat_data.py

Lines changed: 370 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,370 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Compare two exports of backwards-compatibility-data.
4+
5+
This script compares NumPy exports created by export_backwards_compat_data.py
6+
to verify that different versions of tiledb-py read the same data identically.
7+
8+
Usage:
9+
python compare_backwards_compat_data.py output_folder_v1 output_folder_v2
10+
"""
11+
12+
import argparse
13+
import json
14+
import sys
15+
from pathlib import Path
16+
from typing import Dict, List, Tuple, Any
17+
18+
import numpy as np
19+
def debug_read_array(uri):
    """
    Debug helper: open a TileDB array and try to read its full nonempty domain.

    This snippet previously ran at import time, where neither ``tiledb`` nor
    ``uri`` was defined, so every run of the script printed a spurious
    NameError and traceback. It is now an opt-in helper that is only executed
    when called explicitly.

    Args:
        uri: Location of the array to open (passed straight to ``tiledb.open``).

    Returns:
        None. The schema, nonempty domain and attribute names are printed;
        any failure (including a missing ``tiledb`` package) is printed with
        its traceback instead of being raised.
    """
    try:
        # Imported lazily so the comparison script itself does not require
        # tiledb to be installed.
        import tiledb

        with tiledb.open(uri, 'r') as A:
            print(f"Schema: {A.schema}")
            ned = A.nonempty_domain()
            print(f"Nonempty domain: {ned}")

            if ned:
                # One slice per dimension, built from that dimension's bounds.
                slices = [slice(*x) for x in ned]
                data = A[tuple(slices)]
                print(f"Success! Data keys: {list(data.keys())}")
    except Exception as e:
        # Best-effort diagnostic: report the problem and keep going.
        print(f"ERROR: {e}")
        import traceback
        traceback.print_exc()
33+
34+
35+
36+
37+
class ComparisonResult:
    """Accumulates the outcome of comparing two export trees."""

    def __init__(self):
        # Tallies of compared items.
        self.identical_count = 0
        self.different_count = 0
        # Relative paths that exist on only one side.
        self.missing_in_dir1 = []
        self.missing_in_dir2 = []
        # One dict per recorded difference ("path", "reason", "details").
        self.differences = []

    def add_difference(self, path: str, reason: str, details: Any = None):
        """Record one differing item and bump the difference counter."""
        entry = {"path": path, "reason": reason, "details": details}
        self.differences.append(entry)
        self.different_count += 1

    def add_identical(self):
        """Count one more item that matched."""
        self.identical_count += 1

    def print_summary(self):
        """
        Print the full report to stdout.

        Returns:
            True when no differences were recorded and nothing is missing
            on either side, False otherwise.
        """
        rule = "=" * 80

        print(f"\n{rule}")
        print("COMPARISON SUMMARY")
        print(rule)
        print(f"Identical items: {self.identical_count}")
        print(f"Different items: {self.different_count}")
        print(f"Missing in dir1: {len(self.missing_in_dir1)}")
        print(f"Missing in dir2: {len(self.missing_in_dir2)}")
        print(rule)

        missing_sections = (
            ("\nMissing in first directory:", self.missing_in_dir1),
            ("\nMissing in second directory:", self.missing_in_dir2),
        )
        for heading, entries in missing_sections:
            if not entries:
                continue
            print(heading)
            for entry in entries:
                print(f" - {entry}")

        if self.differences:
            print("\nDifferences found:")
            for record in self.differences:
                print(f"\n Path: {record['path']}")
                print(f" Reason: {record['reason']}")
                # Details are optional; skip the line when there are none.
                if record['details']:
                    print(f" Details: {record['details']}")

        all_match = not (
            self.different_count or self.missing_in_dir1 or self.missing_in_dir2
        )

        print(f"\n{rule}")
        if all_match:
            print("SUCCESS: All data is identical!")
        else:
            print("FAILURE: Differences detected!")
        print(f"{rule}\n")

        return all_match
97+
98+
99+
def compare_json_files(file1: Path, file2: Path, result: "ComparisonResult", rel_path: str) -> bool:
    """
    Compare two JSON files by parsed content.

    Args:
        file1: JSON file from the first export
        file2: JSON file from the second export
        result: ComparisonResult object to store results
        rel_path: Relative path used when reporting

    Returns:
        True if the parsed documents are equal, False otherwise (including
        when either file cannot be read or parsed, which is recorded as a
        difference rather than raised).
    """
    try:
        with open(file1, "r", encoding="utf-8") as f:
            data1 = json.load(f)
        with open(file2, "r", encoding="utf-8") as f:
            data2 = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        result.add_difference(rel_path, f"Error comparing JSON: {e}")
        return False

    if data1 == data2:
        result.add_identical()
        return True

    if isinstance(data1, dict) and isinstance(data2, dict):
        # Walk keys in sorted order so the report is stable between runs
        # (plain set iteration order depends on string hash randomization).
        diff_keys = []
        for key in sorted(data1.keys() | data2.keys()):
            if key not in data1:
                diff_keys.append(f"'{key}' missing in dir1")
            elif key not in data2:
                diff_keys.append(f"'{key}' missing in dir2")
            elif data1[key] != data2[key]:
                diff_keys.append(f"'{key}': {data1[key]} != {data2[key]}")
        details = "; ".join(diff_keys)
    else:
        # Top-level lists/scalars have no keys to diff; report their types
        # instead of failing on a missing .keys() method.
        details = (
            "top-level JSON values differ "
            f"(dir1 is {type(data1).__name__}, dir2 is {type(data2).__name__})"
        )

    result.add_difference(rel_path, "JSON content differs", details)
    return False
136+
137+
138+
def _elements_equal(a, b) -> bool:
    """Compare two elements of an object array, treating NaN pairs as equal."""
    if isinstance(a, np.ndarray) or isinstance(b, np.ndarray):
        a_arr, b_arr = np.asarray(a), np.asarray(b)
        return a_arr.shape == b_arr.shape and _arrays_equal(a_arr, b_arr)
    float_types = (float, np.floating)
    if isinstance(a, float_types) and isinstance(b, float_types):
        if np.isnan(a) and np.isnan(b):
            return True
    return bool(a == b)


def _arrays_equal(arr1, arr2) -> bool:
    """Return True if two same-shaped arrays hold equal values (NaN == NaN)."""
    try:
        return bool(np.array_equal(arr1, arr2, equal_nan=True))
    except TypeError:
        # np.isnan is undefined for this dtype (object, string, structured...),
        # so fall through to a comparison that does not need it.
        pass
    if arr1.dtype == object or arr2.dtype == object:
        return all(_elements_equal(a, b) for a, b in zip(arr1.flat, arr2.flat))
    return bool(np.array_equal(arr1, arr2))


def _describe_value_differences(arr1, arr2) -> str:
    """Build the details string for two same-shape, same-dtype unequal arrays."""
    kind = arr1.dtype.kind
    mismatch = arr1 != arr2
    if kind in "fc":
        # Positions where both sides are NaN count as equal, matching the
        # equal_nan=True semantics used for the equality check.
        mismatch = mismatch & ~(np.isnan(arr1) & np.isnan(arr2))
    num_different = np.count_nonzero(mismatch)
    count_text = f"num_different={num_different}/{arr1.size}"

    if not np.issubdtype(arr1.dtype, np.number):
        return count_text

    if kind == "u":
        # Unsigned subtraction wraps around (3 - 5 == 254 for uint8), so
        # always subtract the smaller value from the larger one.
        diff = np.where(arr1 > arr2, arr1 - arr2, arr2 - arr1)
    else:
        with np.errstate(invalid="ignore"):
            diff = np.abs(arr1 - arr2)

    # Ignore NaN distances so a single NaN does not hide the real statistics.
    comparable = diff[~np.isnan(diff)] if kind in "fc" else diff
    if comparable.size:
        max_diff = np.max(comparable)
        mean_diff = np.mean(comparable)
    else:
        max_diff = mean_diff = float("nan")

    return f"max_diff={max_diff}, mean_diff={mean_diff}, {count_text}"


def compare_numpy_files(file1: Path, file2: Path, result: "ComparisonResult", rel_path: str) -> bool:
    """
    Compare two NumPy files.

    Shape is checked first, then dtype, then values. NaNs in the same
    position are treated as equal.

    Args:
        file1: Array file from the first export
        file2: Array file from the second export
        result: ComparisonResult object to store results
        rel_path: Relative path used when reporting

    Returns:
        True if arrays are equal, False otherwise. Any error raised while
        loading or comparing is recorded as a difference, not propagated.
    """
    try:
        # NOTE(security): allow_pickle=True unpickles file content, which can
        # execute arbitrary code. Only run this on exports you produced.
        arr1 = np.load(file1, allow_pickle=True)
        arr2 = np.load(file2, allow_pickle=True)

        # Check shapes
        if arr1.shape != arr2.shape:
            result.add_difference(
                rel_path,
                "Array shape mismatch",
                f"{arr1.shape} != {arr2.shape}"
            )
            return False

        # Check dtypes
        if arr1.dtype != arr2.dtype:
            result.add_difference(
                rel_path,
                "Array dtype mismatch",
                f"{arr1.dtype} != {arr2.dtype}"
            )
            return False

        # Check values
        if _arrays_equal(arr1, arr2):
            result.add_identical()
            return True

        if arr1.dtype == object:
            result.add_difference(rel_path, "Array object values differ")
        else:
            result.add_difference(
                rel_path,
                "Array values differ",
                _describe_value_differences(arr1, arr2)
            )
        return False

    except Exception as e:
        # Deliberately broad: one unreadable file must not abort the whole run.
        result.add_difference(rel_path, f"Error comparing NumPy arrays: {e}")
        return False
210+
211+
212+
def _compare_file_contents(path1: Path, path2: Path, result: "ComparisonResult",
                           rel_path: str, *, as_text: bool) -> None:
    """Read both files whole (text or binary mode) and record equal/different."""
    mode, label = ("r", "text") if as_text else ("rb", "binary")
    try:
        with open(path1, mode) as f:
            content1 = f.read()
        with open(path2, mode) as f:
            content2 = f.read()
    except (OSError, ValueError) as e:
        # ValueError covers UnicodeDecodeError raised in text mode.
        result.add_difference(rel_path, f"Error comparing {label}: {e}")
        return

    if content1 == content2:
        result.add_identical()
    else:
        result.add_difference(rel_path, f"{label.capitalize()} content differs")


def compare_directories(dir1: Path, dir2: Path, result: "ComparisonResult", rel_path: str = "") -> None:
    """
    Recursively compare two directories.

    Entries present on only one side are appended to the result's
    ``missing_in_dir1`` / ``missing_in_dir2`` lists. Entries present on both
    sides are compared by type: sub-directories recursively, ``.json`` via
    compare_json_files, ``.npy``/``.pkl`` via compare_numpy_files, ``.txt`` as
    text, anything else byte-for-byte. ``export_info.json`` is skipped.

    Args:
        dir1: First directory to compare
        dir2: Second directory to compare
        result: ComparisonResult object to store results
        rel_path: Relative path for reporting
    """
    def qualify(name: str) -> str:
        # Path of an entry relative to the export root, for reporting.
        return f"{rel_path}/{name}" if rel_path else name

    # Get all files and subdirectories
    items1 = {p.name for p in dir1.iterdir()}
    items2 = {p.name for p in dir2.iterdir()}

    # Record missing items in sorted order so reports are reproducible;
    # raw set iteration order changes from run to run.
    result.missing_in_dir2.extend(qualify(name) for name in sorted(items1 - items2))
    result.missing_in_dir1.extend(qualify(name) for name in sorted(items2 - items1))

    # Compare common items
    for item in sorted(items1 & items2):
        path1 = dir1 / item
        path2 = dir2 / item
        item_rel_path = qualify(item)

        if path1.is_dir() and path2.is_dir():
            # Recursively compare directories
            compare_directories(path1, path2, result, item_rel_path)
            continue

        if not (path1.is_file() and path2.is_file()):
            # Type mismatch (one is file, other is directory)
            result.add_difference(
                item_rel_path,
                "Type mismatch",
                f"dir1={'dir' if path1.is_dir() else 'file'}, "
                f"dir2={'dir' if path2.is_dir() else 'file'}"
            )
            continue

        # Skip export_info.json - versions and timestamps are expected to differ
        if item == "export_info.json":
            continue

        # Compare files based on extension
        if item.endswith(".json"):
            compare_json_files(path1, path2, result, item_rel_path)
        elif item.endswith((".npy", ".pkl")):
            compare_numpy_files(path1, path2, result, item_rel_path)
        else:
            _compare_file_contents(
                path1, path2, result, item_rel_path,
                as_text=item.endswith(".txt"),
            )
295+
296+
297+
def compare_exports(dir1: str, dir2: str) -> bool:
    """
    Compare two export directories.

    Prints a header, the contents of each side's ``export_info.json`` (for
    information only), then the comparison summary.

    Args:
        dir1: First export directory
        dir2: Second export directory

    Returns:
        True if identical, False if differences found

    Raises:
        SystemExit: with status 1 when either argument is not an existing
            directory.
    """
    dir1_path = Path(dir1)
    dir2_path = Path(dir2)

    # is_dir() rather than exists(): a regular file would pass exists() and
    # then crash later when the comparison tries to list its contents.
    for candidate in (dir1_path, dir2_path):
        if not candidate.is_dir():
            print(f"Error: Directory not found: {candidate}")
            sys.exit(1)

    print("Comparing backwards-compatibility-data exports")
    print(f" Directory 1: {dir1_path}")
    print(f" Directory 2: {dir2_path}")
    print()

    # Load and display export info (for informational purposes only)
    print("Export metadata (not compared):")
    for i, d in enumerate([dir1_path, dir2_path], 1):
        info_file = d / "export_info.json"
        if not info_file.exists():
            continue
        try:
            with open(info_file, "r", encoding="utf-8") as f:
                info = json.load(f)
        except (OSError, ValueError) as e:
            # The metadata is only displayed, so a broken file must not
            # prevent the actual comparison from running.
            print(f" Directory {i}: could not read export_info.json ({e})")
            continue
        print(f" Directory {i}:")
        if isinstance(info, dict):
            for key, value in info.items():
                print(f" {key}: {value}")
        else:
            print(f" {info}")
    print()

    # Perform comparison
    result = ComparisonResult()
    compare_directories(dir1_path, dir2_path, result)

    # Print results
    return result.print_summary()
342+
343+
344+
def main():
    """Command-line entry point: parse arguments, compare, exit 0 or 1."""
    parser = argparse.ArgumentParser(
        description="Compare two exports of backwards-compatibility-data"
    )

    positionals = (
        ("dir1", "First export directory"),
        ("dir2", "Second export directory"),
    )
    for dest, help_text in positionals:
        parser.add_argument(dest, help=help_text)

    # The flag is accepted on the command line, but nothing in this
    # function reads args.verbose.
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")

    args = parser.parse_args()

    # Exit status mirrors the comparison outcome: 0 when identical, 1 otherwise.
    exit_code = 0 if compare_exports(args.dir1, args.dir2) else 1
    sys.exit(exit_code)
367+
368+
369+
# Run the command-line interface only when this file is executed as a
# script; importing it as a module does not call main().
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)